1 /* 2 * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! block bitmap handling and allocation policies 7 8 9 #include "BlockAllocator.h" 10 11 #include "bfs_control.h" 12 #include "BPlusTree.h" 13 #include "Debug.h" 14 #include "Inode.h" 15 #include "Volume.h" 16 17 18 // Things the BlockAllocator should do: 19 20 // - find a range of blocks of a certain size nearby a specific position 21 // - allocating an unsharp range of blocks for pre-allocation 22 // - free blocks 23 // - know how to deal with each allocation, special handling for directories, 24 // files, symlinks, etc. (type sensitive allocation policies) 25 26 // What makes the code complicated is the fact that we are not just reading 27 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 28 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 29 // of partitions will grow even more - so that's not an option. 30 // Instead we are reading in every block when it's used - since an allocation 31 // group can span several blocks in the block bitmap, the AllocationBlock 32 // class is there to make handling those easier. 33 34 // The current implementation is only slightly optimized and could probably 35 // be improved a lot. Furthermore, the allocation policies used here should 36 // have some real world tests. 37 38 #if BFS_TRACING && !defined(BFS_SHELL) 39 namespace BFSBlockTracing { 40 41 class Allocate : public AbstractTraceEntry { 42 public: 43 Allocate(block_run run) 44 : 45 fRun(run) 46 { 47 Initialized(); 48 } 49 50 virtual void AddDump(TraceOutput& out) 51 { 52 out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(), 53 fRun.Start(), fRun.Length()); 54 } 55 56 const block_run& Run() const { return fRun; } 57 58 private: 59 block_run fRun; 60 }; 61 62 class Free : public AbstractTraceEntry { 63 public: 64 Free(block_run run) 65 : 66 fRun(run) 67 { 68 Initialized(); 69 } 70 71 virtual void AddDump(TraceOutput& out) 72 { 73 out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(), 74 fRun.Start(), fRun.Length()); 75 } 76 77 const block_run& Run() const { return fRun; } 78 79 private: 80 block_run fRun; 81 }; 82 83 84 static uint32 85 checksum(const uint8* data, size_t size) 86 { 87 const uint32* data4 = (const uint32*)data; 88 uint32 sum = 0; 89 while (size >= 4) { 90 sum += *data4; 91 data4++; 92 size -= 4; 93 } 94 return sum; 95 } 96 97 98 class Block : public AbstractTraceEntry { 99 public: 100 Block(const char* label, off_t blockNumber, const uint8* data, 101 size_t size, uint32 start = 0, uint32 length = 0) 102 : 103 fBlock(blockNumber), 104 fData(data), 105 fStart(start), 106 fLength(length), 107 fLabel(label) 108 { 109 fSum = checksum(data, size); 110 Initialized(); 111 } 112 113 virtual void AddDump(TraceOutput& out) 114 { 115 out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel, 116 fBlock, fData, fSum, fStart, fLength); 117 } 118 119 private: 120 off_t fBlock; 121 const uint8 *fData; 122 uint32 fStart; 123 uint32 fLength; 124 uint32 fSum; 125 const char* fLabel; 126 }; 127 128 129 class BlockChange : public AbstractTraceEntry { 130 public: 131 BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData) 132 : 133 fBlock(block), 134 fOldData(oldData), 135 fNewData(newData), 136 fLabel(label) 137 { 138 Initialized(); 139 } 140 141 virtual void AddDump(TraceOutput& out) 142 { 143 out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel, 144 fBlock, fOldData, fNewData); 145 } 146 147 private: 148 int32 fBlock; 149 uint32 fOldData; 150 uint32 fNewData; 151 const char* fLabel; 152 }; 153 154 } // namespace BFSBlockTracing 155 156 # define T(x) new(std::nothrow) BFSBlockTracing::x; 157 #else 158 # define T(x) ; 159 #endif 160 161 #ifdef DEBUG_ALLOCATION_GROUPS 162 # define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group) 163 #else 164 # define CHECK_ALLOCATION_GROUP(group) ; 165 #endif 166 167 168 struct check_cookie { 169 check_cookie() {} 170 171 block_run current; 172 Inode *parent; 173 mode_t parent_mode; 174 Stack<block_run> stack; 175 TreeIterator *iterator; 176 }; 177 178 179 class AllocationBlock : public CachedBlock { 180 public: 181 AllocationBlock(Volume* volume); 182 183 inline void Allocate(uint16 start, uint16 numBlocks); 184 inline void Free(uint16 start, uint16 numBlocks); 185 inline bool IsUsed(uint16 block); 186 187 status_t SetTo(AllocationGroup& group, uint16 block); 188 status_t SetToWritable(Transaction& transaction, AllocationGroup& group, 189 uint16 block); 190 191 uint32 NumBlockBits() const { return fNumBits; } 192 uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; } 193 uint8* Block() const { return (uint8*)fBlock; } 194 195 private: 196 uint32 fNumBits; 197 #ifdef DEBUG 198 bool fWritable; 199 #endif 200 }; 201 202 203 class AllocationGroup { 204 public: 205 AllocationGroup(); 206 207 void AddFreeRange(int32 start, int32 blocks); 208 bool IsFull() const { return fFreeBits == 0; } 209 210 status_t Allocate(Transaction& transaction, uint16 start, int32 length); 211 status_t Free(Transaction& transaction, uint16 start, int32 length); 212 213 uint32 NumBits() const { return fNumBits; } 214 uint32 NumBlocks() const { return fNumBlocks; } 215 int32 Start() const { return fStart; } 216 217 private: 218 friend class BlockAllocator; 219 220 uint32 fNumBits; 221 uint32 fNumBlocks; 222 int32 fStart; 223 int32 fFirstFree; 224 int32 fFreeBits; 225 226 int32 fLargestStart; 227 int32 fLargestLength; 228 bool fLargestValid; 229 }; 230 231 232 AllocationBlock::AllocationBlock(Volume* volume) 233 : CachedBlock(volume) 234 { 235 } 236 237 238 status_t 239 AllocationBlock::SetTo(AllocationGroup& group, uint16 block) 240 { 241 // 8 blocks per byte 242 fNumBits = fVolume->BlockSize() << 3; 243 // the last group may have less bits than the others 244 if ((block + 1) * fNumBits > group.NumBits()) 245 fNumBits = group.NumBits() % fNumBits; 246 247 #ifdef DEBUG 248 fWritable = false; 249 #endif 250 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 251 } 252 253 254 status_t 255 AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group, 256 uint16 block) 257 { 258 // 8 blocks per byte 259 fNumBits = fVolume->BlockSize() << 3; 260 // the last group may have less bits in the last block 261 if ((block + 1) * fNumBits > group.NumBits()) 262 fNumBits = group.NumBits() % fNumBits; 263 264 #ifdef DEBUG 265 fWritable = true; 266 #endif 267 return CachedBlock::SetToWritable(transaction, group.Start() + block) 268 != NULL ? B_OK : B_ERROR; 269 } 270 271 272 bool 273 AllocationBlock::IsUsed(uint16 block) 274 { 275 if (block > fNumBits) 276 return true; 277 278 // the block bitmap is accessed in 32-bit blocks 279 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 280 } 281 282 283 void 284 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 285 { 286 ASSERT(start < fNumBits); 287 ASSERT(uint32(start + numBlocks) <= fNumBits); 288 #ifdef DEBUG 289 ASSERT(fWritable); 290 #endif 291 292 T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(), 293 start, numBlocks)); 294 295 int32 block = start >> 5; 296 297 while (numBlocks > 0) { 298 uint32 mask = 0; 299 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 300 mask |= 1UL << i; 301 302 T(BlockChange("b-alloc", block, Block(block), 303 Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask))); 304 305 #if KDEBUG 306 // check for already set blocks 307 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) { 308 FATAL(("AllocationBlock::Allocate(): some blocks are already " 309 "allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 310 panic("blocks already set!"); 311 } 312 #endif 313 314 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 315 start = 0; 316 } 317 T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(), 318 start, numBlocks)); 319 } 320 321 322 void 323 AllocationBlock::Free(uint16 start, uint16 numBlocks) 324 { 325 ASSERT(start < fNumBits); 326 ASSERT(uint32(start + numBlocks) <= fNumBits); 327 #ifdef DEBUG 328 ASSERT(fWritable); 329 #endif 330 331 int32 block = start >> 5; 332 333 while (numBlocks > 0) { 334 uint32 mask = 0; 335 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 336 mask |= 1UL << (i % 32); 337 338 T(BlockChange("b-free", block, Block(block), 339 Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask))); 340 341 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 342 start = 0; 343 } 344 } 345 346 347 // #pragma mark - 348 349 350 /*! The allocation groups are created and initialized in 351 BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 352 respectively. 353 */ 354 AllocationGroup::AllocationGroup() 355 : 356 fFirstFree(-1), 357 fFreeBits(0), 358 fLargestValid(false) 359 { 360 } 361 362 363 void 364 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 365 { 366 //D(if (blocks > 512) 367 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 368 369 if (fFirstFree == -1) 370 fFirstFree = start; 371 372 if (!fLargestValid || fLargestLength < blocks) { 373 fLargestStart = start; 374 fLargestLength = blocks; 375 fLargestValid = true; 376 } 377 378 fFreeBits += blocks; 379 } 380 381 382 /*! Allocates the specified run in the allocation group. 383 Doesn't check if the run is valid or already allocated partially, nor 384 does it maintain the free ranges hints or the volume's used blocks count. 385 It only does the low-level work of allocating some bits in the block bitmap. 386 Assumes that the block bitmap lock is hold. 387 */ 388 status_t 389 AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length) 390 { 391 ASSERT(start + length <= (int32)fNumBits); 392 393 // Update the allocation group info 394 // TODO: this info will be incorrect if something goes wrong later 395 // Note, the fFirstFree block doesn't have to be really free 396 if (start == fFirstFree) 397 fFirstFree = start + length; 398 fFreeBits -= length; 399 400 if (fLargestValid) { 401 bool cut = false; 402 if (fLargestStart == start) { 403 // cut from start 404 fLargestStart += length; 405 fLargestLength -= length; 406 cut = true; 407 } else if (start > fLargestStart 408 && start < fLargestStart + fLargestLength) { 409 // cut from end 410 fLargestLength = start - fLargestStart; 411 cut = true; 412 } 413 if (cut && (fLargestLength < fLargestStart 414 || fLargestLength 415 < (int32)fNumBits - (fLargestStart + fLargestLength))) { 416 // might not be the largest block anymore 417 fLargestValid = false; 418 } 419 } 420 421 Volume* volume = transaction.GetVolume(); 422 423 // calculate block in the block bitmap and position within 424 uint32 bitsPerBlock = volume->BlockSize() << 3; 425 uint32 block = start / bitsPerBlock; 426 start = start % bitsPerBlock; 427 428 AllocationBlock cached(volume); 429 430 while (length > 0) { 431 if (cached.SetToWritable(transaction, *this, block) < B_OK) { 432 fLargestValid = false; 433 RETURN_ERROR(B_IO_ERROR); 434 } 435 436 uint32 numBlocks = length; 437 if (start + numBlocks > cached.NumBlockBits()) 438 numBlocks = cached.NumBlockBits() - start; 439 440 cached.Allocate(start, numBlocks); 441 442 length -= numBlocks; 443 start = 0; 444 block++; 445 } 446 447 return B_OK; 448 } 449 450 451 /*! Frees the specified run in the allocation group. 452 Doesn't check if the run is valid or was not completely allocated, nor 453 does it maintain the free ranges hints or the volume's used blocks count. 454 It only does the low-level work of freeing some bits in the block bitmap. 455 Assumes that the block bitmap lock is hold. 456 */ 457 status_t 458 AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length) 459 { 460 ASSERT(start + length <= (int32)fNumBits); 461 462 // Update the allocation group info 463 // TODO: this info will be incorrect if something goes wrong later 464 if (fFirstFree > start) 465 fFirstFree = start; 466 fFreeBits += length; 467 468 // The range to be freed cannot be part of the valid largest range 469 ASSERT(!fLargestValid || start + length <= fLargestStart 470 || start > fLargestStart); 471 472 if (fLargestValid 473 && (start + length == fLargestStart 474 || fLargestStart + fLargestLength == start 475 || (start < fLargestStart && fLargestStart > fLargestLength) 476 || (start > fLargestStart 477 && (int32)fNumBits - (fLargestStart + fLargestLength) 478 > fLargestLength))) { 479 fLargestValid = false; 480 } 481 482 Volume* volume = transaction.GetVolume(); 483 484 // calculate block in the block bitmap and position within 485 uint32 bitsPerBlock = volume->BlockSize() << 3; 486 uint32 block = start / bitsPerBlock; 487 start = start % bitsPerBlock; 488 489 AllocationBlock cached(volume); 490 491 while (length > 0) { 492 if (cached.SetToWritable(transaction, *this, block) < B_OK) 493 RETURN_ERROR(B_IO_ERROR); 494 495 T(Block("free-1", block, cached.Block(), volume->BlockSize())); 496 uint16 freeLength = length; 497 if (uint32(start + length) > cached.NumBlockBits()) 498 freeLength = cached.NumBlockBits() - start; 499 500 cached.Free(start, freeLength); 501 502 length -= freeLength; 503 start = 0; 504 T(Block("free-2", block, cached.Block(), volume->BlockSize())); 505 block++; 506 } 507 return B_OK; 508 } 509 510 511 // #pragma mark - 512 513 514 BlockAllocator::BlockAllocator(Volume* volume) 515 : 516 fVolume(volume), 517 fGroups(NULL), 518 fCheckBitmap(NULL) 519 { 520 mutex_init(&fLock, "bfs allocator"); 521 } 522 523 524 BlockAllocator::~BlockAllocator() 525 { 526 mutex_destroy(&fLock); 527 delete[] fGroups; 528 } 529 530 531 status_t 532 BlockAllocator::Initialize(bool full) 533 { 534 fNumGroups = fVolume->AllocationGroups(); 535 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 536 fGroups = new AllocationGroup[fNumGroups]; 537 if (fGroups == NULL) 538 return B_NO_MEMORY; 539 540 if (!full) 541 return B_OK; 542 543 mutex_lock(&fLock); 544 // the lock will be released by the _Initialize() method 545 546 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 547 "bfs block allocator", B_LOW_PRIORITY, this); 548 if (id < B_OK) 549 return _Initialize(this); 550 551 mutex_transfer_lock(&fLock, id); 552 553 return resume_thread(id); 554 } 555 556 557 status_t 558 BlockAllocator::InitializeAndClearBitmap(Transaction& transaction) 559 { 560 status_t status = Initialize(false); 561 if (status < B_OK) 562 return status; 563 564 uint32 blocks = fBlocksPerGroup; 565 uint32 numBits = 8 * blocks * fVolume->BlockSize(); 566 uint32 blockShift = fVolume->BlockShift(); 567 568 uint32* buffer = (uint32*)malloc(numBits >> 3); 569 if (buffer == NULL) 570 RETURN_ERROR(B_NO_MEMORY); 571 572 memset(buffer, 0, numBits >> 3); 573 574 off_t offset = 1; 575 576 // initialize the AllocationGroup objects and clear the on-disk bitmap 577 578 for (int32 i = 0; i < fNumGroups; i++) { 579 if (write_pos(fVolume->Device(), offset << blockShift, buffer, 580 blocks << blockShift) < B_OK) 581 return B_ERROR; 582 583 // the last allocation group may contain less blocks than the others 584 if (i == fNumGroups - 1) { 585 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 586 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) 587 >> (blockShift + 3)); 588 } else { 589 fGroups[i].fNumBits = numBits; 590 fGroups[i].fNumBlocks = blocks; 591 } 592 fGroups[i].fStart = offset; 593 fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0; 594 fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits; 595 fGroups[i].fLargestValid = true; 596 597 offset += blocks; 598 } 599 free(buffer); 600 601 // reserve the boot block, the log area, and the block bitmap itself 602 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 603 604 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 605 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 606 return B_ERROR; 607 } 608 fVolume->SuperBlock().used_blocks 609 = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 610 611 return B_OK; 612 } 613 614 615 status_t 616 BlockAllocator::_Initialize(BlockAllocator* allocator) 617 { 618 // The lock must already be held at this point 619 620 Volume* volume = allocator->fVolume; 621 uint32 blocks = allocator->fBlocksPerGroup; 622 uint32 blockShift = volume->BlockShift(); 623 off_t freeBlocks = 0; 624 625 uint32* buffer = (uint32*)malloc(blocks << blockShift); 626 if (buffer == NULL) { 627 mutex_unlock(&allocator->fLock); 628 RETURN_ERROR(B_NO_MEMORY); 629 } 630 631 AllocationGroup* groups = allocator->fGroups; 632 off_t offset = 1; 633 uint32 bitsPerGroup = 8 * (blocks << blockShift); 634 int32 numGroups = allocator->fNumGroups; 635 636 for (int32 i = 0; i < numGroups; i++) { 637 if (read_pos(volume->Device(), offset << blockShift, buffer, 638 blocks << blockShift) < B_OK) 639 break; 640 641 // the last allocation group may contain less blocks than the others 642 if (i == numGroups - 1) { 643 groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup; 644 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) 645 >> (blockShift + 3)); 646 } else { 647 groups[i].fNumBits = bitsPerGroup; 648 groups[i].fNumBlocks = blocks; 649 } 650 groups[i].fStart = offset; 651 652 // finds all free ranges in this allocation group 653 int32 start = -1, range = 0; 654 int32 numBits = groups[i].fNumBits, bit = 0; 655 int32 count = (numBits + 31) / 32; 656 657 for (int32 k = 0; k < count; k++) { 658 for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) { 659 if (buffer[k] & (1UL << j)) { 660 // block is in use 661 if (range > 0) { 662 groups[i].AddFreeRange(start, range); 663 range = 0; 664 } 665 } else if (range++ == 0) { 666 // block is free, start new free range 667 start = bit; 668 } 669 } 670 } 671 if (range) 672 groups[i].AddFreeRange(start, range); 673 674 freeBlocks += groups[i].fFreeBits; 675 676 offset += blocks; 677 } 678 free(buffer); 679 680 // check if block bitmap and log area are reserved 681 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 682 if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) { 683 Transaction transaction(volume, 0); 684 if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 685 FATAL(("could not allocate reserved space for block " 686 "bitmap/log!\n")); 687 volume->Panic(); 688 } else { 689 FATAL(("space for block bitmap or log area was not reserved!\n")); 690 691 transaction.Done(); 692 } 693 } 694 695 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 696 if (volume->UsedBlocks() != usedBlocks) { 697 // If the disk in a dirty state at mount time, it's 698 // normal that the values don't match 699 INFORM(("volume reports %Ld used blocks, correct is %Ld\n", 700 volume->UsedBlocks(), usedBlocks)); 701 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 702 } 703 704 mutex_unlock(&allocator->fLock); 705 return B_OK; 706 } 707 708 709 void 710 BlockAllocator::Uninitialize() 711 { 712 // We only have to make sure that the initializer thread isn't running 713 // anymore. 714 mutex_lock(&fLock); 715 } 716 717 718 status_t 719 BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex, 720 uint16 start, uint16 maximum, uint16 minimum, block_run& run) 721 { 722 if (maximum == 0) 723 return B_BAD_VALUE; 724 725 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", 726 groupIndex, start, maximum, minimum)); 727 728 AllocationBlock cached(fVolume); 729 MutexLocker lock(fLock); 730 731 uint32 bitsPerFullBlock = fVolume->BlockSize() << 3; 732 733 // Find the block_run that can fulfill the request best 734 int32 bestGroup = -1; 735 int32 bestStart = -1; 736 int32 bestLength = -1; 737 738 for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) { 739 groupIndex = groupIndex % fNumGroups; 740 AllocationGroup& group = fGroups[groupIndex]; 741 742 CHECK_ALLOCATION_GROUP(groupIndex); 743 744 if (start >= group.NumBits() || group.IsFull()) 745 continue; 746 747 // The wanted maximum is smaller than the largest free block in the 748 // group or already smaller than the minimum 749 750 if (start < group.fFirstFree) 751 start = group.fFirstFree; 752 753 if (group.fLargestValid) { 754 if (group.fLargestLength < bestLength) 755 continue; 756 757 if (group.fLargestStart >= start) { 758 if (group.fLargestLength >= bestLength) { 759 bestGroup = groupIndex; 760 bestStart = group.fLargestStart; 761 bestLength = group.fLargestLength; 762 763 if (bestLength >= maximum) 764 break; 765 } 766 767 // We know everything about this group we have to, let's skip 768 // to the next 769 continue; 770 } 771 } 772 773 // There may be more than one block per allocation group - and 774 // we iterate through it to find a place for the allocation. 775 // (one allocation can't exceed one allocation group) 776 777 uint32 block = start / (fVolume->BlockSize() << 3); 778 int32 currentStart = 0, currentLength = 0; 779 int32 groupLargestStart = -1; 780 int32 groupLargestLength = -1; 781 int32 currentBit = start; 782 bool canFindGroupLargest = start == 0; 783 784 for (; block < group.NumBlocks(); block++) { 785 if (cached.SetTo(group, block) < B_OK) 786 RETURN_ERROR(B_ERROR); 787 788 T(Block("alloc-in", group.Start() + block, cached.Block(), 789 fVolume->BlockSize(), groupIndex, currentStart)); 790 791 // find a block large enough to hold the allocation 792 for (uint32 bit = start % bitsPerFullBlock; 793 bit < cached.NumBlockBits(); bit++) { 794 if (!cached.IsUsed(bit)) { 795 if (currentLength == 0) { 796 // start new range 797 currentStart = currentBit; 798 } 799 800 // have we found a range large enough to hold numBlocks? 801 if (++currentLength >= maximum) { 802 bestGroup = groupIndex; 803 bestStart = currentStart; 804 bestLength = currentLength; 805 break; 806 } 807 } else { 808 if (currentLength) { 809 // end of a range 810 if (currentLength > bestLength) { 811 bestGroup = groupIndex; 812 bestStart = currentStart; 813 bestLength = currentLength; 814 } 815 if (currentLength > groupLargestLength) { 816 groupLargestStart = currentStart; 817 groupLargestLength = currentLength; 818 } 819 currentLength = 0; 820 } 821 if ((int32)group.NumBits() - currentBit 822 <= groupLargestLength) { 823 // We can't find a bigger block in this group anymore, 824 // let's skip the rest. 825 block = group.NumBlocks(); 826 break; 827 } 828 } 829 currentBit++; 830 } 831 832 T(Block("alloc-out", block, cached.Block(), 833 fVolume->BlockSize(), groupIndex, currentStart)); 834 835 if (bestLength >= maximum) { 836 canFindGroupLargest = false; 837 break; 838 } 839 840 // start from the beginning of the next block 841 start = 0; 842 } 843 844 if (currentBit == (int32)group.NumBits()) { 845 if (currentLength > bestLength) { 846 bestGroup = groupIndex; 847 bestStart = currentStart; 848 bestLength = currentLength; 849 } 850 if (canFindGroupLargest && currentLength > groupLargestLength) { 851 groupLargestStart = currentStart; 852 groupLargestLength = currentLength; 853 } 854 } 855 856 if (canFindGroupLargest && !group.fLargestValid 857 && groupLargestLength >= 0) { 858 group.fLargestStart = groupLargestStart; 859 group.fLargestLength = groupLargestLength; 860 group.fLargestValid = true; 861 } 862 863 if (bestLength >= maximum) 864 break; 865 } 866 867 // If we found a suitable range, mark the blocks as in use, and 868 // write the updated block bitmap back to disk 869 if (bestLength < minimum) 870 return B_DEVICE_FULL; 871 if (bestLength > maximum) 872 bestLength = maximum; 873 874 if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) 875 < B_OK) 876 RETURN_ERROR(B_IO_ERROR); 877 878 CHECK_ALLOCATION_GROUP(bestGroup); 879 880 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup); 881 run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart); 882 run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength); 883 884 fVolume->SuperBlock().used_blocks = 885 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength); 886 // We are not writing back the disk's super block - it's 887 // either done by the journaling code, or when the disk 888 // is unmounted. 889 // If the value is not correct at mount time, it will be 890 // fixed anyway. 891 892 // We need to flush any remaining blocks in the new allocation to make sure 893 // they won't interfere with the file cache. 894 block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run), 895 run.Length()); 896 897 T(Allocate(run)); 898 return B_OK; 899 } 900 901 902 status_t 903 BlockAllocator::AllocateForInode(Transaction& transaction, 904 const block_run* parent, mode_t type, block_run& run) 905 { 906 // Apply some allocation policies here (AllocateBlocks() will break them 907 // if necessary) - we will start with those described in Dominic Giampaolo's 908 // "Practical File System Design", and see how good they work 909 910 // Files are going in the same allocation group as its parent, 911 // sub-directories will be inserted 8 allocation groups after 912 // the one of the parent 913 uint16 group = parent->AllocationGroup(); 914 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 915 group += 8; 916 917 return AllocateBlocks(transaction, group, 0, 1, 1, run); 918 } 919 920 921 status_t 922 BlockAllocator::Allocate(Transaction& transaction, Inode* inode, 923 off_t numBlocks, block_run& run, uint16 minimum) 924 { 925 if (numBlocks <= 0) 926 return B_ERROR; 927 928 // one block_run can't hold more data than there is in one allocation group 929 if (numBlocks > fGroups[0].NumBits()) 930 numBlocks = fGroups[0].NumBits(); 931 932 // since block_run.length is uint16, the largest number of blocks that 933 // can be covered by a block_run is 65535 934 // TODO: if we drop compatibility, couldn't we do this any better? 935 // There are basically two possibilities: 936 // a) since a length of zero doesn't have any sense, take that for 65536 - 937 // but that could cause many problems (bugs) in other areas 938 // b) reduce the maximum amount of blocks per block_run, so that the 939 // remaining number of free blocks can be used in a useful manner 940 // (like 4 blocks) - but that would also reduce the maximum file size 941 // c) have BlockRun::Length() return (length + 1). 942 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 943 numBlocks = MAX_BLOCK_RUN_LENGTH; 944 945 // Apply some allocation policies here (AllocateBlocks() will break them 946 // if necessary) 947 uint16 group = inode->BlockRun().AllocationGroup(); 948 uint16 start = 0; 949 950 // Are there already allocated blocks? (then just try to allocate near the 951 // last one) 952 if (inode->Size() > 0) { 953 const data_stream& data = inode->Node().data; 954 // TODO: we currently don't care for when the data stream 955 // is already grown into the indirect ranges 956 if (data.max_double_indirect_range == 0 957 && data.max_indirect_range == 0) { 958 // Since size > 0, there must be a valid block run in this stream 959 int32 last = 0; 960 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 961 if (data.direct[last + 1].IsZero()) 962 break; 963 964 group = data.direct[last].AllocationGroup(); 965 start = data.direct[last].Start() + data.direct[last].Length(); 966 } 967 } else if (inode->IsContainer() || inode->IsSymLink()) { 968 // directory and symbolic link data will go in the same allocation 969 // group as the inode is in but after the inode data 970 start = inode->BlockRun().Start(); 971 } else { 972 // file data will start in the next allocation group 973 group = inode->BlockRun().AllocationGroup() + 1; 974 } 975 976 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 977 } 978 979 980 status_t 981 BlockAllocator::Free(Transaction& transaction, block_run run) 982 { 983 MutexLocker lock(fLock); 984 985 int32 group = run.AllocationGroup(); 986 uint16 start = run.Start(); 987 uint16 length = run.Length(); 988 989 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, 990 length)); 991 T(Free(run)); 992 993 // doesn't use Volume::IsValidBlockRun() here because it can check better 994 // against the group size (the last group may have a different length) 995 if (group < 0 || group >= fNumGroups 996 || start > fGroups[group].NumBits() 997 || uint32(start + length) > fGroups[group].NumBits() 998 || length == 0) { 999 FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, 1000 start, length)); 1001 DEBUGGER(("tried to free invalid block_run")); 1002 return B_BAD_VALUE; 1003 } 1004 // check if someone tries to free reserved areas at the beginning of the 1005 // drive 1006 if (group == 0 1007 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 1008 FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, 1009 start, length)); 1010 DEBUGGER(("tried to free reserved block")); 1011 return B_BAD_VALUE; 1012 } 1013 #ifdef DEBUG 1014 if (CheckBlockRun(run) < B_OK) 1015 return B_BAD_DATA; 1016 #endif 1017 1018 CHECK_ALLOCATION_GROUP(group); 1019 1020 if (fGroups[group].Free(transaction, start, length) < B_OK) 1021 RETURN_ERROR(B_IO_ERROR); 1022 1023 CHECK_ALLOCATION_GROUP(group); 1024 1025 #ifdef DEBUG 1026 if (CheckBlockRun(run, NULL, NULL, false) < B_OK) { 1027 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just " 1028 "freed)\n")); 1029 } 1030 #endif 1031 1032 fVolume->SuperBlock().used_blocks = 1033 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 1034 return B_OK; 1035 } 1036 1037 1038 size_t 1039 BlockAllocator::BitmapSize() const 1040 { 1041 return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 1042 } 1043 1044 1045 #ifdef DEBUG_ALLOCATION_GROUPS 1046 void 1047 BlockAllocator::_CheckGroup(int32 groupIndex) const 1048 { 1049 AllocationBlock cached(fVolume); 1050 ASSERT_LOCKED_MUTEX(&fLock); 1051 1052 AllocationGroup& group = fGroups[groupIndex]; 1053 1054 int32 currentStart = 0, currentLength = 0; 1055 int32 firstFree = -1; 1056 int32 largestStart = -1; 1057 int32 largestLength = 0; 1058 int32 currentBit = 0; 1059 1060 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1061 if (cached.SetTo(group, block) < B_OK) { 1062 panic("setting group block %d failed\n", (int)block); 1063 return; 1064 } 1065 1066 for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) { 1067 if (!cached.IsUsed(bit)) { 1068 if (firstFree < 0) { 1069 firstFree = currentBit; 1070 if (!group.fLargestValid) { 1071 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1072 // mostly harmless but noteworthy 1073 dprintf("group %d first free too late: should be " 1074 "%d, is %d\n", (int)groupIndex, (int)firstFree, 1075 (int)group.fFirstFree); 1076 } 1077 return; 1078 } 1079 } 1080 1081 if (currentLength == 0) { 1082 // start new range 1083 currentStart = currentBit; 1084 } 1085 currentLength++; 1086 } else if (currentLength) { 1087 // end of a range 1088 if (currentLength > largestLength) { 1089 largestStart = currentStart; 1090 largestLength = currentLength; 1091 } 1092 currentLength = 0; 1093 } 1094 currentBit++; 1095 } 1096 } 1097 1098 if (currentLength > largestLength) { 1099 largestStart = currentStart; 1100 largestLength = currentLength; 1101 } 1102 1103 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1104 // mostly harmless but noteworthy 1105 dprintf("group %d first free too late: should be %d, is %d\n", 1106 (int)groupIndex, (int)firstFree, (int)group.fFirstFree); 1107 } 1108 if (group.fLargestValid 1109 && (largestStart != group.fLargestStart 1110 || largestLength != group.fLargestLength)) { 1111 panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n", 1112 fVolume, (int)groupIndex, (int)group.fLargestStart, 1113 (int)group.fLargestLength, (int)largestStart, (int)largestLength); 1114 } 1115 } 1116 #endif // DEBUG_ALLOCATION_GROUPS 1117 1118 1119 // #pragma mark - Bitmap validity checking 1120 1121 // TODO: implement new FS checking API 1122 // Functions to check the validity of the bitmap - they are used from 1123 // the "checkfs" command (since this does even a bit more, maybe we should 1124 // move this some place else?) 1125 1126 1127 bool 1128 BlockAllocator::_IsValidCheckControl(check_control* control) 1129 { 1130 if (control == NULL 1131 || control->magic != BFS_IOCTL_CHECK_MAGIC) { 1132 FATAL(("invalid check_control (%p)!\n", control)); 1133 return false; 1134 } 1135 1136 return true; 1137 } 1138 1139 1140 status_t 1141 BlockAllocator::StartChecking(check_control* control) 1142 { 1143 if (!_IsValidCheckControl(control)) 1144 return B_BAD_VALUE; 1145 1146 fVolume->GetJournal(0)->Lock(NULL, true); 1147 // Lock the volume's journal 1148 1149 status_t status = mutex_lock(&fLock); 1150 if (status != B_OK) { 1151 fVolume->GetJournal(0)->Unlock(NULL, true); 1152 return status; 1153 } 1154 1155 size_t size = BitmapSize(); 1156 fCheckBitmap = (uint32*)malloc(size); 1157 if (fCheckBitmap == NULL) { 1158 mutex_unlock(&fLock); 1159 fVolume->GetJournal(0)->Unlock(NULL, true); 1160 return B_NO_MEMORY; 1161 } 1162 1163 check_cookie* cookie = new check_cookie(); 1164 if (cookie == NULL) { 1165 free(fCheckBitmap); 1166 fCheckBitmap = NULL; 1167 mutex_unlock(&fLock); 1168 fVolume->GetJournal(0)->Unlock(NULL, true); 1169 1170 return B_NO_MEMORY; 1171 } 1172 1173 // initialize bitmap 1174 memset(fCheckBitmap, 0, size); 1175 for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); 1176 block-- > 0;) { 1177 _SetCheckBitmapAt(block); 1178 } 1179 1180 cookie->stack.Push(fVolume->Root()); 1181 cookie->stack.Push(fVolume->Indices()); 1182 cookie->iterator = NULL; 1183 control->cookie = cookie; 1184 1185 fCheckCookie = cookie; 1186 // to be able to restore nicely if "chkbfs" exited abnormally 1187 1188 // Put removed vnodes to the stack -- they are not reachable by traversing 1189 // the file system anymore. 1190 InodeList::Iterator iterator = fVolume->RemovedInodes().GetIterator(); 1191 while (Inode* inode = iterator.Next()) { 1192 cookie->stack.Push(inode->BlockRun()); 1193 } 1194 1195 // TODO: check reserved area in bitmap! 1196 1197 return B_OK; 1198 } 1199 1200 1201 status_t 1202 BlockAllocator::StopChecking(check_control* control) 1203 { 1204 check_cookie* cookie; 1205 if (control == NULL) 1206 cookie = fCheckCookie; 1207 else 1208 cookie = (check_cookie*)control->cookie; 1209 1210 if (cookie == NULL) 1211 return B_ERROR; 1212 1213 if (cookie->iterator != NULL) { 1214 delete cookie->iterator; 1215 cookie->iterator = NULL; 1216 1217 // the current directory inode is still locked in memory 1218 put_vnode(fVolume->FSVolume(), fVolume->ToVnode(cookie->current)); 1219 } 1220 1221 if (fVolume->IsReadOnly()) { 1222 // We can't fix errors on this volume 1223 control->flags &= ~BFS_FIX_BITMAP_ERRORS; 1224 } 1225 1226 // if CheckNextNode() could completely work through, we can 1227 // fix any damages of the bitmap 1228 if (control != NULL && control->status == B_ENTRY_NOT_FOUND) { 1229 // calculate the number of used blocks in the check bitmap 1230 size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 1231 off_t usedBlocks = 0LL; 1232 1233 // TODO: update the allocation groups used blocks info 1234 for (uint32 i = size >> 2; i-- > 0;) { 1235 uint32 compare = 1; 1236 for (int16 j = 0; j < 32; j++, compare <<= 1) { 1237 if (compare & fCheckBitmap[i]) 1238 usedBlocks++; 1239 } 1240 } 1241 1242 control->stats.freed = fVolume->UsedBlocks() - usedBlocks 1243 + control->stats.missing; 1244 if (control->stats.freed < 0) 1245 control->stats.freed = 0; 1246 1247 // Should we fix errors? Were there any errors we can fix? 1248 if ((control->flags & BFS_FIX_BITMAP_ERRORS) != 0 1249 && (control->stats.freed != 0 || control->stats.missing != 0)) { 1250 // if so, write the check bitmap back over the original one, 1251 // and use transactions here to play safe - we even use several 1252 // transactions, so that we don't blow the maximum log size 1253 // on large disks; since we don't need to make this atomic 1254 fVolume->SuperBlock().used_blocks 1255 = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 1256 1257 int32 blocksInBitmap = fNumGroups * fBlocksPerGroup; 1258 size_t blockSize = fVolume->BlockSize(); 1259 1260 for (int32 i = 0; i < blocksInBitmap; i += 512) { 1261 Transaction transaction(fVolume, 1 + i); 1262 1263 int32 blocksToWrite = 512; 1264 if (blocksToWrite + i > blocksInBitmap) 1265 blocksToWrite = blocksInBitmap - i; 1266 1267 status_t status = transaction.WriteBlocks(1 + i, 1268 (uint8*)fCheckBitmap + i * blockSize, blocksToWrite); 1269 if (status < B_OK) { 1270 FATAL(("error writing bitmap: %s\n", strerror(status))); 1271 break; 1272 } 1273 transaction.Done(); 1274 } 1275 } 1276 } else 1277 FATAL(("BlockAllocator::CheckNextNode() didn't run through\n")); 1278 1279 fVolume->SetCheckingThread(-1); 1280 1281 free(fCheckBitmap); 1282 fCheckBitmap = NULL; 1283 fCheckCookie = NULL; 1284 delete cookie; 1285 mutex_unlock(&fLock); 1286 fVolume->GetJournal(0)->Unlock(NULL, true); 1287 1288 return B_OK; 1289 } 1290 1291 1292 status_t 1293 BlockAllocator::CheckNextNode(check_control* control) 1294 { 1295 if (!_IsValidCheckControl(control)) 1296 return B_BAD_VALUE; 1297 1298 check_cookie* cookie = (check_cookie*)control->cookie; 1299 fVolume->SetCheckingThread(find_thread(NULL)); 1300 1301 while (true) { 1302 if (cookie->iterator == NULL) { 1303 if (!cookie->stack.Pop(&cookie->current)) { 1304 // no more runs on the stack, we are obviously finished! 1305 control->status = B_ENTRY_NOT_FOUND; 1306 return B_ENTRY_NOT_FOUND; 1307 } 1308 1309 Vnode vnode(fVolume, cookie->current); 1310 Inode* inode; 1311 if (vnode.Get(&inode) < B_OK) { 1312 FATAL(("check: Could not open inode at %Ld\n", 1313 fVolume->ToBlock(cookie->current))); 1314 continue; 1315 } 1316 1317 control->inode = inode->ID(); 1318 control->mode = inode->Mode(); 1319 1320 if (!inode->IsContainer()) { 1321 // Check file 1322 control->errors = 0; 1323 control->status = CheckInode(inode, control); 1324 1325 if (inode->GetName(control->name) < B_OK) 1326 strcpy(control->name, "(node has no name)"); 1327 1328 return B_OK; 1329 } 1330 1331 // get iterator for the next directory 1332 1333 BPlusTree* tree; 1334 if (inode->GetTree(&tree) != B_OK) { 1335 FATAL(("check: could not open b+tree from inode at %Ld\n", 1336 fVolume->ToBlock(cookie->current))); 1337 continue; 1338 } 1339 1340 cookie->parent = inode; 1341 cookie->parent_mode = inode->Mode(); 1342 1343 cookie->iterator = new TreeIterator(tree); 1344 if (cookie->iterator == NULL) 1345 RETURN_ERROR(B_NO_MEMORY); 1346 1347 // the inode must stay locked in memory until the iterator is freed 1348 vnode.Keep(); 1349 1350 // check the inode of the directory 1351 control->errors = 0; 1352 control->status = CheckInode(inode, control); 1353 1354 if (inode->GetName(control->name) < B_OK) 1355 strcpy(control->name, "(dir has no name)"); 1356 1357 return B_OK; 1358 } 1359 1360 char name[B_FILE_NAME_LENGTH]; 1361 uint16 length; 1362 ino_t id; 1363 1364 status_t status = cookie->iterator->GetNextEntry(name, &length, 1365 B_FILE_NAME_LENGTH, &id); 1366 if (status == B_ENTRY_NOT_FOUND) { 1367 // there are no more entries in this iterator, free it and go on 1368 delete cookie->iterator; 1369 cookie->iterator = NULL; 1370 1371 // unlock the directory's inode from memory 1372 put_vnode(fVolume->FSVolume(), fVolume->ToVnode(cookie->current)); 1373 1374 continue; 1375 } else if (status == B_OK) { 1376 // ignore "." and ".." entries 1377 if (!strcmp(name, ".") || !strcmp(name, "..")) 1378 continue; 1379 1380 // fill in the control data as soon as we have them 1381 strlcpy(control->name, name, B_FILE_NAME_LENGTH); 1382 control->inode = id; 1383 control->errors = 0; 1384 1385 Vnode vnode(fVolume, id); 1386 Inode* inode; 1387 if (vnode.Get(&inode) < B_OK) { 1388 FATAL(("Could not open inode ID %Ld!\n", id)); 1389 control->errors |= BFS_COULD_NOT_OPEN; 1390 control->status = B_ERROR; 1391 return B_OK; 1392 } 1393 1394 // check if the inode's name is the same as in the b+tree 1395 if (inode->IsRegularNode()) { 1396 RecursiveLocker locker(inode->SmallDataLock()); 1397 NodeGetter node(fVolume, inode); 1398 1399 const char* localName = inode->Name(node.Node()); 1400 if (localName == NULL || strcmp(localName, name)) { 1401 control->errors |= BFS_NAMES_DONT_MATCH; 1402 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, 1403 localName)); 1404 } 1405 } 1406 1407 control->mode = inode->Mode(); 1408 1409 // Check for the correct mode of the node (if the mode of the 1410 // file don't fit to its parent, there is a serious problem) 1411 if (((cookie->parent_mode & S_ATTR_DIR) != 0 1412 && !inode->IsAttribute()) 1413 || ((cookie->parent_mode & S_INDEX_DIR) != 0 1414 && !inode->IsIndex()) 1415 || (is_directory(cookie->parent_mode) 1416 && !inode->IsRegularNode())) { 1417 FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!" 1418 "\n", inode->BlockNumber(), inode->Mode(), 1419 cookie->parent_mode, cookie->parent->BlockNumber())); 1420 1421 // if we are allowed to fix errors, we should remove the file 1422 if ((control->flags & BFS_REMOVE_WRONG_TYPES) != 0 1423 && (control->flags & BFS_FIX_BITMAP_ERRORS) != 0) { 1424 // it's safe to start a transaction, because Inode::Remove() 1425 // won't touch the block bitmap (which we hold the lock for) 1426 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1427 // the bitmap anyway 1428 Transaction transaction(fVolume, 1429 cookie->parent->BlockNumber()); 1430 1431 inode->Node().flags 1432 |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1433 status = cookie->parent->Remove(transaction, name, NULL, 1434 inode->IsContainer()); 1435 if (status == B_OK) 1436 transaction.Done(); 1437 } else 1438 status = B_ERROR; 1439 1440 control->errors |= BFS_WRONG_TYPE; 1441 control->status = status; 1442 return B_OK; 1443 } 1444 1445 // If the inode has an attribute directory, push it on the stack 1446 if (!inode->Attributes().IsZero()) 1447 cookie->stack.Push(inode->Attributes()); 1448 1449 // push the directory on the stack so that it will be scanned later 1450 if (inode->IsContainer() && !inode->IsIndex()) 1451 cookie->stack.Push(inode->BlockRun()); 1452 else { 1453 // check it now 1454 control->status = CheckInode(inode, control); 1455 1456 return B_OK; 1457 } 1458 } 1459 } 1460 // is never reached 1461 } 1462 1463 1464 bool 1465 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1466 { 1467 size_t size = BitmapSize(); 1468 uint32 index = block / 32; // 32bit resolution 1469 if (index > size / 4) 1470 return false; 1471 1472 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) 1473 & (1UL << (block & 0x1f)); 1474 } 1475 1476 1477 void 1478 BlockAllocator::_SetCheckBitmapAt(off_t block) 1479 { 1480 size_t size = BitmapSize(); 1481 uint32 index = block / 32; // 32bit resolution 1482 if (index > size / 4) 1483 return; 1484 1485 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1486 } 1487 1488 1489 status_t 1490 BlockAllocator::CheckBlockRun(block_run run, const char* type, 1491 check_control* control, bool allocated) 1492 { 1493 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1494 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1495 || uint32(run.Start() + run.Length()) 1496 > fGroups[run.AllocationGroup()].fNumBits 1497 || run.length == 0) { 1498 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, 1499 run.AllocationGroup(), run.Start(), run.Length())); 1500 if (control == NULL) 1501 return B_BAD_DATA; 1502 1503 control->errors |= BFS_INVALID_BLOCK_RUN; 1504 return B_OK; 1505 } 1506 1507 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1508 uint32 block = run.Start() / bitsPerBlock; 1509 uint32 pos = run.Start() % bitsPerBlock; 1510 int32 length = 0; 1511 off_t firstMissing = -1, firstSet = -1; 1512 off_t firstGroupBlock 1513 = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1514 1515 AllocationBlock cached(fVolume); 1516 1517 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1518 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1519 RETURN_ERROR(B_IO_ERROR); 1520 1521 if (pos >= cached.NumBlockBits()) { 1522 // something very strange has happened... 1523 RETURN_ERROR(B_ERROR); 1524 } 1525 1526 while (length < run.Length() && pos < cached.NumBlockBits()) { 1527 if (cached.IsUsed(pos) != allocated) { 1528 if (control == NULL) { 1529 PRINT(("%s: block_run(%ld, %u, %u) is only partially " 1530 "allocated (pos = %ld, length = %ld)!\n", type, 1531 run.AllocationGroup(), run.Start(), run.Length(), 1532 pos, length)); 1533 return B_BAD_DATA; 1534 } 1535 if (firstMissing == -1) { 1536 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1537 control->errors |= BFS_MISSING_BLOCKS; 1538 } 1539 control->stats.missing++; 1540 } else if (firstMissing != -1) { 1541 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are " 1542 "%sallocated!\n", type, run.AllocationGroup(), run.Start(), 1543 run.Length(), firstMissing, 1544 firstGroupBlock + pos + block * bitsPerBlock - 1, 1545 allocated ? "not " : "")); 1546 firstMissing = -1; 1547 } 1548 1549 if (fCheckBitmap != NULL) { 1550 // Set the block in the check bitmap as well, but have a look 1551 // if it is already allocated first 1552 uint32 offset = pos + block * bitsPerBlock; 1553 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1554 if (firstSet == -1) { 1555 firstSet = firstGroupBlock + offset; 1556 control->errors |= BFS_BLOCKS_ALREADY_SET; 1557 } 1558 control->stats.already_set++; 1559 } else { 1560 if (firstSet != -1) { 1561 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld " 1562 "are already set!\n", type, 1563 (int)run.AllocationGroup(), run.Start(), 1564 run.Length(), firstSet, 1565 firstGroupBlock + offset - 1)); 1566 firstSet = -1; 1567 } 1568 _SetCheckBitmapAt(firstGroupBlock + offset); 1569 } 1570 } 1571 length++; 1572 pos++; 1573 } 1574 1575 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1576 if (firstMissing != -1) { 1577 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not " 1578 "allocated!\n", type, run.AllocationGroup(), run.Start(), 1579 run.Length(), firstMissing, 1580 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1581 } 1582 if (firstSet != -1) { 1583 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are " 1584 "already set!\n", type, (int)run.AllocationGroup(), 1585 run.Start(), run.Length(), firstSet, 1586 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1587 } 1588 } 1589 } 1590 1591 return B_OK; 1592 } 1593 1594 1595 status_t 1596 BlockAllocator::CheckInode(Inode* inode, check_control* control) 1597 { 1598 if (control != NULL && fCheckBitmap == NULL) 1599 return B_NO_INIT; 1600 if (inode == NULL) 1601 return B_BAD_VALUE; 1602 1603 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1604 if (status < B_OK) 1605 return status; 1606 1607 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1608 // symlinks may not have a valid data stream 1609 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1610 return B_BAD_DATA; 1611 1612 return B_OK; 1613 } 1614 1615 data_stream* data = &inode->Node().data; 1616 1617 // check the direct range 1618 1619 if (data->max_direct_range) { 1620 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1621 if (data->direct[i].IsZero()) 1622 break; 1623 1624 status = CheckBlockRun(data->direct[i], "direct", control); 1625 if (status < B_OK) 1626 return status; 1627 } 1628 } 1629 1630 CachedBlock cached(fVolume); 1631 1632 // check the indirect range 1633 1634 if (data->max_indirect_range) { 1635 status = CheckBlockRun(data->indirect, "indirect", control); 1636 if (status < B_OK) 1637 return status; 1638 1639 off_t block = fVolume->ToBlock(data->indirect); 1640 1641 for (int32 i = 0; i < data->indirect.Length(); i++) { 1642 block_run* runs = (block_run*)cached.SetTo(block + i); 1643 if (runs == NULL) 1644 RETURN_ERROR(B_IO_ERROR); 1645 1646 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1647 int32 index = 0; 1648 for (; index < runsPerBlock; index++) { 1649 if (runs[index].IsZero()) 1650 break; 1651 1652 status = CheckBlockRun(runs[index], "indirect->run", control); 1653 if (status < B_OK) 1654 return status; 1655 } 1656 if (index < runsPerBlock) 1657 break; 1658 } 1659 } 1660 1661 // check the double indirect range 1662 1663 if (data->max_double_indirect_range) { 1664 status = CheckBlockRun(data->double_indirect, "double indirect", 1665 control); 1666 if (status < B_OK) 1667 return status; 1668 1669 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1670 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1671 1672 CachedBlock cachedDirect(fVolume); 1673 int32 maxIndirectIndex = (data->double_indirect.Length() 1674 << fVolume->BlockShift()) / sizeof(block_run); 1675 1676 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; 1677 indirectIndex++) { 1678 // get the indirect array block 1679 block_run* array = (block_run*)cached.SetTo( 1680 fVolume->ToBlock(data->double_indirect) 1681 + indirectIndex / runsPerBlock); 1682 if (array == NULL) 1683 return B_IO_ERROR; 1684 1685 block_run indirect = array[indirectIndex % runsPerBlock]; 1686 // are we finished yet? 1687 if (indirect.IsZero()) 1688 return B_OK; 1689 1690 status = CheckBlockRun(indirect, "double indirect->runs", control); 1691 if (status < B_OK) 1692 return status; 1693 1694 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) 1695 / sizeof(block_run); 1696 1697 for (int32 index = 0; index < maxIndex; ) { 1698 block_run* runs = (block_run*)cachedDirect.SetTo( 1699 fVolume->ToBlock(indirect) + index / runsPerBlock); 1700 if (runs == NULL) 1701 return B_IO_ERROR; 1702 1703 do { 1704 // are we finished yet? 1705 if (runs[index % runsPerBlock].IsZero()) 1706 return B_OK; 1707 1708 status = CheckBlockRun(runs[index % runsPerBlock], 1709 "double indirect->runs->run", control); 1710 if (status < B_OK) 1711 return status; 1712 } while ((++index % runsPerArray) != 0); 1713 } 1714 } 1715 } 1716 1717 return B_OK; 1718 } 1719 1720 1721 // #pragma mark - debugger commands 1722 1723 1724 #ifdef BFS_DEBUGGER_COMMANDS 1725 1726 void 1727 BlockAllocator::Dump(int32 index) 1728 { 1729 kprintf("allocation groups: %ld\n", fNumGroups); 1730 kprintf("blocks per group: %ld\n", fBlocksPerGroup); 1731 1732 for (int32 i = 0; i < fNumGroups; i++) { 1733 if (index != -1 && i != index) 1734 continue; 1735 1736 AllocationGroup& group = fGroups[i]; 1737 1738 kprintf("[%3ld] num bits: %lu\n", i, group.NumBits()); 1739 kprintf(" num blocks: %lu\n", group.NumBlocks()); 1740 kprintf(" start: %ld\n", group.Start()); 1741 kprintf(" first free: %ld\n", group.fFirstFree); 1742 kprintf(" largest start: %ld%s\n", group.fLargestStart, 1743 group.fLargestValid ? "" : " (invalid)"); 1744 kprintf(" largest length: %ld\n", group.fLargestLength); 1745 kprintf(" free bits: %ld\n", group.fFreeBits); 1746 } 1747 } 1748 1749 1750 #if BFS_TRACING 1751 static char kTraceBuffer[256]; 1752 1753 1754 int 1755 dump_block_allocator_blocks(int argc, char** argv) 1756 { 1757 if (argc != 3 || !strcmp(argv[1], "--help")) { 1758 kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]); 1759 return 0; 1760 } 1761 1762 Volume* volume = (Volume*)parse_expression(argv[1]); 1763 off_t block = parse_expression(argv[2]); 1764 1765 // iterate over all tracing entries to find overlapping actions 1766 1767 using namespace BFSBlockTracing; 1768 1769 LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0); 1770 TraceEntryIterator iterator; 1771 while (TraceEntry* _entry = iterator.Next()) { 1772 if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) { 1773 off_t first = volume->ToBlock(entry->Run()); 1774 off_t last = first - 1 + entry->Run().Length(); 1775 if (block >= first && block <= last) { 1776 out.Clear(); 1777 const char* dump = out.DumpEntry(entry); 1778 kprintf("%5ld. %s\n", iterator.Index(), dump); 1779 } 1780 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 1781 off_t first = volume->ToBlock(entry->Run()); 1782 off_t last = first - 1 + entry->Run().Length(); 1783 if (block >= first && block <= last) { 1784 out.Clear(); 1785 const char* dump = out.DumpEntry(entry); 1786 kprintf("%5ld. %s\n", iterator.Index(), dump); 1787 } 1788 } 1789 } 1790 1791 return 0; 1792 } 1793 #endif 1794 1795 1796 int 1797 dump_block_allocator(int argc, char** argv) 1798 { 1799 int32 group = -1; 1800 if (argc == 3) { 1801 group = parse_expression(argv[2]); 1802 argc--; 1803 } 1804 1805 if (argc != 2 || !strcmp(argv[1], "--help")) { 1806 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 1807 return 0; 1808 } 1809 1810 Volume* volume = (Volume*)parse_expression(argv[1]); 1811 BlockAllocator& allocator = volume->Allocator(); 1812 1813 allocator.Dump(group); 1814 return 0; 1815 } 1816 1817 #endif // BFS_DEBUGGER_COMMANDS 1818 1819