1 /* 2 * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 7 //! Block bitmap handling and allocation policies 8 9 10 #include "BlockAllocator.h" 11 12 #include "Debug.h" 13 #include "Inode.h" 14 #include "Volume.h" 15 16 17 // Things the BlockAllocator should do: 18 19 // - find a range of blocks of a certain size nearby a specific position 20 // - allocating an unsharp range of blocks for pre-allocation 21 // - free blocks 22 // - know how to deal with each allocation, special handling for directories, 23 // files, symlinks, etc. (type sensitive allocation policies) 24 25 // What makes the code complicated is the fact that we are not just reading 26 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 27 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 28 // of partitions will grow even more - so that's not an option. 29 // Instead we are reading in every block when it's used - since an allocation 30 // group can span several blocks in the block bitmap, the AllocationBlock 31 // class is there to make handling those easier. 32 33 // The current implementation is only slightly optimized and could probably 34 // be improved a lot. Furthermore, the allocation policies used here should 35 // have some real world tests. 36 37 #if BFS_TRACING && !defined(FS_SHELL) 38 namespace BFSBlockTracing { 39 40 41 class Allocate : public AbstractTraceEntry { 42 public: 43 Allocate(block_run run) 44 : 45 fRun(run) 46 { 47 Initialized(); 48 } 49 50 virtual void AddDump(TraceOutput& out) 51 { 52 out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(), 53 fRun.Start(), fRun.Length()); 54 } 55 56 const block_run& Run() const { return fRun; } 57 58 private: 59 block_run fRun; 60 }; 61 62 63 class Free : public AbstractTraceEntry { 64 public: 65 Free(block_run run) 66 : 67 fRun(run) 68 { 69 Initialized(); 70 } 71 72 virtual void AddDump(TraceOutput& out) 73 { 74 out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(), 75 fRun.Start(), fRun.Length()); 76 } 77 78 const block_run& Run() const { return fRun; } 79 80 private: 81 block_run fRun; 82 }; 83 84 85 static uint32 86 checksum(const uint8* data, size_t size) 87 { 88 const uint32* data4 = (const uint32*)data; 89 uint32 sum = 0; 90 while (size >= 4) { 91 sum += *data4; 92 data4++; 93 size -= 4; 94 } 95 return sum; 96 } 97 98 99 class Block : public AbstractTraceEntry { 100 public: 101 Block(const char* label, off_t blockNumber, const uint8* data, 102 size_t size, uint32 start = 0, uint32 length = 0) 103 : 104 fBlock(blockNumber), 105 fData(data), 106 fStart(start), 107 fLength(length), 108 fLabel(label) 109 { 110 fSum = checksum(data, size); 111 Initialized(); 112 } 113 114 virtual void AddDump(TraceOutput& out) 115 { 116 out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel, 117 fBlock, fData, fSum, fStart, fLength); 118 } 119 120 private: 121 off_t fBlock; 122 const uint8* fData; 123 uint32 fStart; 124 uint32 fLength; 125 uint32 fSum; 126 const char* fLabel; 127 }; 128 129 130 class BlockChange : public AbstractTraceEntry { 131 public: 132 BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData) 133 : 134 fBlock(block), 135 fOldData(oldData), 136 fNewData(newData), 137 fLabel(label) 138 { 139 Initialized(); 140 } 141 142 virtual void AddDump(TraceOutput& out) 143 { 144 out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel, 145 fBlock, fOldData, fNewData); 146 } 147 148 private: 149 int32 fBlock; 150 uint32 fOldData; 151 uint32 fNewData; 152 const char* fLabel; 153 }; 154 155 156 } // namespace BFSBlockTracing 157 158 # define T(x) new(std::nothrow) BFSBlockTracing::x; 159 #else 160 # define T(x) ; 161 #endif 162 163 #ifdef DEBUG_ALLOCATION_GROUPS 164 # define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group) 165 #else 166 # define CHECK_ALLOCATION_GROUP(group) ; 167 #endif 168 169 170 class AllocationBlock : public CachedBlock { 171 public: 172 AllocationBlock(Volume* volume); 173 174 inline void Allocate(uint16 start, uint16 numBlocks); 175 inline void Free(uint16 start, uint16 numBlocks); 176 inline bool IsUsed(uint16 block); 177 178 status_t SetTo(AllocationGroup& group, uint16 block); 179 status_t SetToWritable(Transaction& transaction, AllocationGroup& group, 180 uint16 block); 181 182 uint32 NumBlockBits() const { return fNumBits; } 183 uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; } 184 uint8* Block() const { return (uint8*)fBlock; } 185 186 private: 187 uint32 fNumBits; 188 #ifdef DEBUG 189 bool fWritable; 190 #endif 191 }; 192 193 194 class AllocationGroup { 195 public: 196 AllocationGroup(); 197 198 void AddFreeRange(int32 start, int32 blocks); 199 bool IsFull() const { return fFreeBits == 0; } 200 201 status_t Allocate(Transaction& transaction, uint16 start, int32 length); 202 status_t Free(Transaction& transaction, uint16 start, int32 length); 203 204 uint32 NumBits() const { return fNumBits; } 205 uint32 NumBlocks() const { return fNumBlocks; } 206 int32 Start() const { return fStart; } 207 208 private: 209 friend class BlockAllocator; 210 211 uint32 fNumBits; 212 uint32 fNumBlocks; 213 int32 fStart; 214 int32 fFirstFree; 215 int32 fFreeBits; 216 217 int32 fLargestStart; 218 int32 fLargestLength; 219 bool fLargestValid; 220 }; 221 222 223 AllocationBlock::AllocationBlock(Volume* volume) 224 : CachedBlock(volume) 225 { 226 } 227 228 229 status_t 230 AllocationBlock::SetTo(AllocationGroup& group, uint16 block) 231 { 232 // 8 blocks per byte 233 fNumBits = fVolume->BlockSize() << 3; 234 // the last group may have less bits than the others 235 if ((block + 1) * fNumBits > group.NumBits()) 236 fNumBits = group.NumBits() % fNumBits; 237 238 #ifdef DEBUG 239 fWritable = false; 240 #endif 241 return CachedBlock::SetTo(group.Start() + block); 242 } 243 244 245 status_t 246 AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group, 247 uint16 block) 248 { 249 // 8 blocks per byte 250 fNumBits = fVolume->BlockSize() << 3; 251 // the last group may have less bits in the last block 252 if ((block + 1) * fNumBits > group.NumBits()) 253 fNumBits = group.NumBits() % fNumBits; 254 255 #ifdef DEBUG 256 fWritable = true; 257 #endif 258 return CachedBlock::SetToWritable(transaction, group.Start() + block); 259 } 260 261 262 bool 263 AllocationBlock::IsUsed(uint16 block) 264 { 265 if (block > fNumBits) 266 return true; 267 268 // the block bitmap is accessed in 32-bit blocks 269 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 270 } 271 272 273 void 274 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 275 { 276 ASSERT(start < fNumBits); 277 ASSERT(uint32(start + numBlocks) <= fNumBits); 278 #ifdef DEBUG 279 ASSERT(fWritable); 280 #endif 281 282 T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(), 283 start, numBlocks)); 284 285 int32 block = start >> 5; 286 287 while (numBlocks > 0) { 288 uint32 mask = 0; 289 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 290 mask |= 1UL << i; 291 292 T(BlockChange("b-alloc", block, Block(block), 293 Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask))); 294 295 #if KDEBUG 296 // check for already set blocks 297 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) { 298 FATAL(("AllocationBlock::Allocate(): some blocks are already " 299 "allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 300 panic("blocks already set!"); 301 } 302 #endif 303 304 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 305 start = 0; 306 } 307 T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(), 308 start, numBlocks)); 309 } 310 311 312 void 313 AllocationBlock::Free(uint16 start, uint16 numBlocks) 314 { 315 ASSERT(start < fNumBits); 316 ASSERT(uint32(start + numBlocks) <= fNumBits); 317 #ifdef DEBUG 318 ASSERT(fWritable); 319 #endif 320 321 int32 block = start >> 5; 322 323 while (numBlocks > 0) { 324 uint32 mask = 0; 325 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 326 mask |= 1UL << (i % 32); 327 328 T(BlockChange("b-free", block, Block(block), 329 Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask))); 330 331 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 332 start = 0; 333 } 334 } 335 336 337 // #pragma mark - 338 339 340 /*! The allocation groups are created and initialized in 341 BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 342 respectively. 343 */ 344 AllocationGroup::AllocationGroup() 345 : 346 fFirstFree(-1), 347 fFreeBits(0), 348 fLargestValid(false) 349 { 350 } 351 352 353 void 354 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 355 { 356 //D(if (blocks > 512) 357 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 358 359 if (fFirstFree == -1) 360 fFirstFree = start; 361 362 if (!fLargestValid || fLargestLength < blocks) { 363 fLargestStart = start; 364 fLargestLength = blocks; 365 fLargestValid = true; 366 } 367 368 fFreeBits += blocks; 369 } 370 371 372 /*! Allocates the specified run in the allocation group. 373 Doesn't check if the run is valid or already allocated partially, nor 374 does it maintain the free ranges hints or the volume's used blocks count. 375 It only does the low-level work of allocating some bits in the block bitmap. 376 Assumes that the block bitmap lock is hold. 377 */ 378 status_t 379 AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length) 380 { 381 ASSERT(start + length <= (int32)fNumBits); 382 383 // Update the allocation group info 384 // TODO: this info will be incorrect if something goes wrong later 385 // Note, the fFirstFree block doesn't have to be really free 386 if (start == fFirstFree) 387 fFirstFree = start + length; 388 fFreeBits -= length; 389 390 if (fLargestValid) { 391 bool cut = false; 392 if (fLargestStart == start) { 393 // cut from start 394 fLargestStart += length; 395 fLargestLength -= length; 396 cut = true; 397 } else if (start > fLargestStart 398 && start < fLargestStart + fLargestLength) { 399 // cut from end 400 fLargestLength = start - fLargestStart; 401 cut = true; 402 } 403 if (cut && (fLargestLength < fLargestStart 404 || fLargestLength 405 < (int32)fNumBits - (fLargestStart + fLargestLength))) { 406 // might not be the largest block anymore 407 fLargestValid = false; 408 } 409 } 410 411 Volume* volume = transaction.GetVolume(); 412 413 // calculate block in the block bitmap and position within 414 uint32 bitsPerBlock = volume->BlockSize() << 3; 415 uint32 block = start / bitsPerBlock; 416 start = start % bitsPerBlock; 417 418 AllocationBlock cached(volume); 419 420 while (length > 0) { 421 if (cached.SetToWritable(transaction, *this, block) < B_OK) { 422 fLargestValid = false; 423 RETURN_ERROR(B_IO_ERROR); 424 } 425 426 uint32 numBlocks = length; 427 if (start + numBlocks > cached.NumBlockBits()) 428 numBlocks = cached.NumBlockBits() - start; 429 430 cached.Allocate(start, numBlocks); 431 432 length -= numBlocks; 433 start = 0; 434 block++; 435 } 436 437 return B_OK; 438 } 439 440 441 /*! Frees the specified run in the allocation group. 442 Doesn't check if the run is valid or was not completely allocated, nor 443 does it maintain the free ranges hints or the volume's used blocks count. 444 It only does the low-level work of freeing some bits in the block bitmap. 445 Assumes that the block bitmap lock is hold. 446 */ 447 status_t 448 AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length) 449 { 450 ASSERT(start + length <= (int32)fNumBits); 451 452 // Update the allocation group info 453 // TODO: this info will be incorrect if something goes wrong later 454 if (fFirstFree > start) 455 fFirstFree = start; 456 fFreeBits += length; 457 458 // The range to be freed cannot be part of the valid largest range 459 ASSERT(!fLargestValid || start + length <= fLargestStart 460 || start > fLargestStart); 461 462 if (fLargestValid 463 && (start + length == fLargestStart 464 || fLargestStart + fLargestLength == start 465 || (start < fLargestStart && fLargestStart > fLargestLength) 466 || (start > fLargestStart 467 && (int32)fNumBits - (fLargestStart + fLargestLength) 468 > fLargestLength))) { 469 fLargestValid = false; 470 } 471 472 Volume* volume = transaction.GetVolume(); 473 474 // calculate block in the block bitmap and position within 475 uint32 bitsPerBlock = volume->BlockSize() << 3; 476 uint32 block = start / bitsPerBlock; 477 start = start % bitsPerBlock; 478 479 AllocationBlock cached(volume); 480 481 while (length > 0) { 482 if (cached.SetToWritable(transaction, *this, block) < B_OK) 483 RETURN_ERROR(B_IO_ERROR); 484 485 T(Block("free-1", block, cached.Block(), volume->BlockSize())); 486 uint16 freeLength = length; 487 if (uint32(start + length) > cached.NumBlockBits()) 488 freeLength = cached.NumBlockBits() - start; 489 490 cached.Free(start, freeLength); 491 492 length -= freeLength; 493 start = 0; 494 T(Block("free-2", block, cached.Block(), volume->BlockSize())); 495 block++; 496 } 497 return B_OK; 498 } 499 500 501 // #pragma mark - 502 503 504 BlockAllocator::BlockAllocator(Volume* volume) 505 : 506 fVolume(volume), 507 fGroups(NULL) 508 //fCheckBitmap(NULL), 509 //fCheckCookie(NULL) 510 { 511 recursive_lock_init(&fLock, "bfs allocator"); 512 } 513 514 515 BlockAllocator::~BlockAllocator() 516 { 517 recursive_lock_destroy(&fLock); 518 delete[] fGroups; 519 } 520 521 522 status_t 523 BlockAllocator::Initialize(bool full) 524 { 525 fNumGroups = fVolume->AllocationGroups(); 526 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 527 //fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1) 528 /// (fVolume->BlockSize() * 8); 529 fNumBlocks = fVolume->NumBitmapBlocks(); 530 531 fGroups = new(std::nothrow) AllocationGroup[fNumGroups]; 532 if (fGroups == NULL) 533 return B_NO_MEMORY; 534 535 if (!full) 536 return B_OK; 537 538 recursive_lock_lock(&fLock); 539 // the lock will be released by the _Initialize() method 540 541 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 542 "bfs block allocator", B_LOW_PRIORITY, this); 543 if (id < B_OK) 544 return _Initialize(this); 545 546 recursive_lock_transfer_lock(&fLock, id); 547 548 return resume_thread(id); 549 } 550 551 552 status_t 553 BlockAllocator::InitializeAndClearBitmap(Transaction& transaction) 554 { 555 status_t status = Initialize(false); 556 if (status != B_OK) 557 return status; 558 559 uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize(); 560 uint32 blockShift = fVolume->BlockShift(); 561 562 uint32* buffer = (uint32*)malloc(numBits >> 3); 563 if (buffer == NULL) 564 RETURN_ERROR(B_NO_MEMORY); 565 566 memset(buffer, 0, numBits >> 3); 567 568 off_t offset = 1; 569 // the bitmap starts directly after the superblock 570 571 // initialize the AllocationGroup objects and clear the on-disk bitmap 572 573 for (int32 i = 0; i < fNumGroups; i++) { 574 if (write_pos(fVolume->Device(), offset << blockShift, buffer, 575 fBlocksPerGroup << blockShift) < B_OK) { 576 free(buffer); 577 return B_ERROR; 578 } 579 580 // the last allocation group may contain less blocks than the others 581 if (i == fNumGroups - 1) { 582 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 583 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) 584 >> (blockShift + 3)); 585 } else { 586 fGroups[i].fNumBits = numBits; 587 fGroups[i].fNumBlocks = fBlocksPerGroup; 588 } 589 fGroups[i].fStart = offset; 590 fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0; 591 fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits; 592 fGroups[i].fLargestValid = true; 593 594 offset += fBlocksPerGroup; 595 } 596 free(buffer); 597 598 // reserve the boot block, the log area, and the block bitmap itself 599 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 600 601 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 602 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 603 return B_ERROR; 604 } 605 fVolume->SuperBlock().used_blocks 606 = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 607 608 return B_OK; 609 } 610 611 612 status_t 613 BlockAllocator::_Initialize(BlockAllocator* allocator) 614 { 615 // The lock must already be held at this point 616 RecursiveLocker locker(allocator->fLock, true); 617 618 Volume* volume = allocator->fVolume; 619 uint32 blocks = allocator->fBlocksPerGroup; 620 uint32 blockShift = volume->BlockShift(); 621 off_t freeBlocks = 0; 622 623 uint32* buffer = (uint32*)malloc(blocks << blockShift); 624 if (buffer == NULL) 625 RETURN_ERROR(B_NO_MEMORY); 626 627 AllocationGroup* groups = allocator->fGroups; 628 off_t offset = 1; 629 uint32 bitsPerGroup = 8 * (blocks << blockShift); 630 int32 numGroups = allocator->fNumGroups; 631 632 for (int32 i = 0; i < numGroups; i++) { 633 if (read_pos(volume->Device(), offset << blockShift, buffer, 634 blocks << blockShift) < B_OK) 635 break; 636 637 // the last allocation group may contain less blocks than the others 638 if (i == numGroups - 1) { 639 groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup; 640 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) 641 >> (blockShift + 3)); 642 } else { 643 groups[i].fNumBits = bitsPerGroup; 644 groups[i].fNumBlocks = blocks; 645 } 646 groups[i].fStart = offset; 647 648 // finds all free ranges in this allocation group 649 int32 start = -1, range = 0; 650 int32 numBits = groups[i].fNumBits, bit = 0; 651 int32 count = (numBits + 31) / 32; 652 653 for (int32 k = 0; k < count; k++) { 654 for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) { 655 if (buffer[k] & (1UL << j)) { 656 // block is in use 657 if (range > 0) { 658 groups[i].AddFreeRange(start, range); 659 range = 0; 660 } 661 } else if (range++ == 0) { 662 // block is free, start new free range 663 start = bit; 664 } 665 } 666 } 667 if (range) 668 groups[i].AddFreeRange(start, range); 669 670 freeBlocks += groups[i].fFreeBits; 671 672 offset += blocks; 673 } 674 free(buffer); 675 676 // check if block bitmap and log area are reserved 677 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 678 679 if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) { 680 if (volume->IsReadOnly()) { 681 FATAL(("Space for block bitmap or log area is not reserved " 682 "(volume is mounted read-only)!\n")); 683 } else { 684 Transaction transaction(volume, 0); 685 if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) { 686 FATAL(("Could not allocate reserved space for block " 687 "bitmap/log!\n")); 688 volume->Panic(); 689 } else { 690 transaction.Done(); 691 FATAL(("Space for block bitmap or log area was not " 692 "reserved!\n")); 693 } 694 } 695 } 696 697 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 698 if (volume->UsedBlocks() != usedBlocks) { 699 // If the disk in a dirty state at mount time, it's 700 // normal that the values don't match 701 INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %" 702 B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks)); 703 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 704 } 705 706 return B_OK; 707 } 708 709 710 void 711 BlockAllocator::Uninitialize() 712 { 713 // We only have to make sure that the initializer thread isn't running 714 // anymore. 715 recursive_lock_lock(&fLock); 716 } 717 718 719 /*! Tries to allocate between \a minimum, and \a maximum blocks starting 720 at group \a groupIndex with offset \a start. The resulting allocation 721 is put into \a run. 722 723 The number of allocated blocks is always a multiple of \a minimum which 724 has to be a power of two value. 725 */ 726 status_t 727 BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex, 728 uint16 start, uint16 maximum, uint16 minimum, block_run& run) 729 { 730 if (maximum == 0) 731 return B_BAD_VALUE; 732 733 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", 734 groupIndex, start, maximum, minimum)); 735 736 AllocationBlock cached(fVolume); 737 RecursiveLocker lock(fLock); 738 739 uint32 bitsPerFullBlock = fVolume->BlockSize() << 3; 740 741 // Find the block_run that can fulfill the request best 742 int32 bestGroup = -1; 743 int32 bestStart = -1; 744 int32 bestLength = -1; 745 746 for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) { 747 groupIndex = groupIndex % fNumGroups; 748 AllocationGroup& group = fGroups[groupIndex]; 749 750 CHECK_ALLOCATION_GROUP(groupIndex); 751 752 if (start >= group.NumBits() || group.IsFull()) 753 continue; 754 755 // The wanted maximum is smaller than the largest free block in the 756 // group or already smaller than the minimum 757 758 if (start < group.fFirstFree) 759 start = group.fFirstFree; 760 761 if (group.fLargestValid) { 762 if (group.fLargestLength < bestLength) 763 continue; 764 765 if (group.fLargestStart >= start) { 766 if (group.fLargestLength >= bestLength) { 767 bestGroup = groupIndex; 768 bestStart = group.fLargestStart; 769 bestLength = group.fLargestLength; 770 771 if (bestLength >= maximum) 772 break; 773 } 774 775 // We know everything about this group we have to, let's skip 776 // to the next 777 continue; 778 } 779 } 780 781 // There may be more than one block per allocation group - and 782 // we iterate through it to find a place for the allocation. 783 // (one allocation can't exceed one allocation group) 784 785 uint32 block = start / (fVolume->BlockSize() << 3); 786 int32 currentStart = 0, currentLength = 0; 787 int32 groupLargestStart = -1; 788 int32 groupLargestLength = -1; 789 int32 currentBit = start; 790 bool canFindGroupLargest = start == 0; 791 792 for (; block < group.NumBlocks(); block++) { 793 if (cached.SetTo(group, block) < B_OK) 794 RETURN_ERROR(B_ERROR); 795 796 T(Block("alloc-in", group.Start() + block, cached.Block(), 797 fVolume->BlockSize(), groupIndex, currentStart)); 798 799 // find a block large enough to hold the allocation 800 for (uint32 bit = start % bitsPerFullBlock; 801 bit < cached.NumBlockBits(); bit++) { 802 if (!cached.IsUsed(bit)) { 803 if (currentLength == 0) { 804 // start new range 805 currentStart = currentBit; 806 } 807 808 // have we found a range large enough to hold numBlocks? 809 if (++currentLength >= maximum) { 810 bestGroup = groupIndex; 811 bestStart = currentStart; 812 bestLength = currentLength; 813 break; 814 } 815 } else { 816 if (currentLength) { 817 // end of a range 818 if (currentLength > bestLength) { 819 bestGroup = groupIndex; 820 bestStart = currentStart; 821 bestLength = currentLength; 822 } 823 if (currentLength > groupLargestLength) { 824 groupLargestStart = currentStart; 825 groupLargestLength = currentLength; 826 } 827 currentLength = 0; 828 } 829 if ((int32)group.NumBits() - currentBit 830 <= groupLargestLength) { 831 // We can't find a bigger block in this group anymore, 832 // let's skip the rest. 833 block = group.NumBlocks(); 834 break; 835 } 836 } 837 currentBit++; 838 } 839 840 T(Block("alloc-out", block, cached.Block(), 841 fVolume->BlockSize(), groupIndex, currentStart)); 842 843 if (bestLength >= maximum) { 844 canFindGroupLargest = false; 845 break; 846 } 847 848 // start from the beginning of the next block 849 start = 0; 850 } 851 852 if (currentBit == (int32)group.NumBits()) { 853 if (currentLength > bestLength) { 854 bestGroup = groupIndex; 855 bestStart = currentStart; 856 bestLength = currentLength; 857 } 858 if (canFindGroupLargest && currentLength > groupLargestLength) { 859 groupLargestStart = currentStart; 860 groupLargestLength = currentLength; 861 } 862 } 863 864 if (canFindGroupLargest && !group.fLargestValid 865 && groupLargestLength >= 0) { 866 group.fLargestStart = groupLargestStart; 867 group.fLargestLength = groupLargestLength; 868 group.fLargestValid = true; 869 } 870 871 if (bestLength >= maximum) 872 break; 873 } 874 875 // If we found a suitable range, mark the blocks as in use, and 876 // write the updated block bitmap back to disk 877 if (bestLength < minimum) 878 return B_DEVICE_FULL; 879 880 if (bestLength > maximum) 881 bestLength = maximum; 882 else if (minimum > 1) { 883 // make sure bestLength is a multiple of minimum 884 bestLength = round_down(bestLength, minimum); 885 } 886 887 if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK) 888 RETURN_ERROR(B_IO_ERROR); 889 890 CHECK_ALLOCATION_GROUP(bestGroup); 891 892 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup); 893 run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart); 894 run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength); 895 896 fVolume->SuperBlock().used_blocks 897 = HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength); 898 // We are not writing back the disk's superblock - it's 899 // either done by the journaling code, or when the disk 900 // is unmounted. 901 // If the value is not correct at mount time, it will be 902 // fixed anyway. 903 904 // We need to flush any remaining blocks in the new allocation to make sure 905 // they won't interfere with the file cache. 906 block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run), 907 run.Length()); 908 909 T(Allocate(run)); 910 return B_OK; 911 } 912 913 914 status_t 915 BlockAllocator::AllocateForInode(Transaction& transaction, 916 const block_run* parent, mode_t type, block_run& run) 917 { 918 // Apply some allocation policies here (AllocateBlocks() will break them 919 // if necessary) - we will start with those described in Dominic Giampaolo's 920 // "Practical File System Design", and see how good they work 921 922 // Files are going in the same allocation group as its parent, 923 // sub-directories will be inserted 8 allocation groups after 924 // the one of the parent 925 uint16 group = parent->AllocationGroup(); 926 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 927 group += 8; 928 929 return AllocateBlocks(transaction, group, 0, 1, 1, run); 930 } 931 932 933 status_t 934 BlockAllocator::Allocate(Transaction& transaction, Inode* inode, 935 off_t numBlocks, block_run& run, uint16 minimum) 936 { 937 if (numBlocks <= 0) 938 return B_ERROR; 939 940 // one block_run can't hold more data than there is in one allocation group 941 if (numBlocks > fGroups[0].NumBits()) 942 numBlocks = fGroups[0].NumBits(); 943 944 // since block_run.length is uint16, the largest number of blocks that 945 // can be covered by a block_run is 65535 946 // TODO: if we drop compatibility, couldn't we do this any better? 947 // There are basically two possibilities: 948 // a) since a length of zero doesn't have any sense, take that for 65536 - 949 // but that could cause many problems (bugs) in other areas 950 // b) reduce the maximum amount of blocks per block_run, so that the 951 // remaining number of free blocks can be used in a useful manner 952 // (like 4 blocks) - but that would also reduce the maximum file size 953 // c) have BlockRun::Length() return (length + 1). 954 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 955 numBlocks = MAX_BLOCK_RUN_LENGTH; 956 957 // Apply some allocation policies here (AllocateBlocks() will break them 958 // if necessary) 959 uint16 group = inode->BlockRun().AllocationGroup(); 960 uint16 start = 0; 961 962 // Are there already allocated blocks? (then just try to allocate near the 963 // last one) 964 if (inode->Size() > 0) { 965 const data_stream& data = inode->Node().data; 966 // TODO: we currently don't care for when the data stream 967 // is already grown into the indirect ranges 968 if (data.max_double_indirect_range == 0 969 && data.max_indirect_range == 0) { 970 // Since size > 0, there must be a valid block run in this stream 971 int32 last = 0; 972 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 973 if (data.direct[last + 1].IsZero()) 974 break; 975 976 group = data.direct[last].AllocationGroup(); 977 start = data.direct[last].Start() + data.direct[last].Length(); 978 } 979 } else if (inode->IsContainer() || inode->IsSymLink()) { 980 // directory and symbolic link data will go in the same allocation 981 // group as the inode is in but after the inode data 982 start = inode->BlockRun().Start(); 983 } else { 984 // file data will start in the next allocation group 985 group = inode->BlockRun().AllocationGroup() + 1; 986 } 987 988 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 989 } 990 991 992 status_t 993 BlockAllocator::Free(Transaction& transaction, block_run run) 994 { 995 RecursiveLocker lock(fLock); 996 997 int32 group = run.AllocationGroup(); 998 uint16 start = run.Start(); 999 uint16 length = run.Length(); 1000 1001 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, 1002 length)); 1003 T(Free(run)); 1004 1005 // doesn't use Volume::IsValidBlockRun() here because it can check better 1006 // against the group size (the last group may have a different length) 1007 if (group < 0 || group >= fNumGroups 1008 || start > fGroups[group].NumBits() 1009 || uint32(start + length) > fGroups[group].NumBits() 1010 || length == 0) { 1011 FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, 1012 start, length)); 1013 DEBUGGER(("tried to free invalid block_run")); 1014 return B_BAD_VALUE; 1015 } 1016 // check if someone tries to free reserved areas at the beginning of the 1017 // drive 1018 if (group == 0 1019 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 1020 FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, 1021 start, length)); 1022 DEBUGGER(("tried to free reserved block")); 1023 return B_BAD_VALUE; 1024 } 1025 #ifdef DEBUG 1026 if (CheckBlockRun(run) != B_OK) 1027 return B_BAD_DATA; 1028 #endif 1029 1030 CHECK_ALLOCATION_GROUP(group); 1031 1032 if (fGroups[group].Free(transaction, start, length) != B_OK) 1033 RETURN_ERROR(B_IO_ERROR); 1034 1035 CHECK_ALLOCATION_GROUP(group); 1036 1037 #ifdef DEBUG 1038 if (CheckBlockRun(run, NULL, false) != B_OK) { 1039 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just " 1040 "freed)\n")); 1041 } 1042 #endif 1043 1044 fVolume->SuperBlock().used_blocks = 1045 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 1046 return B_OK; 1047 } 1048 1049 1050 #ifdef DEBUG_FRAGMENTER 1051 void 1052 BlockAllocator::Fragment() 1053 { 1054 AllocationBlock cached(fVolume); 1055 RecursiveLocker lock(fLock); 1056 1057 // only leave 4 block holes 1058 static const uint32 kMask = 0x0f0f0f0f; 1059 uint32 valuesPerBlock = fVolume->BlockSize() / 4; 1060 1061 for (int32 i = 0; i < fNumGroups; i++) { 1062 AllocationGroup& group = fGroups[i]; 1063 1064 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1065 Transaction transaction(fVolume, 0); 1066 1067 if (cached.SetToWritable(transaction, group, block) != B_OK) 1068 return; 1069 1070 for (int32 index = 0; index < valuesPerBlock; index++) { 1071 cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask); 1072 } 1073 1074 transaction.Done(); 1075 } 1076 } 1077 } 1078 #endif // DEBUG_FRAGMENTER 1079 1080 1081 #ifdef DEBUG_ALLOCATION_GROUPS 1082 void 1083 BlockAllocator::_CheckGroup(int32 groupIndex) const 1084 { 1085 AllocationBlock cached(fVolume); 1086 ASSERT_LOCKED_RECURSIVE(&fLock); 1087 1088 AllocationGroup& group = fGroups[groupIndex]; 1089 1090 int32 currentStart = 0, currentLength = 0; 1091 int32 firstFree = -1; 1092 int32 largestStart = -1; 1093 int32 largestLength = 0; 1094 int32 currentBit = 0; 1095 1096 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1097 if (cached.SetTo(group, block) < B_OK) { 1098 panic("setting group block %d failed\n", (int)block); 1099 return; 1100 } 1101 1102 for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) { 1103 if (!cached.IsUsed(bit)) { 1104 if (firstFree < 0) { 1105 firstFree = currentBit; 1106 if (!group.fLargestValid) { 1107 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1108 // mostly harmless but noteworthy 1109 dprintf("group %d first free too late: should be " 1110 "%d, is %d\n", (int)groupIndex, (int)firstFree, 1111 (int)group.fFirstFree); 1112 } 1113 return; 1114 } 1115 } 1116 1117 if (currentLength == 0) { 1118 // start new range 1119 currentStart = currentBit; 1120 } 1121 currentLength++; 1122 } else if (currentLength) { 1123 // end of a range 1124 if (currentLength > largestLength) { 1125 largestStart = currentStart; 1126 largestLength = currentLength; 1127 } 1128 currentLength = 0; 1129 } 1130 currentBit++; 1131 } 1132 } 1133 1134 if (currentLength > largestLength) { 1135 largestStart = currentStart; 1136 largestLength = currentLength; 1137 } 1138 1139 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1140 // mostly harmless but noteworthy 1141 dprintf("group %d first free too late: should be %d, is %d\n", 1142 (int)groupIndex, (int)firstFree, (int)group.fFirstFree); 1143 } 1144 if (group.fLargestValid 1145 && (largestStart != group.fLargestStart 1146 || largestLength != group.fLargestLength)) { 1147 panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n", 1148 fVolume, (int)groupIndex, (int)group.fLargestStart, 1149 (int)group.fLargestLength, (int)largestStart, (int)largestLength); 1150 } 1151 } 1152 #endif // DEBUG_ALLOCATION_GROUPS 1153 1154 1155 status_t 1156 BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize) 1157 { 1158 const uint32 kTrimRanges = 128; 1159 fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data) 1160 + sizeof(uint64) * kTrimRanges); 1161 if (trimData == NULL) 1162 return B_NO_MEMORY; 1163 1164 MemoryDeleter deleter(trimData); 1165 RecursiveLocker locker(fLock); 1166 1167 // TODO: take given offset and size into account! 1168 int32 lastGroup = fNumGroups - 1; 1169 uint32 firstBlock = 0; 1170 uint32 firstBit = 0; 1171 uint64 currentBlock = 0; 1172 uint32 blockShift = fVolume->BlockShift(); 1173 1174 uint64 firstFree = 0; 1175 size_t freeLength = 0; 1176 1177 trimData->range_count = 0; 1178 trimmedSize = 0; 1179 1180 AllocationBlock cached(fVolume); 1181 for (int32 groupIndex = 0; groupIndex <= lastGroup; groupIndex++) { 1182 AllocationGroup& group = fGroups[groupIndex]; 1183 1184 for (uint32 block = firstBlock; block < group.NumBlocks(); block++) { 1185 cached.SetTo(group, block); 1186 1187 for (uint32 i = firstBit; i < cached.NumBlockBits(); i++) { 1188 if (cached.IsUsed(i)) { 1189 // Block is in use 1190 if (freeLength > 0) { 1191 status_t status = _TrimNext(*trimData, kTrimRanges, 1192 firstFree << blockShift, freeLength << blockShift, 1193 false, trimmedSize); 1194 if (status != B_OK) 1195 return status; 1196 1197 freeLength = 0; 1198 } 1199 } else if (freeLength++ == 0) { 1200 // Block is free, start new free range 1201 firstFree = currentBlock; 1202 } 1203 1204 currentBlock++; 1205 } 1206 } 1207 1208 firstBlock = 0; 1209 firstBit = 0; 1210 } 1211 1212 return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift, 1213 freeLength << blockShift, true, trimmedSize); 1214 } 1215 1216 1217 // #pragma mark - 1218 1219 1220 /*! Checks whether or not the specified block range is allocated or not, 1221 depending on the \a allocated argument. 1222 */ 1223 status_t 1224 BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated, 1225 off_t* firstError) 1226 { 1227 if (start < 0 || start + length > fVolume->NumBlocks()) 1228 return B_BAD_VALUE; 1229 1230 off_t block = start; 1231 1232 int32 group = start >> fVolume->AllocationGroupShift(); 1233 uint32 bitmapBlock = start / (fVolume->BlockSize() << 3); 1234 uint32 blockOffset = start % (fVolume->BlockSize() << 3); 1235 1236 uint32 groupBlock = bitmapBlock % fBlocksPerGroup; 1237 1238 AllocationBlock cached(fVolume); 1239 1240 while (groupBlock < fGroups[group].NumBlocks() && length > 0) { 1241 if (cached.SetTo(fGroups[group], groupBlock) != B_OK) 1242 RETURN_ERROR(B_IO_ERROR); 1243 1244 for (; blockOffset < cached.NumBlockBits() && length > 0; 1245 blockOffset++, length--, block++) { 1246 if (cached.IsUsed(blockOffset) != allocated) { 1247 PRINT(("CheckBlocks: Erroneous block (group = %ld, " 1248 "groupBlock = %ld, blockOffset = %ld)!\n", 1249 group, groupBlock, blockOffset)); 1250 1251 if (firstError) 1252 *firstError = block; 1253 1254 return B_BAD_DATA; 1255 } 1256 } 1257 1258 blockOffset = 0; 1259 1260 if (++groupBlock >= fGroups[group].NumBlocks()) { 1261 groupBlock = 0; 1262 group++; 1263 } 1264 } 1265 1266 return B_OK; 1267 } 1268 1269 1270 bool 1271 BlockAllocator::IsValidBlockRun(block_run run) 1272 { 1273 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1274 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1275 || uint32(run.Start() + run.Length()) 1276 > fGroups[run.AllocationGroup()].fNumBits 1277 || run.length == 0) { 1278 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, 1279 run.AllocationGroup(), run.Start(), run.Length())); 1280 return false; 1281 } 1282 return true; 1283 } 1284 1285 1286 status_t 1287 BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated) 1288 { 1289 if (!IsValidBlockRun(run)) 1290 return B_BAD_DATA; 1291 1292 status_t status = CheckBlocks(fVolume->ToBlock(run), run.Length(), 1293 allocated); 1294 if (status != B_OK) { 1295 PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated!\n", 1296 type, run.AllocationGroup(), run.Start(), run.Length())); 1297 } 1298 1299 return status; 1300 } 1301 1302 1303 status_t 1304 BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges, 1305 uint64 offset, uint64 size) 1306 { 1307 if (trimData.range_count < maxRanges && size > 0) { 1308 trimData.ranges[trimData.range_count].offset = offset; 1309 trimData.ranges[trimData.range_count].size = size; 1310 trimData.range_count++; 1311 return true; 1312 } 1313 1314 return false; 1315 } 1316 1317 1318 status_t 1319 BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges, 1320 uint64 offset, uint64 size, bool force, uint64& trimmedSize) 1321 { 1322 PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %" 1323 B_PRIu64 ")\n", trimData.range_count, offset, size)); 1324 1325 bool pushed = _AddTrim(trimData, maxRanges, offset, size); 1326 1327 if (!pushed || force) { 1328 // Trim now 1329 trimData.trimmed_size = 0; 1330 dprintf("TRIM FS:\n"); 1331 for (uint32 i = 0; i < trimData.range_count; i++) { 1332 dprintf("[%3" B_PRIu32 "] %" B_PRIu64 " : %" B_PRIu64 "\n", i, 1333 trimData.ranges[i].offset, trimData.ranges[i].size); 1334 } 1335 if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData, 1336 sizeof(fs_trim_data)) != 0) { 1337 return errno; 1338 } 1339 1340 trimmedSize += trimData.trimmed_size; 1341 trimData.range_count = 0; 1342 } 1343 1344 if (!pushed) 1345 _AddTrim(trimData, maxRanges, offset, size); 1346 1347 return B_OK; 1348 } 1349 1350 1351 // #pragma mark - debugger commands 1352 1353 1354 #ifdef BFS_DEBUGGER_COMMANDS 1355 1356 void 1357 BlockAllocator::Dump(int32 index) 1358 { 1359 kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups, fGroups); 1360 kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup); 1361 1362 for (int32 i = 0; i < fNumGroups; i++) { 1363 if (index != -1 && i != index) 1364 continue; 1365 1366 AllocationGroup& group = fGroups[i]; 1367 1368 kprintf("[%3" B_PRId32 "] num bits: %" B_PRIu32 " (%p)\n", i, 1369 group.NumBits(), &group); 1370 kprintf(" num blocks: %" B_PRIu32 "\n", group.NumBlocks()); 1371 kprintf(" start: %" B_PRId32 "\n", group.Start()); 1372 kprintf(" first free: %" B_PRId32 "\n", group.fFirstFree); 1373 kprintf(" largest start: %" B_PRId32 "%s\n", group.fLargestStart, 1374 group.fLargestValid ? "" : " (invalid)"); 1375 kprintf(" largest length: %" B_PRId32 "\n", group.fLargestLength); 1376 kprintf(" free bits: %" B_PRId32 "\n", group.fFreeBits); 1377 } 1378 } 1379 1380 1381 #if BFS_TRACING 1382 static char kTraceBuffer[256]; 1383 1384 1385 int 1386 dump_block_allocator_blocks(int argc, char** argv) 1387 { 1388 if (argc != 3 || !strcmp(argv[1], "--help")) { 1389 kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]); 1390 return 0; 1391 } 1392 1393 Volume* volume = (Volume*)parse_expression(argv[1]); 1394 off_t block = parse_expression(argv[2]); 1395 1396 // iterate over all tracing entries to find overlapping actions 1397 1398 using namespace BFSBlockTracing; 1399 1400 LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0); 1401 TraceEntryIterator iterator; 1402 while (TraceEntry* _entry = iterator.Next()) { 1403 if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) { 1404 off_t first = volume->ToBlock(entry->Run()); 1405 off_t last = first - 1 + entry->Run().Length(); 1406 if (block >= first && block <= last) { 1407 out.Clear(); 1408 const char* dump = out.DumpEntry(entry); 1409 kprintf("%5ld. %s\n", iterator.Index(), dump); 1410 } 1411 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 1412 off_t first = volume->ToBlock(entry->Run()); 1413 off_t last = first - 1 + entry->Run().Length(); 1414 if (block >= first && block <= last) { 1415 out.Clear(); 1416 const char* dump = out.DumpEntry(entry); 1417 kprintf("%5ld. %s\n", iterator.Index(), dump); 1418 } 1419 } 1420 } 1421 1422 return 0; 1423 } 1424 #endif 1425 1426 1427 int 1428 dump_block_allocator(int argc, char** argv) 1429 { 1430 int32 group = -1; 1431 if (argc == 3) { 1432 group = parse_expression(argv[2]); 1433 argc--; 1434 } 1435 1436 if (argc != 2 || !strcmp(argv[1], "--help")) { 1437 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 1438 return 0; 1439 } 1440 1441 Volume* volume = (Volume*)parse_expression(argv[1]); 1442 BlockAllocator& allocator = volume->Allocator(); 1443 1444 allocator.Dump(group); 1445 return 0; 1446 } 1447 1448 #endif // BFS_DEBUGGER_COMMANDS 1449 1450