/*
 * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */


//! Block bitmap handling and allocation policies


#include "BlockAllocator.h"

#include "Debug.h"
#include "Inode.h"
#include "Volume.h"


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate an approximate range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on that in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead we are reading in every block when it's used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// see some real world testing.

#if BFS_TRACING && !defined(FS_SHELL)
namespace BFSBlockTracing {


class Allocate : public AbstractTraceEntry {
public:
	Allocate(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(),
			fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
	block_run fRun;
};


class Free : public AbstractTraceEntry {
public:
	Free(block_run run)
		:
		fRun(run)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(),
			fRun.Start(), fRun.Length());
	}

	const block_run& Run() const { return fRun; }

private:
	block_run fRun;
};


static uint32
checksum(const uint8* data, size_t size)
{
	const uint32* data4 = (const uint32*)data;
	uint32 sum = 0;
	while (size >= 4) {
		sum += *data4;
		data4++;
		size -= 4;
	}
	return sum;
}


class Block : public AbstractTraceEntry {
public:
	Block(const char* label, off_t blockNumber, const uint8* data,
			size_t size, uint32 start = 0, uint32 length = 0)
		:
		fBlock(blockNumber),
		fData(data),
		fStart(start),
		fLength(length),
		fLabel(label)
	{
		fSum = checksum(data, size);
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel,
			fBlock, fData, fSum, fStart, fLength);
	}

private:
	off_t fBlock;
	const uint8* fData;
	uint32 fStart;
	uint32 fLength;
	uint32 fSum;
	const char* fLabel;
};


class BlockChange : public AbstractTraceEntry {
public:
	BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData)
		:
		fBlock(block),
		fOldData(oldData),
		fNewData(newData),
		fLabel(label)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel,
			fBlock, fOldData, fNewData);
	}

private:
	int32 fBlock;
	uint32 fOldData;
	uint32 fNewData;
	const char* fLabel;
};


} // namespace BFSBlockTracing

#	define T(x) new(std::nothrow) BFSBlockTracing::x;
#else
#	define T(x) ;
#endif

#ifdef DEBUG_ALLOCATION_GROUPS
#	define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group)
#else
#	define CHECK_ALLOCATION_GROUP(group) ;
#endif


class AllocationBlock : public CachedBlock {
public:
	AllocationBlock(Volume* volume);

	inline void Allocate(uint16 start, uint16 numBlocks);
	inline void Free(uint16 start, uint16 numBlocks);
	inline bool IsUsed(uint16 block);

	status_t SetTo(AllocationGroup& group, uint16 block);
	status_t SetToWritable(Transaction& transaction, AllocationGroup& group,
		uint16 block);

	uint32 NumBlockBits() const { return fNumBits; }
	uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; }
	uint8* Block() const { return (uint8*)fBlock; }

private:
	uint32 fNumBits;
#ifdef DEBUG
	bool fWritable;
#endif
};


class AllocationGroup {
public:
	AllocationGroup();

	void AddFreeRange(int32 start, int32 blocks);
	bool IsFull() const { return fFreeBits == 0; }

	status_t Allocate(Transaction& transaction, uint16 start, int32 length);
	status_t Free(Transaction& transaction, uint16 start, int32 length);

	uint32 NumBits() const { return fNumBits; }
	uint32 NumBlocks() const { return fNumBlocks; }
	int32 Start() const { return fStart; }

private:
	friend class BlockAllocator;

	uint32 fNumBits;
	uint32 fNumBlocks;
	int32 fStart;
	int32 fFirstFree;
	int32 fFreeBits;

	int32 fLargestStart;
	int32 fLargestLength;
	bool fLargestValid;
};


AllocationBlock::AllocationBlock(Volume* volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup& group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits than the others
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = false;
#endif
	return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR;
}


status_t
AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group,
	uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in the last block
	if ((block + 1) * fNumBits > group.NumBits())
		fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
	fWritable = true;
#endif
	return CachedBlock::SetToWritable(transaction, group.Start() + block)
		!= NULL ? B_OK : B_ERROR;
}


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block > fNumBits)
		return true;

	// the block bitmap is accessed in 32-bit blocks
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

		T(BlockChange("b-alloc", block, Block(block),
			Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask)));

#if KDEBUG
		// check for already set blocks
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already "
				"allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			panic("blocks already set!");
		}
#endif

		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
	T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(),
		start, numBlocks));
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		T(BlockChange("b-free", block, Block(block),
			Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask)));

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


// #pragma mark -


/*!	The allocation groups are created and initialized in
	BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap()
	respectively.
*/
AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fFreeBits(0),
	fLargestValid(false)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (!fLargestValid || fLargestLength < blocks) {
		fLargestStart = start;
		fLargestLength = blocks;
		fLargestValid = true;
	}

	fFreeBits += blocks;
}


/*!	Allocates the specified run in the allocation group.
	Doesn't check if the run is valid or partially allocated already, nor
	does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of allocating some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
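
	As a rough illustration (the numbers are made up): with 2048 byte blocks,
	each bitmap block covers 2048 * 8 = 16384 bits, so a run starting at bit
	20000 of this group would begin in bitmap block 20000 / 16384 = 1 at bit
	offset 20000 % 16384 = 3616 - exactly the block/start split computed
	below before the per-block loop.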
*/
status_t
AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	if (fLargestValid) {
		bool cut = false;
		if (fLargestStart == start) {
			// cut from start
			fLargestStart += length;
			fLargestLength -= length;
			cut = true;
		} else if (start > fLargestStart
			&& start < fLargestStart + fLargestLength) {
			// cut from end
			fLargestLength = start - fLargestStart;
			cut = true;
		}
		if (cut && (fLargestLength < fLargestStart
				|| fLargestLength
					< (int32)fNumBits - (fLargestStart + fLargestLength))) {
			// might not be the largest block anymore
			fLargestValid = false;
		}
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK) {
			fLargestValid = false;
			RETURN_ERROR(B_IO_ERROR);
		}

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/*!	Frees the specified run in the allocation group.
	Doesn't check if the run is valid or was not completely allocated, nor
	does it maintain the free range hints or the volume's used blocks count.
	It only does the low-level work of freeing some bits in the block bitmap.
	Assumes that the block bitmap lock is held.
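
	Note that when the freed run borders on the cached largest free range (or
	could allow a larger range elsewhere), the code below only clears
	fLargestValid instead of merging ranges; the hint is then recomputed
	lazily the next time BlockAllocator::AllocateBlocks() scans this group
	from the start.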
*/
status_t
AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length)
{
	ASSERT(start + length <= (int32)fNumBits);

	// Update the allocation group info
	// TODO: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	// The range to be freed cannot be part of the valid largest range
	ASSERT(!fLargestValid || start + length <= fLargestStart
		|| start > fLargestStart);

	if (fLargestValid
		&& (start + length == fLargestStart
			|| fLargestStart + fLargestLength == start
			|| (start < fLargestStart && fLargestStart > fLargestLength)
			|| (start > fLargestStart
				&& (int32)fNumBits - (fLargestStart + fLargestLength)
					> fLargestLength))) {
		fLargestValid = false;
	}

	Volume* volume = transaction.GetVolume();

	// calculate block in the block bitmap and position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		T(Block("free-1", block, cached.Block(), volume->BlockSize()));
		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		length -= freeLength;
		start = 0;
		T(Block("free-2", block, cached.Block(), volume->BlockSize()));
		block++;
	}
	return B_OK;
}


// #pragma mark -


BlockAllocator::BlockAllocator(Volume* volume)
	:
	fVolume(volume),
	fGroups(NULL)
	//fCheckBitmap(NULL),
	//fCheckCookie(NULL)
{
	recursive_lock_init(&fLock, "bfs allocator");
}


BlockAllocator::~BlockAllocator()
{
	recursive_lock_destroy(&fLock);
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	//fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1)
	//	/ (fVolume->BlockSize() * 8);
	fNumBlocks = fVolume->NumBitmapBlocks();

	fGroups = new(std::nothrow) AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	recursive_lock_lock(&fLock);
		// the lock will be released by the _Initialize() method

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize,
		"bfs block allocator", B_LOW_PRIORITY, this);
	if (id < B_OK)
		return _Initialize(this);

	recursive_lock_transfer_lock(&fLock, id);

	return resume_thread(id);
}


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction& transaction)
{
	status_t status = Initialize(false);
	if (status != B_OK)
		return status;

	uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize();
	uint32 blockShift = fVolume->BlockShift();

	uint32* buffer = (uint32*)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;
		// the bitmap starts directly after the superblock

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (write_pos(fVolume->Device(), offset << blockShift, buffer,
				fBlocksPerGroup << blockShift) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		if (i == fNumGroups - 1) {
			fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits;
			fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			fGroups[i].fNumBits = numBits;
			fGroups[i].fNumBlocks = fBlocksPerGroup;
		}
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits;
		fGroups[i].fLargestValid = true;

		offset += fBlocksPerGroup;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}


status_t
BlockAllocator::_Initialize(BlockAllocator* allocator)
{
	// The lock must already be held at this point
	RecursiveLocker locker(allocator->fLock, true);

	Volume* volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 blockShift = volume->BlockShift();
	off_t freeBlocks = 0;

	uint32* buffer = (uint32*)malloc(blocks << blockShift);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup* groups = allocator->fGroups;
	off_t offset = 1;
	uint32 bitsPerGroup = 8 * (blocks << blockShift);
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (read_pos(volume->Device(), offset << blockShift, buffer,
				blocks << blockShift) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		if (i == numGroups - 1) {
			groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup;
			groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1)
				>> (blockShift + 3));
		} else {
			groups[i].fNumBits = bitsPerGroup;
			groups[i].fNumBlocks = blocks;
		}
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 numBits = groups[i].fNumBits, bit = 0;
		int32 count = (numBits + 31) / 32;

		for (int32 k = 0; k < count; k++) {
			for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					// block is in use
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0) {
					// block is free, start new free range
					start = bit;
				}
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();

	if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) {
		if (volume->IsReadOnly()) {
			FATAL(("Space for block bitmap or log area is not reserved "
				"(volume is mounted read-only)!\n"));
		} else {
			Transaction transaction(volume, 0);
			if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) {
FATAL(("Could not allocate reserved space for block " 688 "bitmap/log!\n")); 689 volume->Panic(); 690 } else { 691 transaction.Done(); 692 FATAL(("Space for block bitmap or log area was not " 693 "reserved!\n")); 694 } 695 } 696 } 697 698 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 699 if (volume->UsedBlocks() != usedBlocks) { 700 // If the disk in a dirty state at mount time, it's 701 // normal that the values don't match 702 INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %" 703 B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks)); 704 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 705 } 706 707 return B_OK; 708 } 709 710 711 void 712 BlockAllocator::Uninitialize() 713 { 714 // We only have to make sure that the initializer thread isn't running 715 // anymore. 716 recursive_lock_lock(&fLock); 717 } 718 719 720 /*! Tries to allocate between \a minimum, and \a maximum blocks starting 721 at group \a groupIndex with offset \a start. The resulting allocation 722 is put into \a run. 723 724 The number of allocated blocks is always a multiple of \a minimum which 725 has to be a power of two value. 726 */ 727 status_t 728 BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex, 729 uint16 start, uint16 maximum, uint16 minimum, block_run& run) 730 { 731 if (maximum == 0) 732 return B_BAD_VALUE; 733 734 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", 735 groupIndex, start, maximum, minimum)); 736 737 AllocationBlock cached(fVolume); 738 RecursiveLocker lock(fLock); 739 740 uint32 bitsPerFullBlock = fVolume->BlockSize() << 3; 741 742 // Find the block_run that can fulfill the request best 743 int32 bestGroup = -1; 744 int32 bestStart = -1; 745 int32 bestLength = -1; 746 747 for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) { 748 groupIndex = groupIndex % fNumGroups; 749 AllocationGroup& group = fGroups[groupIndex]; 750 751 CHECK_ALLOCATION_GROUP(groupIndex); 752 753 if (start >= group.NumBits() || group.IsFull()) 754 continue; 755 756 // The wanted maximum is smaller than the largest free block in the 757 // group or already smaller than the minimum 758 759 if (start < group.fFirstFree) 760 start = group.fFirstFree; 761 762 if (group.fLargestValid) { 763 if (group.fLargestLength < bestLength) 764 continue; 765 766 if (group.fLargestStart >= start) { 767 if (group.fLargestLength >= bestLength) { 768 bestGroup = groupIndex; 769 bestStart = group.fLargestStart; 770 bestLength = group.fLargestLength; 771 772 if (bestLength >= maximum) 773 break; 774 } 775 776 // We know everything about this group we have to, let's skip 777 // to the next 778 continue; 779 } 780 } 781 782 // There may be more than one block per allocation group - and 783 // we iterate through it to find a place for the allocation. 
		// (one allocation can't exceed one allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 currentStart = 0, currentLength = 0;
		int32 groupLargestStart = -1;
		int32 groupLargestLength = -1;
		int32 currentBit = start;
		bool canFindGroupLargest = start == 0;

		for (; block < group.NumBlocks(); block++) {
			if (cached.SetTo(group, block) < B_OK)
				RETURN_ERROR(B_ERROR);

			T(Block("alloc-in", group.Start() + block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			// find a block large enough to hold the allocation
			for (uint32 bit = start % bitsPerFullBlock;
					bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (currentLength == 0) {
						// start new range
						currentStart = currentBit;
					}

					// have we found a range large enough to hold the wanted
					// maximum?
					if (++currentLength >= maximum) {
						bestGroup = groupIndex;
						bestStart = currentStart;
						bestLength = currentLength;
						break;
					}
				} else {
					if (currentLength) {
						// end of a range
						if (currentLength > bestLength) {
							bestGroup = groupIndex;
							bestStart = currentStart;
							bestLength = currentLength;
						}
						if (currentLength > groupLargestLength) {
							groupLargestStart = currentStart;
							groupLargestLength = currentLength;
						}
						currentLength = 0;
					}
					if ((int32)group.NumBits() - currentBit
							<= groupLargestLength) {
						// We can't find a bigger block in this group anymore,
						// let's skip the rest.
						block = group.NumBlocks();
						break;
					}
				}
				currentBit++;
			}

			T(Block("alloc-out", block, cached.Block(),
				fVolume->BlockSize(), groupIndex, currentStart));

			if (bestLength >= maximum) {
				canFindGroupLargest = false;
				break;
			}

			// start from the beginning of the next block
			start = 0;
		}

		if (currentBit == (int32)group.NumBits()) {
			if (currentLength > bestLength) {
				bestGroup = groupIndex;
				bestStart = currentStart;
				bestLength = currentLength;
			}
			if (canFindGroupLargest && currentLength > groupLargestLength) {
				groupLargestStart = currentStart;
				groupLargestLength = currentLength;
			}
		}

		if (canFindGroupLargest && !group.fLargestValid
			&& groupLargestLength >= 0) {
			group.fLargestStart = groupLargestStart;
			group.fLargestLength = groupLargestLength;
			group.fLargestValid = true;
		}

		if (bestLength >= maximum)
			break;
	}

	// If we found a suitable range, mark the blocks as in use, and
	// write the updated block bitmap back to disk
	if (bestLength < minimum)
		return B_DEVICE_FULL;

	if (bestLength > maximum)
		bestLength = maximum;
	else if (minimum > 1) {
		// make sure bestLength is a multiple of minimum
		bestLength = round_down(bestLength, minimum);
	}

	if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(bestGroup);

	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup);
	run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart);
	run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength);

	fVolume->SuperBlock().used_blocks
		= HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength);
		// We are not writing back the disk's superblock - it's
		// either done by the journaling code, or when the disk
		// is unmounted.
		// If the value is not correct at mount time, it will be
		// fixed anyway.

	// We need to flush any remaining blocks in the new allocation to make sure
	// they won't interfere with the file cache.
	block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run),
		run.Length());

	T(Allocate(run));
	return B_OK;
}


status_t
BlockAllocator::AllocateForInode(Transaction& transaction,
	const block_run* parent, mode_t type, block_run& run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how well they work

	// Files go into the same allocation group as their parent,
	// sub-directories will be inserted 8 allocation groups after
	// the one of the parent
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
	off_t numBlocks, block_run& run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].NumBits())
		numBlocks = fGroups[0].NumBits();

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// TODO: if we drop compatibility, couldn't we do this better?
	// There are basically three possibilities:
	// a) since a length of zero doesn't make any sense, take that for 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	// c) have BlockRun::Length() return (length + 1).
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// Are there already allocated blocks? (then just try to allocate near the
	// last one)
	if (inode->Size() > 0) {
		const data_stream& data = inode->Node().data;
		// TODO: we currently don't care for when the data stream
		// is already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go in the same allocation
		// group as the inode is in but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction& transaction, block_run run)
{
	RecursiveLocker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start,
		length));
	T(Free(run));

	// doesn't use Volume::IsValidBlockRun() here because it can check better
	// against the group size (the last group may have a different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].NumBits()
		|| uint32(start + length) > fGroups[group].NumBits()
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group,
			start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the
	// drive
	if (group == 0
		&& start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group,
			start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) != B_OK)
		return B_BAD_DATA;
#endif

	CHECK_ALLOCATION_GROUP(group);

	if (fGroups[group].Free(transaction, start, length) != B_OK)
		RETURN_ERROR(B_IO_ERROR);

	CHECK_ALLOCATION_GROUP(group);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, false) != B_OK) {
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just "
			"freed)\n"));
	}
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


#ifdef DEBUG_FRAGMENTER
void
BlockAllocator::Fragment()
{
	AllocationBlock cached(fVolume);
	RecursiveLocker lock(fLock);

	// only leave 4 block holes
	static const uint32 kMask = 0x0f0f0f0f;
	uint32 valuesPerBlock = fVolume->BlockSize() / 4;

	for (int32 i = 0; i < fNumGroups; i++) {
		AllocationGroup& group = fGroups[i];

		for (uint32 block = 0; block < group.NumBlocks(); block++) {
			Transaction transaction(fVolume, 0);

			if (cached.SetToWritable(transaction, group, block) != B_OK)
				return;

			for (int32 index = 0; index < valuesPerBlock; index++) {
				cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask);
			}

			transaction.Done();
		}
	}
}
#endif // DEBUG_FRAGMENTER


#ifdef DEBUG_ALLOCATION_GROUPS
void
BlockAllocator::_CheckGroup(int32 groupIndex) const
{
	AllocationBlock cached(fVolume);
	ASSERT_LOCKED_RECURSIVE(&fLock);

	AllocationGroup& group = fGroups[groupIndex];

	int32 currentStart = 0, currentLength = 0;
	int32 firstFree = -1;
	int32 largestStart = -1;
	int32 largestLength = 0;
	int32 currentBit = 0;

	for (uint32 block = 0; block < group.NumBlocks(); block++) {
		if (cached.SetTo(group, block) < B_OK) {
			panic("setting group block %d failed\n", (int)block);
			return;
		}

		for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) {
			if (!cached.IsUsed(bit)) {
				if (firstFree < 0) {
					firstFree = currentBit;
					if (!group.fLargestValid) {
						if (firstFree >= 0 && firstFree < group.fFirstFree) {
							// mostly harmless but noteworthy
							dprintf("group %d first free too late: should be "
								"%d, is %d\n", (int)groupIndex, (int)firstFree,
								(int)group.fFirstFree);
						}
						return;
					}
				}

				if (currentLength == 0) {
					// start new range
					currentStart = currentBit;
				}
				currentLength++;
			} else if (currentLength) {
				// end of a range
				if (currentLength > largestLength) {
					largestStart = currentStart;
					largestLength = currentLength;
				}
				currentLength = 0;
			}
			currentBit++;
		}
	}

	if (currentLength > largestLength) {
		largestStart = currentStart;
		largestLength = currentLength;
	}

	if (firstFree >= 0 && firstFree < group.fFirstFree) {
		// mostly harmless but noteworthy
		dprintf("group %d first free too late: should be %d, is %d\n",
			(int)groupIndex, (int)firstFree, (int)group.fFirstFree);
	}
	if (group.fLargestValid
		&& (largestStart != group.fLargestStart
			|| largestLength != group.fLargestLength)) {
		panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n",
			fVolume, (int)groupIndex, (int)group.fLargestStart,
			(int)group.fLargestLength, (int)largestStart, (int)largestLength);
	}
}
#endif // DEBUG_ALLOCATION_GROUPS


status_t
BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize)
{
	const uint32 kTrimRanges = 128;
	fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data)
		+ sizeof(uint64) * kTrimRanges);
	if (trimData == NULL)
		return B_NO_MEMORY;

	MemoryDeleter deleter(trimData);
	RecursiveLocker locker(fLock);

	// TODO: take given offset and size into account!
	int32 lastGroup = fNumGroups - 1;
	uint32 firstBlock = 0;
	uint32 firstBit = 0;
	uint64 currentBlock = 0;
	uint32 blockShift = fVolume->BlockShift();

	uint64 firstFree = 0;
	size_t freeLength = 0;

	trimData->range_count = 0;
	trimmedSize = 0;

	AllocationBlock cached(fVolume);
	for (int32 groupIndex = 0; groupIndex <= lastGroup; groupIndex++) {
		AllocationGroup& group = fGroups[groupIndex];

		for (uint32 block = firstBlock; block < group.NumBlocks(); block++) {
			cached.SetTo(group, block);

			for (uint32 i = firstBit; i < cached.NumBlockBits(); i++) {
				if (cached.IsUsed(i)) {
					// Block is in use
					if (freeLength > 0) {
						status_t status = _TrimNext(*trimData, kTrimRanges,
							firstFree << blockShift, freeLength << blockShift,
							false, trimmedSize);
						if (status != B_OK)
							return status;

						freeLength = 0;
					}
				} else if (freeLength++ == 0) {
					// Block is free, start new free range
					firstFree = currentBlock;
				}

				currentBlock++;
			}
		}

		firstBlock = 0;
		firstBit = 0;
	}

	return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift,
		freeLength << blockShift, true, trimmedSize);
}


// #pragma mark -


/*!	Checks whether the specified block range is allocated or free,
	depending on the \a allocated argument.
*/
status_t
BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated,
	off_t* firstError)
{
	if (start < 0 || start + length > fVolume->NumBlocks())
		return B_BAD_VALUE;

	off_t block = start;

	int32 group = start >> fVolume->AllocationGroupShift();
	uint32 bitmapBlock = start / (fVolume->BlockSize() << 3);
	uint32 blockOffset = start % (fVolume->BlockSize() << 3);

	uint32 groupBlock = bitmapBlock % fBlocksPerGroup;

	AllocationBlock cached(fVolume);

	while (groupBlock < fGroups[group].NumBlocks() && length > 0) {
		if (cached.SetTo(fGroups[group], groupBlock) != B_OK)
			RETURN_ERROR(B_IO_ERROR);

		for (; blockOffset < cached.NumBlockBits() && length > 0;
				blockOffset++, length--, block++) {
			if (cached.IsUsed(blockOffset) != allocated) {
				PRINT(("CheckBlocks: Erroneous block (group = %ld, "
					"groupBlock = %ld, blockOffset = %ld)!\n",
					group, groupBlock, blockOffset));

				if (firstError)
					*firstError = block;

				return B_BAD_DATA;
			}
		}

		blockOffset = 0;

		if (++groupBlock >= fGroups[group].NumBlocks()) {
			groupBlock = 0;
			group++;
		}
	}

	return B_OK;
}


bool
BlockAllocator::IsValidBlockRun(block_run run)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length())
			> fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("block_run(%ld, %u, %u) is invalid!\n",
			run.AllocationGroup(), run.Start(), run.Length()));
		return false;
	}
	return true;
}


status_t
BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated)
{
	if (!IsValidBlockRun(run))
		return B_BAD_DATA;

	status_t status = CheckBlocks(fVolume->ToBlock(run), run.Length(),
		allocated);
	if (status != B_OK) {
		PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated!\n",
			type, run.AllocationGroup(), run.Start(), run.Length()));
	}

	return status;
}


bool
BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges,
	uint64 offset, uint64 size)
{
	if (trimData.range_count < maxRanges && size > 0) {
		trimData.ranges[trimData.range_count].offset = offset;
		trimData.ranges[trimData.range_count].size = size;
		trimData.range_count++;
		return true;
	}

	return false;
}


status_t
BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges,
	uint64 offset, uint64 size, bool force, uint64& trimmedSize)
{
	PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %"
		B_PRIu64 ")\n", trimData.range_count, offset, size));

	bool pushed = _AddTrim(trimData, maxRanges, offset, size);

	if (!pushed || force) {
		// Trim now
		trimData.trimmed_size = 0;
		dprintf("TRIM FS:\n");
		for (uint32 i = 0; i < trimData.range_count; i++) {
			dprintf("[%3" B_PRIu32 "] %" B_PRIu64 " : %" B_PRIu64 "\n", i,
				trimData.ranges[i].offset, trimData.ranges[i].size);
		}
		if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData,
				sizeof(fs_trim_data)) != 0) {
			return errno;
		}

		trimmedSize += trimData.trimmed_size;
		trimData.range_count = 0;
	}

	if (!pushed)
		_AddTrim(trimData, maxRanges, offset, size);

	return B_OK;
}


// #pragma mark - debugger commands


#ifdef BFS_DEBUGGER_COMMANDS

void
BlockAllocator::Dump(int32 index)
{
	kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups, fGroups);
	kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup);

	for (int32 i = 0; i < fNumGroups; i++) {
		if (index != -1 && i != index)
			continue;

		AllocationGroup& group = fGroups[i];

		kprintf("[%3" B_PRId32 "] num bits: %" B_PRIu32 " (%p)\n", i,
			group.NumBits(), &group);
		kprintf(" num blocks: %" B_PRIu32 "\n", group.NumBlocks());
		kprintf(" start: %" B_PRId32 "\n", group.Start());
		kprintf(" first free: %" B_PRId32 "\n", group.fFirstFree);
		kprintf(" largest start: %" B_PRId32 "%s\n", group.fLargestStart,
			group.fLargestValid ? "" : " (invalid)");
		kprintf(" largest length: %" B_PRId32 "\n", group.fLargestLength);
		kprintf(" free bits: %" B_PRId32 "\n", group.fFreeBits);
	}
}


#if BFS_TRACING
static char kTraceBuffer[256];


int
dump_block_allocator_blocks(int argc, char** argv)
{
	if (argc != 3 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]);
		return 0;
	}

	Volume* volume = (Volume*)parse_expression(argv[1]);
	off_t block = parse_expression(argv[2]);

	// iterate over all tracing entries to find overlapping actions

	using namespace BFSBlockTracing;

	LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0);
	TraceEntryIterator iterator;
	while (TraceEntry* _entry = iterator.Next()) {
		if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) {
			off_t first = volume->ToBlock(entry->Run());
			off_t last = first - 1 + entry->Run().Length();
			if (block >= first && block <= last) {
				out.Clear();
				const char* dump = out.DumpEntry(entry);
%s\n", iterator.Index(), dump); 1411 } 1412 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 1413 off_t first = volume->ToBlock(entry->Run()); 1414 off_t last = first - 1 + entry->Run().Length(); 1415 if (block >= first && block <= last) { 1416 out.Clear(); 1417 const char* dump = out.DumpEntry(entry); 1418 kprintf("%5ld. %s\n", iterator.Index(), dump); 1419 } 1420 } 1421 } 1422 1423 return 0; 1424 } 1425 #endif 1426 1427 1428 int 1429 dump_block_allocator(int argc, char** argv) 1430 { 1431 int32 group = -1; 1432 if (argc == 3) { 1433 group = parse_expression(argv[2]); 1434 argc--; 1435 } 1436 1437 if (argc != 2 || !strcmp(argv[1], "--help")) { 1438 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 1439 return 0; 1440 } 1441 1442 Volume* volume = (Volume*)parse_expression(argv[1]); 1443 BlockAllocator& allocator = volume->Allocator(); 1444 1445 allocator.Dump(group); 1446 return 0; 1447 } 1448 1449 #endif // BFS_DEBUGGER_COMMANDS 1450 1451