/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/


#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "Stack.h"
#include "bfs_control.h"

#include <util/kernel_cpp.h>
#include <stdlib.h>

#ifdef USER
#	define spawn_kernel_thread spawn_thread
#endif


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate an unsharp range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, with special handling for
//   directories, files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is that we do not just read the whole
// bitmap into memory and operate on it there - e.g. a 13 GB partition with
// a block size of 2048 bytes already has an 800 kB bitmap, and partition
// sizes will only grow - so that's not an option.
// Instead, every bitmap block is read in when it is used - since an
// allocation group can span several blocks in the block bitmap, the
// AllocationBlock class is there to make handling them easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// be tested against real-world workloads.


struct check_cookie {
	check_cookie() {}

	block_run			current;
	Inode				*parent;
	mode_t				parent_mode;
	Stack<block_run>	stack;
	TreeIterator		*iterator;
};


class AllocationBlock : public CachedBlock {
	public:
		AllocationBlock(Volume *volume);

		void Allocate(uint16 start, uint16 numBlocks);
		void Free(uint16 start, uint16 numBlocks);
		inline bool IsUsed(uint16 block);

		status_t SetTo(AllocationGroup &group, uint16 block);

		uint32 NumBlockBits() const { return fNumBits; }
		uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; }

	private:
		uint32 fNumBits;
};


class AllocationGroup {
	public:
		AllocationGroup();

		void AddFreeRange(int32 start, int32 blocks);
		bool IsFull() const { return fFreeBits == 0; }

		status_t Allocate(Transaction *transaction, uint16 start, int32 length);
		status_t Free(Transaction *transaction, uint16 start, int32 length);

		uint32 fNumBits;
		int32 fStart;
		int32 fFirstFree, fLargest, fLargestFirst;
			// ToDo: fLargest & fLargestFirst are not maintained (and therefore not used) yet!
		int32 fFreeBits;
};


AllocationBlock::AllocationBlock(Volume *volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in its last block
	if ((block + 1) * fNumBits > group.fNumBits)
		fNumBits = group.fNumBits % fNumBits;

	return CachedBlock::SetTo(group.fStart + block) != NULL ? B_OK : B_ERROR;
}

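// Illustrative sketch (not part of the original driver; the names are made
// up for this example): throughout this file, the block bitmap is accessed
// in 32-bit words, so a bit offset within one bitmap block is split into a
// word index and a mask, exactly as IsUsed(), Allocate(), and Free() below
// do it.

static inline uint32
example_word_index(uint16 bit)
{
	// each 32-bit word of a bitmap block describes 32 blocks
	return bit >> 5;
}

static inline uint32
example_bit_mask(uint16 bit)
{
	// within a word, block "bit" corresponds to this (disk endian) bit
	return 1UL << (bit % 32);
}
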
bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block >= fNumBits)
		return true;
	// the block bitmap is accessed in 32-bit blocks
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Allocate(): tried to allocate too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Allocate(): tried to allocate too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

#ifdef DEBUG
		// check for already set blocks
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			DEBUGGER(("blocks already set!"));
		}
#endif
		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Free(): tried to free too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Free(): tried to free too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


// #pragma mark -


AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (fLargest < blocks) {
		fLargest = blocks;
		fLargestFirst = start;
	}

	fFreeBits += blocks;
}

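// Illustrative sketch (not part of the original driver; hypothetical name):
// the inner loop of AllocationBlock::Allocate()/Free() above builds a 32-bit
// mask that covers at most one word of the bitmap per iteration. The same
// computation in isolation:

static inline uint32
example_run_mask(uint16 start, uint16 numBlocks)
{
	uint32 mask = 0;
	for (int32 i = start % 32; i < 32 && numBlocks > 0; i++, numBlocks--)
		mask |= 1UL << i;
	return mask;
}

// E.g. example_run_mask(4, 3) == 0x70 - bits 4, 5, and 6 are set; a run
// that extends past the word is simply continued in the next iteration.
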
/**	Allocates the specified run in the allocation group.
 *	Doesn't check if the run is valid or already partially
 *	allocated, nor does it maintain the free ranges hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of allocating some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Allocate(Transaction *transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/**	Frees the specified run in the allocation group.
 *	Doesn't check if the run is valid or was not completely
 *	allocated, nor does it maintain the free ranges hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of freeing some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Free(Transaction *transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= freeLength;
		start = 0;
		block++;
	}
	return B_OK;
}

// #pragma mark -


BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs allocator"),
	fGroups(NULL),
	fCheckBitmap(NULL)
{
}


BlockAllocator::~BlockAllocator()
{
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fGroups = new AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	fLock.Lock();
		// the lock will be released by the initialize() function

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::initialize,
		"bfs block allocator", B_LOW_PRIORITY, (void *)this);
	if (id < B_OK)
		return initialize(this);

	return resume_thread(id);
}


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction &transaction)
{
	status_t status = Initialize(false);
	if (status < B_OK)
		return status;

	uint32 blocks = fBlocksPerGroup;
	uint32 numBits = 8 * blocks * fVolume->BlockSize();

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (cached_write(fVolume->Device(), offset, buffer, blocks, fVolume->BlockSize()) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		fGroups[i].fNumBits = i == fNumGroups - 1 ? fVolume->NumBlocks() - (off_t)i * numBits : numBits;
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits;

		offset += blocks;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(&transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}

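// Illustrative sketch (not part of the original driver; hypothetical name):
// InitializeAndClearBitmap() above and initialize() below both size the
// last allocation group by whatever remains after the full-sized groups.
// The same computation as a standalone helper:

static inline off_t
example_group_bits(off_t numBlocks, uint32 bitsPerGroup, int32 group,
	int32 numGroups)
{
	// every group but the last holds exactly "bitsPerGroup" bits
	if (group < numGroups - 1)
		return bitsPerGroup;

	// the last group gets whatever is left over
	return numBlocks - (off_t)group * bitsPerGroup;
}

// E.g. with 100000 blocks and 65536 bits per group, group 0 holds 65536
// bits and group 1 (the last one) holds the remaining 34464 bits.
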
status_t
BlockAllocator::initialize(BlockAllocator *allocator)
{
	// The lock must already be held at this point

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 numBits = 8 * blocks * volume->BlockSize();
	off_t freeBlocks = 0;

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL) {
		allocator->fLock.Unlock();
		RETURN_ERROR(B_NO_MEMORY);
	}

	AllocationGroup *groups = allocator->fGroups;
	off_t offset = 1;
	int32 num = allocator->fNumGroups;

	for (int32 i = 0; i < num; i++) {
		if (cached_read(volume->Device(), offset, buffer, blocks, volume->BlockSize()) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		groups[i].fNumBits = i == num - 1 ? volume->NumBlocks() - (off_t)i * numBits : numBits;
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 size = groups[i].fNumBits, bit = 0;

		for (int32 k = 0; bit < size; k++) {
			for (int32 j = 0; j < 32 && bit < size; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0)
					start = bit;
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if the block bitmap and the log area are marked as reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();
	if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) {
		Transaction transaction(volume, 0);
		if (groups[0].Allocate(&transaction, 0, reservedBlocks) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			volume->Panic();
		} else {
			FATAL(("space for block bitmap or log area was not reserved!\n"));

			transaction.Done();
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk was in a dirty state at mount time, it's
		// normal that these values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	allocator->fLock.Unlock();
	return B_OK;
}

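// Illustrative sketch (not part of the original driver; hypothetical names):
// the scan in initialize() above walks the bitmap word by word and reports
// every maximal run of zero bits as a free range. The same idea over a
// plain buffer, with a callback standing in for AddFreeRange():

static void
example_scan_free_ranges(const uint32 *bitmap, int32 numBits,
	void (*report)(int32 start, int32 length))
{
	int32 start = -1, range = 0, bit = 0;

	for (int32 word = 0; bit < numBits; word++) {
		for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
			if (bitmap[word] & (1UL << j)) {
				// a used block ends any pending free range
				if (range > 0) {
					report(start, range);
					range = 0;
				}
			} else if (range++ == 0)
				start = bit;
		}
	}
	if (range > 0)
		report(start, range);
}
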
status_t
BlockAllocator::AllocateBlocks(Transaction *transaction, int32 group, uint16 start,
	uint16 maximum, uint16 minimum, block_run &run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// The first pass through all allocation groups tries to find the
	// wanted maximum number of blocks; the second pass just tries to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;

	for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].fNumBits || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// if the minimum is the same as the maximum, it's not necessary
			// to search the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// Skip this group if its largest free range is too small to hold
		// the number of blocks we are looking for in this pass
		// ToDo: disabled because fLargest is currently not maintained after the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// An allocation group may span several blocks in the block bitmap -
		// we iterate through them to find a place for the allocation
		// (one allocation can't exceed a single allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 range = 0, rangeStart = 0;

		for (; block < fBlocksPerGroup; block++) {
			if (cached.SetTo(fGroups[group], block) < B_OK)
				RETURN_ERROR(B_ERROR);

			// find a range large enough to hold the allocation
			for (uint32 bit = start % cached.NumBlockBits(); bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start a new range
						rangeStart = block * cached.NumBlockBits() + bit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a range larger than the required minimum (second pass)
					break;
				} else {
					// end of a range
					range = 0;
				}
			}

			// if we found a suitable range, mark the blocks as in use, and
			// write the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust the allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK)
					RETURN_ERROR(B_IO_ERROR);

				run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group);
				run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart);
				run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks);

				fVolume->SuperBlock().used_blocks =
					HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks);
					// We are not writing back the disk's super block - it's
					// either done by the journaling code, or when the disk
					// is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				return B_OK;
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}


status_t
BlockAllocator::AllocateForInode(Transaction *transaction, const block_run *parent,
	mode_t type, block_run &run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how well they work

	// Files go into the same allocation group as their parent, while
	// sub-directories are placed 8 allocation groups after their parent's
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}

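// Illustrative sketch (not part of the original driver; hypothetical names):
// AllocateBlocks() above visits every allocation group twice - the first
// round tries to satisfy the full "maximum", the second round falls back to
// the "minimum". The iteration pattern in isolation:

static void
example_two_pass_visit(int32 firstGroup, int32 numGroups,
	void (*visit)(int32 group, bool secondPass))
{
	int32 group = firstGroup;
	for (int32 i = 0; i < numGroups * 2; i++, group++) {
		group = group % numGroups;
		visit(group, i >= numGroups);
	}
}
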
status_t
BlockAllocator::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks,
	block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].fNumBits)
		numBlocks = fGroups[0].fNumBits;

	// since block_run.length is a uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// ToDo: if we drop compatibility, couldn't we do this any better?
	// There are basically two possibilities:
	// a) since a length of zero doesn't make any sense, take that as 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum number of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near the last one)
	if (inode->Size() > 0) {
		data_stream *data = &inode->Node()->data;
		// ToDo: we currently don't handle the case where the data stream
		// has already grown into the indirect ranges
		if (data->max_double_indirect_range == 0
			&& data->max_indirect_range == 0) {
			// since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data->direct[last + 1].IsZero())
					break;

			group = data->direct[last].AllocationGroup();
			start = data->direct[last].Start() + data->direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go into the same allocation
		// group as the inode is in, but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction *transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length));

	// We don't use Volume::IsValidBlockRun() here because we can check
	// against the group size more precisely (the last group may have a
	// different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].fNumBits
		|| uint32(start + length) > fGroups[group].fNumBits
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone is trying to free the reserved areas at the beginning of the drive
	if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, NULL, false) < B_OK)
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n"));
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}

size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
}


// #pragma mark -
//	Functions to check the validity of the bitmap - they are used from
//	the "chkbfs" command


bool
BlockAllocator::IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
	if (status < B_OK)
		return status;

	size_t size = BitmapSize();
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize the check bitmap, with the reserved blocks marked as used
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;)
		SetCheckBitmapAt(block);

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// remembered so that we can clean up nicely if "chkbfs" exits abnormally

	// ToDo: check the reserved area in the bitmap!

	return B_OK;
}

status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// If CheckNextNode() managed to work all the way through, we can
	// fix any damage to the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks in the check bitmap
		size_t size = BitmapSize();
		off_t usedBlocks = 0LL;

		// ToDo: update the allocation groups' used blocks info
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if (control->flags & BFS_FIX_BITMAP_ERRORS
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// If so, write the check bitmap back over the original one.
			// We use transactions to play it safe - in fact, several
			// transactions, so that we don't blow the maximum log size
			// on large disks (this doesn't need to be atomic anyway)
			fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}

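// Illustrative sketch (not part of the original driver; hypothetical name):
// StopChecking() above counts the used blocks in the check bitmap bit by
// bit. The same count over a plain buffer:

static off_t
example_count_used_blocks(const uint32 *bitmap, size_t sizeInBytes)
{
	off_t usedBlocks = 0;
	for (size_t i = sizeInBytes >> 2; i-- > 0;) {
		uint32 compare = 1;
		for (int16 j = 0; j < 32; j++, compare <<= 1) {
			if (bitmap[i] & compare)
				usedBlocks++;
		}
	}
	return usedBlocks;
}
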
status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
				control->status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			// get an iterator for the next directory

#ifdef UNSAFE_GET_VNODE
			RecursiveLocker locker(fVolume->Lock());
#endif
			Vnode vnode(fVolume, cookie->current);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			if (!inode->IsContainer()) {
				FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			BPlusTree *tree;
			if (inode->GetTree(&tree) != B_OK) {
				FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			cookie->parent = inode;
			cookie->parent_mode = inode->Mode();

			cookie->iterator = new TreeIterator(tree);
			if (cookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator is freed
			vnode.Keep();

			// check the directory's inode
			control->errors = 0;
			control->status = CheckInode(inode, control);

			if (inode->GetName(control->name) < B_OK)
				strcpy(control->name, "(node has no name)");

			control->inode = inode->ID();
			control->mode = inode->Mode();

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		vnode_id id;

		status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
		if (status == B_ENTRY_NOT_FOUND) {
			// there are no more entries in this iterator, free it and go on
			delete cookie->iterator;
			cookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));

			continue;
		} else if (status == B_OK) {
			// ignore "." and ".." entries
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;

			// fill in the control data as soon as we have it
			strlcpy(control->name, name, B_FILE_NAME_LENGTH);
			control->inode = id;
			control->errors = 0;

			Vnode vnode(fVolume, id);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("Could not open inode ID %Ld!\n", id));
				control->errors |= BFS_COULD_NOT_OPEN;
				control->status = B_ERROR;
				return B_OK;
			}

			// check if the inode's name is the same as in the b+tree
			if (inode->IsRegularNode()) {
				SimpleLocker locker(inode->SmallDataLock());

				const char *localName = inode->Name();
				if (localName == NULL || strcmp(localName, name)) {
					control->errors |= BFS_NAMES_DONT_MATCH;
					FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName));
				}
			}

			control->mode = inode->Mode();

			// Check for the correct mode of the node (if the mode of the
			// file doesn't fit its parent's, there is a serious problem)
			if ((cookie->parent_mode & S_ATTR_DIR && !inode->IsAttribute())
				|| (cookie->parent_mode & S_INDEX_DIR && !inode->IsIndex())
				|| ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) == S_DIRECTORY
					&& inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR))) {
				FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n",
					inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber()));

				// if we are allowed to fix errors, we should remove the file
				if (control->flags & BFS_REMOVE_WRONG_TYPES
					&& control->flags & BFS_FIX_BITMAP_ERRORS) {
#ifdef UNSAFE_GET_VNODE
					RecursiveLocker locker(fVolume->Lock());
#endif
					// it's safe to start a transaction, because Inode::Remove()
					// won't touch the block bitmap (which we hold the lock for)
					// if we set the INODE_DONT_FREE_SPACE flag - since we fix
					// the bitmap anyway
					Transaction transaction(fVolume, cookie->parent->BlockNumber());

					inode->Node()->flags |= INODE_DONT_FREE_SPACE;
					status = cookie->parent->Remove(&transaction, name, NULL, inode->IsContainer());
					if (status == B_OK)
						transaction.Done();
				} else
					status = B_ERROR;

				control->errors |= BFS_WRONG_TYPE;
				control->status = status;
				return B_OK;
			}

			// If the inode has an attribute directory, push it on the stack
			if (!inode->Attributes().IsZero())
				cookie->stack.Push(inode->Attributes());

			// push the directory on the stack so that it will be scanned later
			if (inode->IsContainer() && !inode->IsIndex())
				cookie->stack.Push(inode->BlockRun());
			else {
				// check it now
				control->status = CheckInode(inode, control);

				return B_OK;
			}
		}
	}
	// is never reached
}


bool
BlockAllocator::CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32-bit resolution
	if (index >= size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f));
}


void
BlockAllocator::SetCheckBitmapAt(off_t block)
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32-bit resolution
	if (index >= size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}

status_t
BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control, bool allocated)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
		if (control == NULL)
			return B_BAD_DATA;

		control->errors |= BFS_INVALID_BLOCK_RUN;
		return B_OK;
	}

	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.Start() / bitsPerBlock;
	uint32 pos = run.Start() % bitsPerBlock;
	int32 length = 0;
	off_t firstMissing = -1, firstSet = -1;
	off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();

	AllocationBlock cached(fVolume);

	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		if (pos >= cached.NumBlockBits()) {
			// something very strange has happened...
			RETURN_ERROR(B_ERROR);
		}

		while (length < run.Length() && pos < cached.NumBlockBits()) {
			if (cached.IsUsed(pos) != allocated) {
				if (control == NULL) {
					PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated (pos = %ld, length = %ld)!\n",
						type, run.AllocationGroup(), run.Start(), run.Length(), pos, length));
					return B_BAD_DATA;
				}
				if (firstMissing == -1) {
					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
					control->errors |= BFS_MISSING_BLOCKS;
				}
				control->stats.missing++;
			} else if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are %sallocated!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing,
					firstGroupBlock + pos + block * bitsPerBlock - 1, allocated ? "not " : ""));
				firstMissing = -1;
			}

			if (fCheckBitmap != NULL) {
				// Set the block in the check bitmap as well, but have a look
				// if it is already allocated first
				uint32 offset = pos + block * bitsPerBlock;
				if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
					if (firstSet == -1) {
						firstSet = firstGroupBlock + offset;
						control->errors |= BFS_BLOCKS_ALREADY_SET;
					}
					control->stats.already_set++;
				} else {
					if (firstSet != -1) {
						FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n",
							type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1));
						firstSet = -1;
					}
					SetCheckBitmapAt(firstGroupBlock + offset);
				}
			}
			length++;
			pos++;
		}

		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
			if (firstMissing != -1)
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1));
			if (firstSet != -1)
				FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1));
		}
	}

	return B_OK;
}

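// Illustrative sketch (not part of the original driver; hypothetical name):
// CheckInode() below addresses the indirect arrays in units of block_run
// entries. Since the on-disk block_run is 8 bytes (allocation_group, start,
// length), a 2048 byte file system block holds 2048 / 8 = 256 entries;
// entry "index" lives in block "index / runsPerBlock" at slot
// "index % runsPerBlock".

static inline int32
example_runs_per_block(uint32 blockSize)
{
	return blockSize / sizeof(block_run);
}
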
"not " : "")); 1093 firstMissing = -1; 1094 } 1095 1096 if (fCheckBitmap != NULL) { 1097 // Set the block in the check bitmap as well, but have a look if it 1098 // is already allocated first 1099 uint32 offset = pos + block * bitsPerBlock; 1100 if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1101 if (firstSet == -1) { 1102 firstSet = firstGroupBlock + offset; 1103 control->errors |= BFS_BLOCKS_ALREADY_SET; 1104 } 1105 control->stats.already_set++; 1106 } else { 1107 if (firstSet != -1) { 1108 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", 1109 type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1)); 1110 firstSet = -1; 1111 } 1112 SetCheckBitmapAt(firstGroupBlock + offset); 1113 } 1114 } 1115 length++; 1116 pos++; 1117 } 1118 1119 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1120 if (firstMissing != -1) 1121 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1122 if (firstSet != -1) 1123 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1124 } 1125 } 1126 1127 return B_OK; 1128 } 1129 1130 1131 status_t 1132 BlockAllocator::CheckInode(Inode *inode, check_control *control) 1133 { 1134 if (control != NULL && fCheckBitmap == NULL) 1135 return B_NO_INIT; 1136 if (inode == NULL) 1137 return B_BAD_VALUE; 1138 1139 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1140 if (status < B_OK) 1141 return status; 1142 1143 data_stream *data = &inode->Node()->data; 1144 1145 // check the direct range 1146 1147 if (data->max_direct_range) { 1148 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1149 if (data->direct[i].IsZero()) 1150 break; 1151 1152 status = CheckBlockRun(data->direct[i], "direct", control); 1153 if (status < B_OK) 1154 return status; 1155 } 1156 } 1157 1158 CachedBlock cached(fVolume); 1159 1160 // check the indirect range 1161 1162 if (data->max_indirect_range) { 1163 status = CheckBlockRun(data->indirect, "indirect", control); 1164 if (status < B_OK) 1165 return status; 1166 1167 off_t block = fVolume->ToBlock(data->indirect); 1168 1169 for (int32 i = 0; i < data->indirect.Length(); i++) { 1170 block_run *runs = (block_run *)cached.SetTo(block + i); 1171 if (runs == NULL) 1172 RETURN_ERROR(B_IO_ERROR); 1173 1174 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1175 int32 index = 0; 1176 for (; index < runsPerBlock; index++) { 1177 if (runs[index].IsZero()) 1178 break; 1179 1180 status = CheckBlockRun(runs[index], "indirect->run", control); 1181 if (status < B_OK) 1182 return status; 1183 } 1184 if (index < runsPerBlock) 1185 break; 1186 } 1187 } 1188 1189 // check the double indirect range 1190 1191 if (data->max_double_indirect_range) { 1192 status = CheckBlockRun(data->double_indirect, "double indirect", control); 1193 if (status < B_OK) 1194 return status; 1195 1196 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1197 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1198 1199 CachedBlock cachedDirect(fVolume); 1200 int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift()) 1201 / sizeof(block_run); 1202 1203 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) { 1204 // get the indirect array block 1205 
block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) 1206 + indirectIndex / runsPerBlock); 1207 if (array == NULL) 1208 return B_IO_ERROR; 1209 1210 block_run indirect = array[indirectIndex % runsPerBlock]; 1211 // are we finished yet? 1212 if (indirect.IsZero()) 1213 return B_OK; 1214 1215 status = CheckBlockRun(indirect, "double indirect->runs", control); 1216 if (status < B_OK) 1217 return status; 1218 1219 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run); 1220 1221 for (int32 index = 0; index < maxIndex; ) { 1222 block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect) 1223 + index / runsPerBlock); 1224 if (runs == NULL) 1225 return B_IO_ERROR; 1226 1227 do { 1228 // are we finished yet? 1229 if (runs[index % runsPerBlock].IsZero()) 1230 return B_OK; 1231 1232 status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control); 1233 if (status < B_OK) 1234 return status; 1235 } while ((++index % runsPerArray) != 0); 1236 } 1237 } 1238 } 1239 1240 return B_OK; 1241 } 1242 1243
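

// Illustrative sketch (not part of the original driver; hypothetical name
// and flow): how a consumer like "chkbfs" would drive the checking API
// above - in the real system this happens via ioctl() from userland.

static status_t
example_run_check(BlockAllocator &allocator, check_control &control)
{
	control.magic = BFS_IOCTL_CHECK_MAGIC;
	control.flags = 0;
		// e.g. BFS_FIX_BITMAP_ERRORS to actually repair the bitmap

	status_t status = allocator.StartChecking(&control);
	if (status < B_OK)
		return status;

	// CheckNextNode() returns B_ENTRY_NOT_FOUND once every node was visited
	while (allocator.CheckNextNode(&control) == B_OK) {
		// inspect control.name, control.errors, and control.stats here
	}

	return allocator.StopChecking(&control);
}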