/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/


#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "Stack.h"
#include "bfs_control.h"

#include <kernel_cpp.h>
#include <stdlib.h>

#ifdef USER
#	define spawn_kernel_thread spawn_thread
#endif


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate a fuzzy range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on that in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead, we read in a bitmap block only when it's needed - since an
// allocation group can span several blocks in the block bitmap, the
// AllocationBlock class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// be tested against real world workloads.


struct check_cookie {
	check_cookie() {}

	block_run			current;
	Inode				*parent;
	mode_t				parent_mode;
	Stack<block_run>	stack;
	TreeIterator		*iterator;
};


class AllocationBlock : public CachedBlock {
	public:
		AllocationBlock(Volume *volume);

		void Allocate(uint16 start, uint16 numBlocks);
		void Free(uint16 start, uint16 numBlocks);
		inline bool IsUsed(uint16 block);

		status_t SetTo(AllocationGroup &group, uint16 block);

		uint32 NumBlockBits() const { return fNumBits; }
		uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; }

	private:
		uint32 fNumBits;
};


class AllocationGroup {
	public:
		AllocationGroup();

		void AddFreeRange(int32 start, int32 blocks);
		bool IsFull() const { return fFreeBits == 0; }

		status_t Allocate(Transaction *transaction, uint16 start, int32 length);
		status_t Free(Transaction *transaction, uint16 start, int32 length);

		uint32 fNumBits;
		int32 fStart;
		int32 fFirstFree, fLargest, fLargestFirst;
			// ToDo: fLargest & fLargestFirst are not maintained (and therefore not used) yet!
		int32 fFreeBits;
};


AllocationBlock::AllocationBlock(Volume *volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in its last block
	if ((block + 1) * fNumBits > group.fNumBits)
		fNumBits = group.fNumBits % fNumBits;

	return CachedBlock::SetTo(group.fStart + block) != NULL ? B_OK : B_ERROR;
}
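

// A worked example for the trimming in SetTo() (illustrative numbers):
// with 2048 byte blocks, every bitmap block covers 2048 << 3 == 16384
// bits. If the group itself only has 20000 bits, its second bitmap block
// (block == 1) is trimmed to 20000 % 16384 == 3616 valid bits - IsUsed()
// below relies on fNumBits to reject accesses beyond that boundary.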


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block >= fNumBits)
		return true;
	// the block bitmap is accessed in 32-bit chunks
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Allocate(): tried to allocate too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Allocate(): tried to allocate too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

#ifdef DEBUG
		// check for blocks that are already set
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			DEBUGGER(("blocks already set!"));
		}
#endif
		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Free(): tried to free too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Free(): tried to free too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


//	#pragma mark -


AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (fLargest < blocks) {
		fLargest = blocks;
		fLargestFirst = start;
	}

	fFreeBits += blocks;
}
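

// AddFreeRange() is only called while the on-disk bitmap is scanned at
// mount time (see BlockAllocator::initialize() below). A minimal sketch
// of its bookkeeping:
//
//	AllocationGroup group;
//	group.AddFreeRange(0, 100);		// bits 0-99 are free
//	group.AddFreeRange(200, 50);	// bits 200-249 are free
//	// now: fFirstFree == 0, fLargest == 100, fFreeBits == 150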


/**	Allocates the specified run in the allocation group.
 *	Doesn't check if the run is valid or already partially
 *	allocated, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of allocating some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Allocate(Transaction *transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/**	Frees the specified run in the allocation group.
 *	Doesn't check if the run is valid or was not completely
 *	allocated, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of freeing some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Free(Transaction *transaction, uint16 start, int32 length)
{
	if (start > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= freeLength;
		start = 0;
		block++;
	}
	return B_OK;
}


//	#pragma mark -


BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs allocator"),
	fGroups(NULL),
	fCheckBitmap(NULL)
{
}


BlockAllocator::~BlockAllocator()
{
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fGroups = new AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::initialize,
		"bfs block allocator", B_LOW_PRIORITY, (void *)this);
	if (id < B_OK)
		return initialize(this);

	return resume_thread(id);
}
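

// Note that Initialize(false) only sets up the in-memory group array and
// doesn't touch the on-disk bitmap at all - this mode is used by
// InitializeAndClearBitmap() below, which computes the group data itself
// while it clears the bitmap.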


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction &transaction)
{
	status_t status = Initialize(false);
	if (status < B_OK)
		return status;

	uint32 blocks = fBlocksPerGroup;
	uint32 numBits = 8 * blocks * fVolume->BlockSize();

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (cached_write(fVolume->Device(), offset, buffer, blocks, fVolume->BlockSize()) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		fGroups[i].fNumBits = i == fNumGroups - 1 ? fVolume->NumBlocks() - i * numBits : numBits;
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits;

		offset += blocks;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(&transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}
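

// The reserved area always starts at block 0: the boot block and super
// block live there, and the log follows the block bitmap. With, for
// example, Log().Start() == 5 and Log().Length() == 512 (illustrative
// numbers), the Allocate() call above marks blocks 0 through 516 as
// used, and used_blocks starts out as 517.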


status_t
BlockAllocator::initialize(BlockAllocator *allocator)
{
	Locker lock(allocator->fLock);

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 numBits = 8 * blocks * volume->BlockSize();
	off_t freeBlocks = 0;

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup *groups = allocator->fGroups;
	off_t offset = 1;
	int32 num = allocator->fNumGroups;

	for (int32 i = 0; i < num; i++) {
		if (cached_read(volume->Device(), offset, buffer, blocks, volume->BlockSize()) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		groups[i].fNumBits = i == num - 1 ? volume->NumBlocks() - i * numBits : numBits;
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 size = groups[i].fNumBits, bit = 0;

		for (int32 k = 0; k < (size >> 2); k++) {
			for (int32 j = 0; j < 32 && bit < size; j++, bit++) {
				if (buffer[k] & (1UL << j)) {
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0)
					start = bit;
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check that the block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();
	if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) {
		Transaction transaction(volume, 0);
		if (groups[0].Allocate(&transaction, 0, reservedBlocks) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			volume->Panic();
		} else {
			FATAL(("space for block bitmap or log area was not reserved!\n"));

			transaction.Done();
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk was in a dirty state at mount time, it's
		// normal that these values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	return B_OK;
}
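

// A typical call of AllocateBlocks() below could look like this (an
// illustrative sketch only - the real call sites with their policies are
// AllocateForInode() and Allocate() further down): ask for up to 64
// blocks near the beginning of group 5, but settle for a single block if
// the volume is fragmented:
//
//	block_run run;
//	if (AllocateBlocks(transaction, 5, 0, 64, 1, run) == B_OK) {
//		// "run" now describes 1 to 64 contiguous blocks in some group
//	}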


status_t
BlockAllocator::AllocateBlocks(Transaction *transaction, int32 group, uint16 start,
	uint16 maximum, uint16 minimum, block_run &run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// the first scan through all allocation groups will look for the
	// wanted maximum of blocks, the second scan will just look to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;

	for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].fNumBits || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// if the minimum is the same as the maximum, it's not necessary
			// to search the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// Skip the group if the wanted number of blocks exceeds the largest
		// free range in it
		// ToDo: disabled because fLargest is currently not maintained after
		// the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// there may be more than one block per allocation group - and
		// we iterate through them to find a place for the allocation.
		// (one allocation can't exceed one allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 range = 0, rangeStart = 0;

		for (; block < fBlocksPerGroup; block++) {
			if (cached.SetTo(fGroups[group], block) < B_OK)
				RETURN_ERROR(B_ERROR);

			// find a range large enough to hold the allocation
			for (uint32 bit = start % cached.NumBlockBits(); bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start a new range
						rangeStart = block * cached.NumBlockBits() + bit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a range at least as large as the
					// required minimum (second pass)
					break;
				} else {
					// end of a range
					range = 0;
				}
			}

			// if we found a suitable range, mark the blocks as in use, and
			// write the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust the allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK)
					RETURN_ERROR(B_IO_ERROR);

				run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group);
				run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart);
				run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks);

				fVolume->SuperBlock().used_blocks =
					HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks);
					// We are not writing back the disk's super block - it's
					// either done by the journaling code, or when the disk
					// is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				return B_OK;
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}


status_t
BlockAllocator::AllocateForInode(Transaction *transaction, const block_run *parent,
	mode_t type, block_run &run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic
	// Giampaolo's "Practical File System Design", and see how well they work.

	// Files go into the same allocation group as their parent; sub-directories
	// are placed 8 allocation groups after the one of their parent.
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}
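

// An example of the policy above: an inode that is created in a parent
// directory living in allocation group 3 goes into group 3 if it is a
// regular file, but into group 11 (3 + 8) if it is a directory itself.
// If that target group doesn't exist, AllocateBlocks() simply wraps
// around to the beginning of the disk (group % fNumGroups).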


status_t
BlockAllocator::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks,
	block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].fNumBits)
		numBlocks = fGroups[0].fNumBits;

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// ToDo: if we drop compatibility, couldn't we do this any better?
	// There are basically two possibilities:
	// a) since a length of zero doesn't make any sense, take that for 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near the last one)
	if (inode->Size() > 0) {
		data_stream *data = &inode->Node()->data;
		// ToDo: we don't care yet about the case where the data stream
		// has already grown into the indirect ranges
		if (data->max_double_indirect_range == 0
			&& data->max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data->direct[last + 1].IsZero())
					break;

			group = data->direct[last].AllocationGroup();
			start = data->direct[last].Start() + data->direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go into the same allocation
		// group as the inode is in, but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction *transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length));

	// We don't use Volume::IsValidBlockRun() here because this check can
	// be more precise against the group size (the last group may have a
	// different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].fNumBits
		|| uint32(start + length) > fGroups[group].fNumBits
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone is trying to free the reserved areas at the beginning of the drive
	if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

#ifdef DEBUG
	if (CheckBlockRun(run) == B_OK)
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n"));
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
}
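

// Example: a volume with 128 allocation groups of 4 bitmap blocks each
// and a block size of 1024 bytes yields a BitmapSize() of 512 kB - this
// is also the amount of memory that StartChecking() below allocates for
// its check bitmap.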


//	#pragma mark -
//	Functions to check the validity of the bitmap - they are used from
//	the "chkbfs" command


bool
BlockAllocator::IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
	if (status < B_OK)
		return status;

	size_t size = BitmapSize();
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize the check bitmap; the reserved blocks are always marked used
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;)
		SetCheckBitmapAt(block);

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// to be able to restore nicely if "chkbfs" exited abnormally

	// ToDo: check the reserved area in the bitmap!

	return B_OK;
}
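

// The checking API above and below is driven from userland (the "chkbfs"
// command) through ioctl(); a rough sketch of the sequence, with error
// handling and the ioctl() indirection omitted, and assuming an in-kernel
// "BlockAllocator &allocator" at hand:
//
//	check_control control;
//	memset(&control, 0, sizeof(control));
//	control.magic = BFS_IOCTL_CHECK_MAGIC;
//	control.flags = BFS_FIX_BITMAP_ERRORS;
//	allocator.StartChecking(&control);
//	while (allocator.CheckNextNode(&control) == B_OK)
//		;
//	allocator.StopChecking(&control);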


status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// if CheckNextNode() was able to work through the whole tree, we can
	// fix any damage to the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks in the check bitmap
		size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
		off_t usedBlocks = 0LL;

		// ToDo: update the used blocks info of the allocation groups, too
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if (control->flags & BFS_FIX_BITMAP_ERRORS
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// If so, write the check bitmap back over the original one.
			// We use transactions here to play it safe - in fact, several
			// transactions, so that we don't blow the maximum log size
			// on large disks; this doesn't need to be atomic anyway.
			fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}
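

// CheckNextNode() below implements an iterative depth-first traversal of
// the whole hierarchy: the stack in the check_cookie holds the block_runs
// of directories (and attribute directories) that still have to be
// visited, while cookie->iterator walks through the entries of the
// current directory one call at a time.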


status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
				control->status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			// get an iterator for the next directory

#ifdef UNSAFE_GET_VNODE
			RecursiveLocker locker(fVolume->Lock());
#endif
			Vnode vnode(fVolume, cookie->current);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			if (!inode->IsContainer()) {
				FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			BPlusTree *tree;
			if (inode->GetTree(&tree) != B_OK) {
				FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			cookie->parent = inode;
			cookie->parent_mode = inode->Mode();

			cookie->iterator = new TreeIterator(tree);
			if (cookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator is freed
			vnode.Keep();

			// check the inode of the directory
			control->errors = 0;
			control->status = CheckInode(inode, control);

			if (inode->GetName(control->name) < B_OK)
				strcpy(control->name, "(node has no name)");

			control->inode = inode->ID();
			control->mode = inode->Mode();

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		vnode_id id;

		status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
		if (status == B_ENTRY_NOT_FOUND) {
			// there are no more entries in this iterator, free it and go on
			delete cookie->iterator;
			cookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));

			continue;
		} else if (status == B_OK) {
			// ignore "." and ".." entries
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;

			// fill in the control data as soon as we have it
			strlcpy(control->name, name, B_FILE_NAME_LENGTH);
			control->inode = id;
			control->errors = 0;

			Vnode vnode(fVolume, id);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("Could not open inode ID %Ld!\n", id));
				control->errors |= BFS_COULD_NOT_OPEN;
				control->status = B_ERROR;
				return B_OK;
			}

			// check if the inode's name is the same as in the b+tree
			if (inode->IsRegularNode()) {
				SimpleLocker locker(inode->SmallDataLock());

				const char *localName = inode->Name();
				if (localName == NULL || strcmp(localName, name)) {
					control->errors |= BFS_NAMES_DONT_MATCH;
					FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName));
				}
			}

			control->mode = inode->Mode();

			// Check for the correct mode of the node (if the mode of the
			// file doesn't fit to its parent, there is a serious problem)
			if ((cookie->parent_mode & S_ATTR_DIR && !inode->IsAttribute())
				|| (cookie->parent_mode & S_INDEX_DIR && !inode->IsIndex())
				|| ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) == S_DIRECTORY
					&& inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR))) {
				FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n",
					inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber()));

				// if we are allowed to fix errors, we should remove the file
				if (control->flags & BFS_REMOVE_WRONG_TYPES
					&& control->flags & BFS_FIX_BITMAP_ERRORS) {
#ifdef UNSAFE_GET_VNODE
					RecursiveLocker locker(fVolume->Lock());
#endif
					// it's safe to start a transaction, because Inode::Remove()
					// won't touch the block bitmap (which we hold the lock for)
					// if we set the INODE_DONT_FREE_SPACE flag - since we fix
					// the bitmap anyway
					Transaction transaction(fVolume, cookie->parent->BlockNumber());

					inode->Node()->flags |= INODE_DONT_FREE_SPACE;
					status = cookie->parent->Remove(&transaction, name, NULL, inode->IsContainer());
					if (status == B_OK)
						transaction.Done();
				} else
					status = B_ERROR;

				control->errors |= BFS_WRONG_TYPE;
				control->status = status;
				return B_OK;
			}

			// If the inode has an attribute directory, push it on the stack
			if (!inode->Attributes().IsZero())
				cookie->stack.Push(inode->Attributes());

			// push the directory on the stack so that it will be scanned later
			if (inode->IsContainer() && !inode->IsIndex())
				cookie->stack.Push(inode->BlockRun());
			else {
				// check it now
				control->status = CheckInode(inode, control);

				return B_OK;
			}
		}
	}
	// is never reached
}


bool
BlockAllocator::CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32-bit resolution
	if (index >= size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f));
}


void
BlockAllocator::SetCheckBitmapAt(off_t block)
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32-bit resolution
	if (index >= size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}
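

// Bit bookkeeping example for the two functions above (illustrative
// numbers): block 70 is tracked in fCheckBitmap[70 / 32], which is
// fCheckBitmap[2], at bit position 70 & 0x1f == 6.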


status_t
BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
		if (control == NULL)
			return B_BAD_DATA;

		control->errors |= BFS_INVALID_BLOCK_RUN;
		return B_OK;
	}

	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.Start() / bitsPerBlock;
	uint32 pos = run.Start() % bitsPerBlock;
	int32 length = 0;
	off_t firstMissing = -1, firstSet = -1;
	off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();

	AllocationBlock cached(fVolume);

	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		if (pos >= cached.NumBlockBits()) {
			// something very strange has happened...
			RETURN_ERROR(B_ERROR);
		}

		while (length < run.Length() && pos < cached.NumBlockBits()) {
			if (!cached.IsUsed(pos)) {
				if (control == NULL) {
					PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
					return B_BAD_DATA;
				}
				if (firstMissing == -1) {
					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
					control->errors |= BFS_MISSING_BLOCKS;
				}
				control->stats.missing++;
			} else if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1));
				firstMissing = -1;
			}

			if (fCheckBitmap != NULL) {
				// Set the block in the check bitmap as well, but have a look
				// if it is already allocated first
				uint32 offset = pos + block * bitsPerBlock;
				if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
					if (firstSet == -1) {
						firstSet = firstGroupBlock + offset;
						control->errors |= BFS_BLOCKS_ALREADY_SET;
					}
					control->stats.already_set++;
				} else {
					if (firstSet != -1) {
						FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n",
							type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1));
						firstSet = -1;
					}
					SetCheckBitmapAt(firstGroupBlock + offset);
				}
			}
			length++;
			pos++;
		}

		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
			if (firstMissing != -1)
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1));
			if (firstSet != -1)
				FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1));
		}
	}

	return B_OK;
}
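

// CheckInode() below verifies every block_run that an inode references:
// the run of the inode itself, the direct runs, the indirect run and the
// runs stored inside it, and finally both levels of the double indirect
// range.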


status_t
BlockAllocator::CheckInode(Inode *inode, check_control *control)
{
	if (control != NULL && fCheckBitmap == NULL)
		return B_NO_INIT;
	if (inode == NULL)
		return B_BAD_VALUE;

	status_t status = CheckBlockRun(inode->BlockRun(), "inode", control);
	if (status < B_OK)
		return status;

	data_stream *data = &inode->Node()->data;

	// check the direct range

	if (data->max_direct_range) {
		for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
			if (data->direct[i].IsZero())
				break;

			status = CheckBlockRun(data->direct[i], "direct", control);
			if (status < B_OK)
				return status;
		}
	}

	CachedBlock cached(fVolume);

	// check the indirect range

	if (data->max_indirect_range) {
		status = CheckBlockRun(data->indirect, "indirect", control);
		if (status < B_OK)
			return status;

		off_t block = fVolume->ToBlock(data->indirect);

		for (int32 i = 0; i < data->indirect.Length(); i++) {
			block_run *runs = (block_run *)cached.SetTo(block + i);
			if (runs == NULL)
				RETURN_ERROR(B_IO_ERROR);

			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
			int32 index = 0;
			for (; index < runsPerBlock; index++) {
				if (runs[index].IsZero())
					break;

				status = CheckBlockRun(runs[index], "indirect->run", control);
				if (status < B_OK)
					return status;
			}
			if (index < runsPerBlock)
				break;
		}
	}

	// check the double indirect range

	if (data->max_double_indirect_range) {
		status = CheckBlockRun(data->double_indirect, "double indirect", control);
		if (status < B_OK)
			return status;

		int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);

		CachedBlock cachedDirect(fVolume);
		int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift())
			/ sizeof(block_run);

		for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) {
			// get the indirect array block
			block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect)
				+ indirectIndex / runsPerBlock);
			if (array == NULL)
				return B_IO_ERROR;

			block_run indirect = array[indirectIndex % runsPerBlock];
			// are we finished yet?
			if (indirect.IsZero())
				return B_OK;

			status = CheckBlockRun(indirect, "double indirect->runs", control);
			if (status < B_OK)
				return status;

			int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run);

			for (int32 index = 0; index < maxIndex; ) {
				block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect)
					+ index / runsPerBlock);
				if (runs == NULL)
					return B_IO_ERROR;

				do {
					// are we finished yet?
					if (runs[index % runsPerBlock].IsZero())
						return B_OK;

					status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control);
					if (status < B_OK)
						return status;
				} while ((++index % runsPerBlock) != 0);
					// stop at every block boundary, so that the outer loop
					// fetches the next block of runs
			}
		}
	}

	return B_OK;
}