/* BlockAllocator - block bitmap handling and allocation policies
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/


#include "cpp.h"
#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "Stack.h"
#include "bfs_control.h"

#include <malloc.h>

#ifdef USER
#	define spawn_kernel_thread spawn_thread
#endif


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate a fuzzy range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we do not just read in
// the whole bitmap and operate on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the
// size of partitions will grow even more - so that's not an option.
// Instead, a bitmap block is only read in when it is actually needed - and
// since an allocation group can span several blocks in the block bitmap,
// the AllocationBlock class is there to make handling them easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// get some real world testing.


struct check_cookie {
	check_cookie() {}

	block_run current;
	Inode *parent;
	mode_t parent_mode;
	Stack<block_run> stack;
	TreeIterator *iterator;
};


class AllocationBlock : public CachedBlock {
	public:
		AllocationBlock(Volume *volume);

		void Allocate(uint16 start, uint16 numBlocks);
		void Free(uint16 start, uint16 numBlocks);
		inline bool IsUsed(uint16 block);

		status_t SetTo(AllocationGroup &group, uint16 block);

		uint32 NumBlockBits() const { return fNumBits; }
		uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; }

	private:
		uint32 fNumBits;
};


class AllocationGroup {
	public:
		AllocationGroup();

		void AddFreeRange(int32 start, int32 blocks);
		bool IsFull() const { return fFreeBits == 0; }

		status_t Allocate(Transaction *transaction, uint16 start, int32 length);
		status_t Free(Transaction *transaction, uint16 start, int32 length);

		uint32 fNumBits;
		int32 fStart;
		int32 fFirstFree, fLargest, fLargestFirst;
			// ToDo: fLargest & fLargestFirst are not maintained (and therefore not used) yet!
		int32 fFreeBits;
};


AllocationBlock::AllocationBlock(Volume *volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in its last block
	if ((block + 1) * fNumBits > group.fNumBits)
		fNumBits = group.fNumBits % fNumBits;

	return CachedBlock::SetTo(group.fStart + block) != NULL ? B_OK : B_ERROR;
}
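

// A minimal sketch (not compiled in) of the bitmap block arithmetic done
// in SetTo() above, using invented numbers: a bitmap block of 2048 bytes
// covers 2048 << 3 = 16384 bits, and the last block of a group that only
// has 20000 bits left covers just 20000 % 16384 = 3616 of them.
#if 0
#include <stdio.h>

static void
example_bitmap_block_bits()
{
	uint32 blockSize = 2048;				// hypothetical volume block size
	uint32 bitsPerBlock = blockSize << 3;	// 8 bits per byte -> 16384 bits
	uint32 groupBits = 20000;				// hypothetical bits in this group

	for (uint16 block = 0; block < 2; block++) {
		uint32 numBits = bitsPerBlock;
		if ((block + 1) * numBits > groupBits)
			numBits = groupBits % numBits;

		printf("bitmap block %u covers %lu bits\n", block, (unsigned long)numBits);
			// prints 16384 bits for block 0, 3616 bits for block 1
	}
}
#endif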


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block >= fNumBits)
		return true;
	// the block bitmap is accessed in 32-bit blocks
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Allocate(): tried to allocate too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Allocate(): tried to allocate too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

#ifdef DEBUG
		// check for blocks that are already set
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			DEBUGGER(("blocks already set!"));
		}
#endif
		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Free(): tried to free too many blocks: %u (numBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Free(): tried to free too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << i;

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}


// #pragma mark -


AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (fLargest < blocks) {
		fLargest = blocks;
		fLargestFirst = start;
	}

	fFreeBits += blocks;
}
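

// A small sketch (not compiled in) of the mask building used by
// AllocationBlock::Allocate() and Free() above: to cover bits 3..9 of a
// 32-bit bitmap word, the loop accumulates the mask 0x3f8.
#if 0
#include <stdio.h>

static void
example_build_mask()
{
	uint16 start = 3, numBlocks = 7;
	uint32 mask = 0;

	for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
		mask |= 1UL << i;

	printf("mask = %#lx\n", (unsigned long)mask);
		// prints "mask = 0x3f8" - bits 3 to 9 are set
}
#endif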


/** Allocates the specified run in the allocation group.
 *	Doesn't check if the run is valid or partially allocated
 *	already, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of allocating some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Allocate(Transaction *transaction, uint16 start, int32 length)
{
	if (uint32(start + length) > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	// Note, the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within it
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/** Frees the specified run in the allocation group.
 *	Doesn't check if the run is valid or was not completely
 *	allocated, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of freeing some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Free(Transaction *transaction, uint16 start, int32 length)
{
	if (uint32(start + length) > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	Volume *volume = transaction->GetVolume();

	// calculate the block in the block bitmap and the position within it
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetTo(*this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		if (cached.WriteBack(transaction) < B_OK)
			return B_IO_ERROR;

		length -= freeLength;
		start = 0;
		block++;
	}
	return B_OK;
}


// #pragma mark -


BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs allocator"),
	fGroups(NULL),
	fCheckBitmap(NULL)
{
}


BlockAllocator::~BlockAllocator()
{
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize()
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fGroups = new AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::initialize,
		"bfs block allocator", B_LOW_PRIORITY, (void *)this);
	if (id < B_OK)
		return initialize(this);

	return resume_thread(id);
}
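

// Sketch (not compiled in) of how AllocationGroup::Allocate()/Free() above
// split one run across several bitmap blocks, with invented numbers: at
// 16384 bits per bitmap block, a run starting at bit 16380 with length 10
// becomes a chunk of 4 bits in block 0 and a chunk of 6 bits in block 1.
#if 0
#include <stdio.h>

static void
example_split_run()
{
	uint32 bitsPerBlock = 16384;
	uint32 block = 16380 / bitsPerBlock;	// bitmap block of the first bit
	uint32 start = 16380 % bitsPerBlock;	// position within that block
	int32 length = 10;

	while (length > 0) {
		uint32 numBlocks = length;
		if (start + numBlocks > bitsPerBlock)
			numBlocks = bitsPerBlock - start;

		printf("bitmap block %lu: bits %lu - %lu\n", (unsigned long)block,
			(unsigned long)start, (unsigned long)(start + numBlocks - 1));

		length -= numBlocks;
		start = 0;
		block++;
	}
}
#endif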


status_t
BlockAllocator::initialize(BlockAllocator *allocator)
{
	Locker lock(allocator->fLock);

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 numBits = 8 * blocks * volume->BlockSize();
	off_t freeBlocks = 0;

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	AllocationGroup *groups = allocator->fGroups;
	off_t offset = 1;
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (cached_read(volume->Device(), offset, buffer, blocks, volume->BlockSize()) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		groups[i].fNumBits = i == numGroups - 1 ? volume->NumBlocks() - i * numBits : numBits;
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 size = groups[i].fNumBits, num = 0;

		for (int32 k = 0; k < (size >> 2); k++) {
			for (int32 j = 0; j < 32 && num < size; j++, num++) {
				if (buffer[k] & (1UL << j)) {
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0)
					start = num;
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check if block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();
	if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) {
		Transaction transaction(volume, 0);
		if (groups[0].Allocate(&transaction, 0, reservedBlocks) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			volume->Panic();
		} else {
			FATAL(("space for block bitmap or log area was not reserved!\n"));

			transaction.Done();
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// If the disk was in a dirty state at mount time, it's
		// normal that these values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	return B_OK;
}
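

// Sketch (not compiled in) of the free range scan in initialize() above,
// reduced to a single 32-bit bitmap word: free (zero) bits are collected
// into (start, length) ranges, exactly as AddFreeRange() receives them.
#if 0
#include <stdio.h>

static void
example_scan_free_ranges()
{
	uint32 word = 0x000000f0;	// bits 4-7 used, everything else free
	int32 start = -1, range = 0;

	for (int32 j = 0; j < 32; j++) {
		if (word & (1UL << j)) {
			if (range > 0) {
				printf("free range: start %ld, length %ld\n", start, range);
				range = 0;
			}
		} else if (range++ == 0)
			start = j;
	}
	if (range)
		printf("free range: start %ld, length %ld\n", start, range);
	// prints the ranges (0, 4) and (8, 24)
}
#endif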


status_t
BlockAllocator::AllocateBlocks(Transaction *transaction, int32 group, uint16 start,
	uint16 maximum, uint16 minimum, block_run &run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// the first scan through all allocation groups will look for the
	// wanted maximum of blocks, the second scan will just look to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;

	for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].fNumBits || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// if the minimum is the same as the maximum, it's not necessary
			// to search through the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// The wanted maximum is smaller than the largest free block in the group
		// or already smaller than the minimum
		// ToDo: disabled because it's currently not maintained after the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// there may be more than one block per allocation group - and
		// we iterate through them to find a place for the allocation.
		// (one allocation can't exceed one allocation group)

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 range = 0, rangeStart = 0;

		for (; block < fBlocksPerGroup; block++) {
			if (cached.SetTo(fGroups[group], block) < B_OK)
				RETURN_ERROR(B_ERROR);

			// find a range large enough to hold the allocation
			for (uint32 bit = start % cached.NumBlockBits(); bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start new range
						rangeStart = block * cached.NumBlockBits() + bit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a range larger than the required minimum (second pass)
					break;
				} else {
					// end of a range
					range = 0;
				}
			}

			// if we found a suitable range, mark the blocks as in use, and
			// write the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK)
					RETURN_ERROR(B_IO_ERROR);

				run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group);
				run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart);
				run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks);

				fVolume->SuperBlock().used_blocks =
					HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks);
					// We are not writing back the disk's super block - it's
					// either done by the journaling code, or when the disk
					// is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				return B_OK;
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}


status_t
BlockAllocator::AllocateForInode(Transaction *transaction, const block_run *parent,
	mode_t type, block_run &run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic Giampaolo's
	// "Practical File System Design", and see how well they work.

	// Files go into the same allocation group as their parent, sub-directories
	// will be inserted 8 allocation groups after the one of the parent.
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}
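

// Sketch (not compiled in) of the placement policy in AllocateForInode()
// above, with an invented parent group: an inode created in a directory
// that lives in allocation group 3 stays in group 3, while a new plain
// sub-directory is pushed 8 groups further; wrapping around the end of
// the disk is left to AllocateBlocks(), which scans modulo fNumGroups.
#if 0
#include <stdio.h>

static void
example_inode_policy()
{
	mode_t type = S_DIRECTORY;	// a plain directory, no index/attribute bits
	uint16 parentGroup = 3;		// hypothetical group of the parent directory
	uint16 group = parentGroup;

	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	printf("inode goes into allocation group %u\n", group);
		// prints group 11 for the directory; a plain file would stay in 3
}
#endif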


status_t
BlockAllocator::Allocate(Transaction *transaction, const Inode *inode, off_t numBlocks,
	block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation group
	if (numBlocks > fGroups[0].fNumBits)
		numBlocks = fGroups[0].fNumBits;

	// since block_run.length is uint16, the largest number of blocks that
	// can be covered by a block_run is 65535
	// ToDo: if we drop compatibility, couldn't we do this any better?
	// There are basically two possibilities:
	// a) since a length of zero doesn't make any sense, take that for 65536 -
	//    but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the remaining
	//    number of free blocks can be used in a useful manner (like 4 blocks) -
	//    but that would also reduce the maximum file size
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near the last one)
	if (inode->Size() > 0) {
		data_stream *data = &inode->Node()->data;
		// ToDo: we currently don't handle the case where the data stream
		// has already grown into the indirect ranges
		if (data->max_double_indirect_range == 0
			&& data->max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data->direct[last + 1].IsZero())
					break;

			group = data->direct[last].AllocationGroup();
			start = data->direct[last].Start() + data->direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go into the same allocation
		// group as the inode is in, but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction *transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	// doesn't use Volume::IsValidBlockRun() here because this can check better
	// against the group size (the last group may have a different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].fNumBits
		|| uint32(start + length) > fGroups[group].fNumBits
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the drive
	if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


// #pragma mark -
// Functions to check the validity of the bitmap - they are used by
// the "chkbfs" command


bool
BlockAllocator::IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}
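

// Sketch (not compiled in) of why Allocate() above caps a single run: the
// block_run length field is a uint16, so with the 65535 limit mentioned in
// the comment there and an assumed block size of 2048 bytes, one run can
// describe at most 65535 * 2048 bytes (roughly 128 MB) of contiguous data.
#if 0
#include <stdio.h>

static void
example_max_run_size()
{
	uint32 maxRunLength = 65535;	// largest value a uint16 length can hold
	uint32 blockSize = 2048;		// hypothetical volume block size

	printf("max bytes per block_run: %llu\n",
		(unsigned long long)maxRunLength * blockSize);	// prints 134215680
}
#endif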


status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
	if (status < B_OK)
		return status;

	size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize bitmap
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;)
		SetCheckBitmapAt(block);

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// to be able to restore nicely if "chkbfs" exited abnormally

	// ToDo: check reserved area in bitmap!

	return B_OK;
}
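

// Sketch (not compiled in) of how a checker like "chkbfs" is expected to
// drive the three methods around here: StartChecking() once, then
// CheckNextNode() until it reports B_ENTRY_NOT_FOUND, then StopChecking()
// to optionally write the repaired bitmap back. The "allocator" and
// "control" objects are assumed to be set up by the caller, including
// the control's magic field.
#if 0
static void
example_check_loop(BlockAllocator &allocator, check_control &control)
{
	if (allocator.StartChecking(&control) < B_OK)
		return;

	while (allocator.CheckNextNode(&control) == B_OK) {
		// inspect control.name, control.errors, and control.status here
	}

	allocator.StopChecking(&control);
}
#endif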


status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// if CheckNextNode() was able to work through completely, we can
	// fix any damage to the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks in the check bitmap
		size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
		off_t usedBlocks = 0LL;

		// ToDo: update the allocation groups used blocks info
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if (control->flags & BFS_FIX_BITMAP_ERRORS
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// If so, write the check bitmap back over the original one.
			// We use transactions here to play safe - several of them,
			// so that we don't blow the maximum log size on large disks;
			// this doesn't need to be atomic anyway.
			fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}
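

// Sketch (not compiled in) of the bit counting done in StopChecking()
// above, reduced to a single word: every set bit in the check bitmap
// stands for one used block.
#if 0
#include <stdio.h>

static void
example_count_used_blocks()
{
	uint32 word = 0x000000ff;	// 8 used blocks
	off_t usedBlocks = 0;

	uint32 compare = 1;
	for (int16 j = 0; j < 32; j++, compare <<= 1) {
		if (compare & word)
			usedBlocks++;
	}

	printf("%Ld used blocks\n", usedBlocks);	// prints "8 used blocks"
}
#endif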


status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
				control->status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			// get iterator for the next directory

			Vnode vnode(fVolume, cookie->current);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			if (!inode->IsContainer()) {
				FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			BPlusTree *tree;
			if (inode->GetTree(&tree) != B_OK) {
				FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			cookie->parent = inode;
			cookie->parent_mode = inode->Mode();

			cookie->iterator = new TreeIterator(tree);
			if (cookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator is freed
			vnode.Keep();

			// check the inode of the directory
			control->errors = 0;
			control->status = CheckInode(inode, control);

			if (inode->GetName(control->name) < B_OK)
				strcpy(control->name, "(node has no name)");

			control->inode = inode->ID();
			control->mode = inode->Mode();

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		vnode_id id;

		status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
		if (status == B_ENTRY_NOT_FOUND) {
			// there are no more entries in this iterator, free it and go on
			delete cookie->iterator;
			cookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));

			continue;
		} else if (status == B_OK) {
			// ignore "." and ".." entries
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;

			// fill in the control data as soon as we have it
			strlcpy(control->name, name, B_FILE_NAME_LENGTH);
			control->inode = id;
			control->errors = 0;

			Vnode vnode(fVolume, id);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("Could not open inode ID %Ld!\n", id));
				control->errors |= BFS_COULD_NOT_OPEN;
				control->status = B_ERROR;
				return B_OK;
			}

			// check if the inode's name is the same as in the b+tree
			if (inode->IsRegularNode()) {
				SimpleLocker locker(inode->SmallDataLock());

				const char *localName = inode->Name();
				if (localName == NULL || strcmp(localName, name)) {
					control->errors |= BFS_NAMES_DONT_MATCH;
					FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName));
				}
			}

			control->mode = inode->Mode();

			// Check for the correct mode of the node (if the mode of the
			// file doesn't fit to its parent, there is a serious problem)
			if ((cookie->parent_mode & S_ATTR_DIR && !inode->IsAttribute())
				|| (cookie->parent_mode & S_INDEX_DIR && !inode->IsIndex())
				|| ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) == S_DIRECTORY
					&& inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR))) {
				FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n",
					inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber()));

				// if we are allowed to fix errors, we should remove the file
				if (control->flags & BFS_REMOVE_WRONG_TYPES
					&& control->flags & BFS_FIX_BITMAP_ERRORS) {
					// it's safe to start a transaction, because Inode::Remove()
					// won't touch the block bitmap (which we hold the lock for)
					// if we set the INODE_DONT_FREE_SPACE flag - since we fix
					// the bitmap anyway
					Transaction transaction(fVolume, cookie->parent->BlockNumber());

					inode->Node()->flags |= INODE_DONT_FREE_SPACE;
					status = cookie->parent->Remove(&transaction, name, NULL, inode->IsContainer());
					if (status == B_OK)
						transaction.Done();
				} else
					status = B_ERROR;

				control->errors |= BFS_WRONG_TYPE;
				control->status = status;
				return B_OK;
			}

			// If the inode has an attribute directory, push it on the stack
			if (!inode->Attributes().IsZero())
				cookie->stack.Push(inode->Attributes());

			// push the directory on the stack so that it will be scanned later
			if (inode->IsContainer() && !inode->IsIndex())
				cookie->stack.Push(inode->BlockRun());
			else {
				// check it now
				control->status = CheckInode(inode, control);

				return B_OK;
			}
		}
	}
	// is never reached
}


bool
BlockAllocator::CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
	uint32 index = block / 32;	// 32 bit resolution
	if (index >= size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f));
}


void
BlockAllocator::SetCheckBitmapAt(off_t block)
{
	size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
	uint32 index = block / 32;	// 32 bit resolution
	if (index >= size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}
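

// Sketch (not compiled in) of the index arithmetic in the two check
// bitmap accessors above: block 75 lives in 32-bit word 75 / 32 = 2,
// at bit position 75 & 0x1f = 11.
#if 0
#include <stdio.h>

static void
example_check_bitmap_position()
{
	off_t block = 75;
	uint32 index = block / 32;	// word that holds the bit -> 2
	uint32 bit = block & 0x1f;	// position within that word -> 11

	printf("block %Ld -> word %lu, bit %lu\n", block, (unsigned long)index,
		(unsigned long)bit);
}
#endif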


status_t
BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
		if (control == NULL)
			return B_BAD_DATA;

		control->errors |= BFS_INVALID_BLOCK_RUN;
		return B_OK;
	}

	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.Start() / bitsPerBlock;
	uint32 pos = run.Start() % bitsPerBlock;
	int32 length = 0;
	off_t firstMissing = -1, firstSet = -1;
	off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();

	AllocationBlock cached(fVolume);

	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		if (pos >= cached.NumBlockBits()) {
			// something very strange has happened...
			RETURN_ERROR(B_ERROR);
		}

		while (length < run.Length() && pos < cached.NumBlockBits()) {
			if (!cached.IsUsed(pos)) {
				if (control == NULL) {
					PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
					return B_BAD_DATA;
				}
				if (firstMissing == -1) {
					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
					control->errors |= BFS_MISSING_BLOCKS;
				}
				control->stats.missing++;
			} else if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1));
				firstMissing = -1;
			}

			if (fCheckBitmap != NULL) {
				// Set the block in the check bitmap as well, but have a look if it
				// is already allocated first
				uint32 offset = pos + block * bitsPerBlock;
				if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
					if (firstSet == -1) {
						firstSet = firstGroupBlock + offset;
						control->errors |= BFS_BLOCKS_ALREADY_SET;
					}
					control->stats.already_set++;
				} else {
					if (firstSet != -1) {
						FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n",
							type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1));
						firstSet = -1;
					}
					SetCheckBitmapAt(firstGroupBlock + offset);
				}
			}
			length++;
			pos++;
		}

		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
			if (firstMissing != -1)
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1));
			if (firstSet != -1)
				FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1));
		}
	}

	return B_OK;
}


status_t
BlockAllocator::CheckInode(Inode *inode, check_control *control)
{
	if (control != NULL && fCheckBitmap == NULL)
		return B_NO_INIT;
	if (inode == NULL)
		return B_BAD_VALUE;

	status_t status = CheckBlockRun(inode->BlockRun(), "inode", control);
	if (status < B_OK)
		return status;

	data_stream *data = &inode->Node()->data;

	// check the direct range

	if (data->max_direct_range) {
		for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
			if (data->direct[i].IsZero())
				break;

			status = CheckBlockRun(data->direct[i], "direct", control);
			if (status < B_OK)
				return status;
		}
	}

	CachedBlock cached(fVolume);

	// check the indirect range

	if (data->max_indirect_range) {
		status = CheckBlockRun(data->indirect, "indirect", control);
		if (status < B_OK)
			return status;

		off_t block = fVolume->ToBlock(data->indirect);

		for (int32 i = 0; i < data->indirect.Length(); i++) {
			block_run *runs = (block_run *)cached.SetTo(block + i);
			if (runs == NULL)
				RETURN_ERROR(B_IO_ERROR);

			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
			int32 index = 0;
			for (; index < runsPerBlock; index++) {
				if (runs[index].IsZero())
					break;

				status = CheckBlockRun(runs[index], "indirect->run", control);
				if (status < B_OK)
					return status;
			}
			if (index < runsPerBlock)
				break;
		}
	}

	// check the double indirect range

	if (data->max_double_indirect_range) {
		status = CheckBlockRun(data->double_indirect, "double indirect", control);
		if (status < B_OK)
			return status;

		int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);

		CachedBlock cachedDirect(fVolume);
		int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift())
			/ sizeof(block_run);

		for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) {
			// get the indirect array block
			block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect)
				+ indirectIndex / runsPerBlock);
			if (array == NULL)
				return B_IO_ERROR;

			block_run indirect = array[indirectIndex % runsPerBlock];
			// are we finished yet?
			if (indirect.IsZero())
				return B_OK;

			status = CheckBlockRun(indirect, "double indirect->runs", control);
			if (status < B_OK)
				return status;

			int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run);

			for (int32 index = 0; index < maxIndex; ) {
				block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect)
					+ index / runsPerBlock);
				if (runs == NULL)
					return B_IO_ERROR;

				do {
					// are we finished yet?
					if (runs[index % runsPerBlock].IsZero())
						return B_OK;

					status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control);
					if (status < B_OK)
						return status;
				} while ((++index % runsPerBlock) != 0);
					// the cached block only holds runsPerBlock runs, so the
					// outer loop has to fetch the next block at this point
			}
		}
	}

	return B_OK;
}
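

// Sketch (not compiled in) of the index arithmetic used by CheckInode()
// above when walking the indirect ranges, with an assumed 1024 byte block
// size: a block_run is 8 bytes (int32 + uint16 + uint16), so one block
// holds 1024 / 8 = 128 block_runs, and run number 300 is found in block
// 300 / 128 = 2 at slot 300 % 128 = 44.
#if 0
#include <stdio.h>

static void
example_runs_per_block()
{
	uint32 blockSize = 1024;	// hypothetical volume block size
	uint32 runSize = 8;			// sizeof(block_run)
	uint32 runsPerBlock = blockSize / runSize;
	uint32 index = 300;

	printf("run %lu -> block %lu, slot %lu\n", (unsigned long)index,
		(unsigned long)(index / runsPerBlock),
		(unsigned long)(index % runsPerBlock));
}
#endif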