/* BlockAllocator - block bitmap handling and allocation policies
 *
 * Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */


#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "Stack.h"
#include "bfs_control.h"

#include <util/kernel_cpp.h>
#include <stdlib.h>

#ifdef USER
#	define spawn_kernel_thread spawn_thread
#endif


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size nearby a specific position
// - allocate a fuzzy range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead we read in each bitmap block as it is used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// be tested against real world work loads.


struct check_cookie {
	check_cookie() {}

	block_run current;
	Inode *parent;
	mode_t parent_mode;
	Stack<block_run> stack;
	TreeIterator *iterator;
};


class AllocationBlock : public CachedBlock {
	public:
		AllocationBlock(Volume *volume);

		void Allocate(uint16 start, uint16 numBlocks);
		void Free(uint16 start, uint16 numBlocks);
		inline bool IsUsed(uint16 block);

		status_t SetTo(AllocationGroup &group, uint16 block);
		status_t SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block);

		uint32 NumBlockBits() const { return fNumBits; }
		uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; }

	private:
		uint32 fNumBits;
#ifdef DEBUG
		bool fWritable;
#endif
};


class AllocationGroup {
	public:
		AllocationGroup();

		void AddFreeRange(int32 start, int32 blocks);
		bool IsFull() const { return fFreeBits == 0; }

		status_t Allocate(Transaction &transaction, uint16 start, int32 length);
		status_t Free(Transaction &transaction, uint16 start, int32 length);

		uint32 fNumBits;
		int32 fStart;
		int32 fFirstFree, fLargest, fLargestFirst;
			// ToDo: fLargest & fLargestFirst are not maintained (and therefore not used) yet!
		int32 fFreeBits;
};


AllocationBlock::AllocationBlock(Volume *volume)
	: CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in its last block
	if ((block + 1) * fNumBits > group.fNumBits)
		fNumBits = group.fNumBits % fNumBits;

#ifdef DEBUG
	fWritable = false;
#endif
	return CachedBlock::SetTo(group.fStart + block) != NULL ? B_OK : B_ERROR;
}
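

// A worked example of the bit math above (with illustrative numbers, not
// taken from any actual volume): with a block size of 2048 bytes, each
// bitmap block holds 2048 * 8 == 16384 bits and thus covers 16384 blocks.
// For an allocation group of 20000 bits, the second bitmap block would only
// cover the remaining 20000 % 16384 == 3616 bits - which is exactly what
// the modulo above computes for the last block.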


status_t
AllocationBlock::SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block)
{
	// 8 blocks per byte
	fNumBits = fVolume->BlockSize() << 3;
	// the last group may have fewer bits in its last block
	if ((block + 1) * fNumBits > group.fNumBits)
		fNumBits = group.fNumBits % fNumBits;

#ifdef DEBUG
	fWritable = true;
#endif
	return CachedBlock::SetToWritable(transaction, group.fStart + block) != NULL ? B_OK : B_ERROR;
}


bool
AllocationBlock::IsUsed(uint16 block)
{
	if (block >= fNumBits)
		return true;
	// the block bitmap is accessed in 32-bit words
	return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Allocate(): tried to allocate too many blocks: %u (fNumBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Allocate(): tried to allocate too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

#ifdef DEBUG
		// check for blocks that have already been set
		if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) {
			FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks));
			DEBUGGER(("blocks already set!"));
		}
#endif
		Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
		start = 0;
	}
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
	ASSERT(start < fNumBits);
	ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
	ASSERT(fWritable);
#endif

	if (uint32(start + numBlocks) > fNumBits) {
		FATAL(("AllocationBlock::Free(): tried to free too many blocks: %u (fNumBits = %lu)!\n", numBlocks, fNumBits));
		DIE(("AllocationBlock::Free(): tried to free too many blocks"));
	}

	int32 block = start >> 5;

	while (numBlocks > 0) {
		uint32 mask = 0;
		for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
			mask |= 1UL << (i % 32);

		Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
		start = 0;
	}
}
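

// A short worked example of the mask building in Allocate()/Free() above
// (illustrative values only): Allocate(3, 5) builds the mask for bits 3-7
// of the first 32-bit word, i.e. 0x000000f8, and ORs it in. A run that
// crosses a word boundary, like Allocate(30, 4), takes two iterations:
// mask 0xc0000000 for word 0 (bits 30-31), then - with start reset to 0 -
// mask 0x00000003 for word 1.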


// #pragma mark -


AllocationGroup::AllocationGroup()
	:
	fFirstFree(-1),
	fLargest(-1),
	fLargestFirst(-1),
	fFreeBits(0)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
	//D(if (blocks > 512)
	//	PRINT(("range of %ld blocks starting at %ld\n",blocks,start)));

	if (fFirstFree == -1)
		fFirstFree = start;

	if (fLargest < blocks) {
		fLargest = blocks;
		fLargestFirst = start;
	}

	fFreeBits += blocks;
}


/**	Allocates the specified run in the allocation group.
 *	Doesn't check if the run is valid or partially allocated
 *	already, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of allocating some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Allocate(Transaction &transaction, uint16 start, int32 length)
{
	if (start >= fNumBits || uint32(start + length) > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	// Note that the fFirstFree block doesn't have to be really free
	if (start == fFirstFree)
		fFirstFree = start + length;
	fFreeBits -= length;

	Volume *volume = transaction.GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint32 numBlocks = length;
		if (start + numBlocks > cached.NumBlockBits())
			numBlocks = cached.NumBlockBits() - start;

		cached.Allocate(start, numBlocks);

		length -= numBlocks;
		start = 0;
		block++;
	}

	return B_OK;
}


/**	Frees the specified run in the allocation group.
 *	Doesn't check if the run is valid or was not completely
 *	allocated, nor does it maintain the free range hints
 *	or the volume's used blocks count.
 *	It only does the low-level work of freeing some bits
 *	in the block bitmap.
 *	Assumes that the block bitmap lock is held.
 */

status_t
AllocationGroup::Free(Transaction &transaction, uint16 start, int32 length)
{
	if (start >= fNumBits || uint32(start + length) > fNumBits)
		return B_ERROR;

	// Update the allocation group info
	// ToDo: this info will be incorrect if something goes wrong later
	if (fFirstFree > start)
		fFirstFree = start;
	fFreeBits += length;

	Volume *volume = transaction.GetVolume();

	// calculate the block in the block bitmap and the position within
	uint32 bitsPerBlock = volume->BlockSize() << 3;
	uint32 block = start / bitsPerBlock;
	start = start % bitsPerBlock;

	AllocationBlock cached(volume);

	while (length > 0) {
		if (cached.SetToWritable(transaction, *this, block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		uint16 freeLength = length;
		if (uint32(start + length) > cached.NumBlockBits())
			freeLength = cached.NumBlockBits() - start;

		cached.Free(start, freeLength);

		length -= freeLength;
		start = 0;
		block++;
	}
	return B_OK;
}


// #pragma mark -


BlockAllocator::BlockAllocator(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs allocator"),
	fGroups(NULL),
	fCheckBitmap(NULL)
{
}


BlockAllocator::~BlockAllocator()
{
	delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
	if (fLock.InitCheck() < B_OK)
		return B_ERROR;

	fNumGroups = fVolume->AllocationGroups();
	fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
	fGroups = new AllocationGroup[fNumGroups];
	if (fGroups == NULL)
		return B_NO_MEMORY;

	if (!full)
		return B_OK;

	fLock.Lock();
		// the lock will be released by the initialize() function

	thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::initialize,
		"bfs block allocator", B_LOW_PRIORITY, (void *)this);
	if (id < B_OK)
		return initialize(this);

	return resume_thread(id);
}
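

// Note the lock hand-off above: Initialize() acquires fLock before it
// spawns the scanner thread, and initialize() releases the lock once the
// scan is done. Any AllocateBlocks() or Free() call that arrives in the
// meantime simply blocks on fLock until the allocation group data is
// complete.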


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction &transaction)
{
	status_t status = Initialize(false);
	if (status < B_OK)
		return status;

	uint32 blocks = fBlocksPerGroup;
	uint32 numBits = 8 * blocks * fVolume->BlockSize();
	uint32 blockShift = fVolume->BlockShift();

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL)
		RETURN_ERROR(B_NO_MEMORY);

	memset(buffer, 0, numBits >> 3);

	off_t offset = 1;

	// initialize the AllocationGroup objects and clear the on-disk bitmap

	for (int32 i = 0; i < fNumGroups; i++) {
		if (write_pos(fVolume->Device(), offset << blockShift, buffer, blocks << blockShift) < B_OK) {
			free(buffer);
			return B_ERROR;
		}

		// the last allocation group may contain fewer blocks than the others
		fGroups[i].fNumBits = i == fNumGroups - 1 ? fVolume->NumBlocks() - i * numBits : numBits;
		fGroups[i].fStart = offset;
		fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0;
		fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits;

		offset += blocks;
	}
	free(buffer);

	// reserve the boot block, the log area, and the block bitmap itself
	uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

	if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
		FATAL(("could not allocate reserved space for block bitmap/log!\n"));
		return B_ERROR;
	}
	fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

	return B_OK;
}
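

// To make the reserved range above concrete (assuming the standard BFS
// disk layout, where the log directly follows the block bitmap): block 0
// holds the boot block and super block, the bitmap blocks come next
// starting at block 1, and the log sits right behind them - so
// Log().Start() + Log().Length() covers all three areas in one run that
// starts at block 0.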


status_t
BlockAllocator::initialize(BlockAllocator *allocator)
{
	// The lock must already be held at this point

	Volume *volume = allocator->fVolume;
	uint32 blocks = allocator->fBlocksPerGroup;
	uint32 numBits = 8 * blocks * volume->BlockSize();
	uint32 blockShift = volume->BlockShift();
	off_t freeBlocks = 0;

	uint32 *buffer = (uint32 *)malloc(numBits >> 3);
	if (buffer == NULL) {
		allocator->fLock.Unlock();
		RETURN_ERROR(B_NO_MEMORY);
	}

	AllocationGroup *groups = allocator->fGroups;
	off_t offset = 1;
	int32 numGroups = allocator->fNumGroups;

	for (int32 i = 0; i < numGroups; i++) {
		if (read_pos(volume->Device(), offset << blockShift, buffer, blocks << blockShift) < B_OK)
			break;

		// the last allocation group may contain fewer blocks than the others
		groups[i].fNumBits = i == numGroups - 1 ? volume->NumBlocks() - i * numBits : numBits;
		groups[i].fStart = offset;

		// find all free ranges in this allocation group
		int32 start = -1, range = 0;
		int32 size = groups[i].fNumBits, num = 0;

		for (int32 k = 0; k < ((size + 31) >> 5); k++) {
			for (int32 j = 0; j < 32 && num < size; j++, num++) {
				if (buffer[k] & (1UL << j)) {
					// a set bit means the block is in use, so close the
					// current free range, if any
					if (range > 0) {
						groups[i].AddFreeRange(start, range);
						range = 0;
					}
				} else if (range++ == 0) {
					// a clear bit starts a new free range
					start = num;
				}
			}
		}
		if (range)
			groups[i].AddFreeRange(start, range);

		freeBlocks += groups[i].fFreeBits;

		offset += blocks;
	}
	free(buffer);

	// check that the block bitmap and log area are reserved
	uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();
	if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) {
		Transaction transaction(volume, 0);
		if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
			FATAL(("could not allocate reserved space for block bitmap/log!\n"));
			volume->Panic();
		} else {
			FATAL(("space for block bitmap or log area was not reserved!\n"));

			transaction.Done();
		}
	}

	off_t usedBlocks = volume->NumBlocks() - freeBlocks;
	if (volume->UsedBlocks() != usedBlocks) {
		// if the disk was in a dirty state at mount time, it's
		// normal that these values don't match
		INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks));
		volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
	}

	allocator->fLock.Unlock();
	return B_OK;
}
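

// A worked example of the free range scan above (illustrative only): for
// a bitmap word of 0xfffffff0, bits 0-3 are clear, so a range of length 4
// is opened at the first bit and closed again when the set bit 4 is seen,
// yielding one AddFreeRange() call for those four blocks. A free range may
// span several words (or even the whole group) - it is only closed by a
// set bit or by the end of the group.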


status_t
BlockAllocator::AllocateBlocks(Transaction &transaction, int32 group, uint16 start,
	uint16 maximum, uint16 minimum, block_run &run)
{
	if (maximum == 0)
		return B_BAD_VALUE;

	FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum));

	AllocationBlock cached(fVolume);
	Locker lock(fLock);

	// the first scan through all allocation groups will look for the
	// wanted maximum of blocks, the second scan will just look to
	// satisfy the minimal requirement
	uint16 numBlocks = maximum;

	for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) {
		group = group % fNumGroups;

		if (start >= fGroups[group].fNumBits || fGroups[group].IsFull())
			continue;

		if (i >= fNumGroups) {
			// if the minimum is the same as the maximum, it's not necessary
			// to search the allocation groups a second time
			if (maximum == minimum)
				return B_DEVICE_FULL;

			numBlocks = minimum;
		}

		// Skip the group if the wanted number of blocks exceeds the largest
		// free run in it
		// ToDo: disabled because fLargest is currently not maintained after
		// the first allocation
		//if (numBlocks > fGroups[group].fLargest)
		//	continue;

		if (start < fGroups[group].fFirstFree)
			start = fGroups[group].fFirstFree;

		// There may be more than one block in the bitmap per allocation
		// group - we iterate through them to find a place for the
		// allocation (a single allocation cannot exceed one allocation
		// group).

		uint32 block = start / (fVolume->BlockSize() << 3);
		int32 range = 0, rangeStart = 0;

		for (; block < fBlocksPerGroup; block++) {
			if (cached.SetTo(fGroups[group], block) < B_OK)
				RETURN_ERROR(B_ERROR);

			// find a run of free bits large enough to hold the allocation
			for (uint32 bit = start % cached.NumBlockBits(); bit < cached.NumBlockBits(); bit++) {
				if (!cached.IsUsed(bit)) {
					if (range == 0) {
						// start a new range
						rangeStart = block * cached.NumBlockBits() + bit;
					}

					// have we found a range large enough to hold numBlocks?
					if (++range >= maximum)
						break;
				} else if (i >= fNumGroups && range >= minimum) {
					// we have found a range at least as large as the
					// required minimum (second pass)
					break;
				} else {
					// end of a free range
					range = 0;
				}
			}

			// if we found a suitable range, mark the blocks as in use, and
			// write the updated block bitmap back to disk
			if (range >= numBlocks) {
				// adjust the allocation size
				if (numBlocks < maximum)
					numBlocks = range;

				if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK)
					RETURN_ERROR(B_IO_ERROR);

				run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group);
				run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart);
				run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks);

				fVolume->SuperBlock().used_blocks =
					HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks);
					// We are not writing back the disk's super block - this
					// is done either by the journaling code, or when the
					// disk is unmounted.
					// If the value is not correct at mount time, it will be
					// fixed anyway.

				return B_OK;
			}

			// start from the beginning of the next block
			start = 0;
		}
	}
	return B_DEVICE_FULL;
}
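

// Usage sketch with made-up numbers: AllocateBlocks(transaction, 4, 0, 64,
// 1, run) searches for up to 64 contiguous free blocks beginning in
// allocation group 4, and settles for as little as a single block on the
// second pass. On success, run might come back as, say, a block_run of
// (4, 130, 64).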


status_t
BlockAllocator::AllocateForInode(Transaction &transaction, const block_run *parent,
	mode_t type, block_run &run)
{
	// Apply some allocation policies here (AllocateBlocks() will break them
	// if necessary) - we will start with those described in Dominic
	// Giampaolo's "Practical File System Design", and see how well they
	// work.

	// Files go into the same allocation group as their parent,
	// sub-directories are placed 8 allocation groups after the one of
	// the parent.
	uint16 group = parent->AllocationGroup();
	if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
		group += 8;

	return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction &transaction, Inode *inode, off_t numBlocks,
	block_run &run, uint16 minimum)
{
	if (numBlocks <= 0)
		return B_ERROR;

	// one block_run can't hold more data than there is in one allocation
	// group
	if (numBlocks > fGroups[0].fNumBits)
		numBlocks = fGroups[0].fNumBits;

	// Since block_run::length is a uint16, the largest number of blocks that
	// can be covered by a block_run is 65535.
	// ToDo: if we drop compatibility, couldn't we do this any better?
	// There are basically three possibilities:
	// a) since a length of zero doesn't make any sense, take that to mean
	//    65536 - but that could cause many problems (bugs) in other areas
	// b) reduce the maximum amount of blocks per block_run, so that the
	//    remaining number of free blocks can be used in a useful manner
	//    (like 4 blocks) - but that would also reduce the maximum file size
	// c) have BlockRun::Length() return (length + 1)
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near
	// the last one)
	if (inode->Size() > 0) {
		const data_stream &data = inode->Node().data;
		// ToDo: we currently don't care about the case where the data
		// stream has already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go into the same allocation
		// group as the inode is in, but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


status_t
BlockAllocator::Free(Transaction &transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length));

	// We don't use Volume::IsValidBlockRun() here, because this allows
	// better checks against the group size - the last group may have a
	// different length.
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].fNumBits
		|| uint32(start + length) > fGroups[group].fNumBits
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free the reserved areas at the beginning of
	// the drive
	if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%ld, %u, %u)\n", group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

#ifdef DEBUG
	if (CheckBlockRun(run, NULL, NULL, false) < B_OK)
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n"));
#endif

	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}
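

// Putting the policies together (illustrative): a sub-directory whose
// parent inode sits in allocation group 4 gets its inode placed in group
// 12 (4 + 8); a regular file keeps its inode in group 4, but its data
// stream starts in group 5, and later appends first try to continue right
// behind the file's last direct block run.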
size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
}


// #pragma mark -
//	Functions to check the validity of the bitmap - they are used from
//	the "chkbfs" command


bool
BlockAllocator::IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
	if (status < B_OK)
		return status;

	size_t size = BitmapSize();
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize the check bitmap with the reserved blocks already set
	memset(fCheckBitmap, 0, size);
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;)
		SetCheckBitmapAt(block);

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// this is kept so that we can clean up nicely if "chkbfs" exits
		// abnormally

	// ToDo: check the reserved area in the bitmap!

	return B_OK;
}
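

// The intended calling sequence, roughly as "chkbfs" drives it (a sketch
// only - the real tool goes through ioctl(), the fields are the ones from
// bfs_control.h):
//
//	check_control control = {};
//	control.magic = BFS_IOCTL_CHECK_MAGIC;
//	control.flags = BFS_FIX_BITMAP_ERRORS;
//	allocator.StartChecking(&control);
//	while (allocator.CheckNextNode(&control) == B_OK)
//		;	// inspect control.name, control.errors, etc. per node
//	allocator.StopChecking(&control);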


status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// if CheckNextNode() was able to work through the whole tree, we can
	// fix any damage found in the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks from the check bitmap
		size_t size = BitmapSize();
		off_t usedBlocks = 0LL;

		// ToDo: update the allocation groups' used blocks info
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if ((control->flags & BFS_FIX_BITMAP_ERRORS) != 0
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// If so, write the check bitmap back over the original one. We
			// use several transactions here, so that we don't blow the
			// maximum log size on large disks - this doesn't need to be
			// atomic.
			fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}
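

// An example of the bookkeeping above (made-up numbers): if the super
// block claims 1000 used blocks, the check bitmap counts 900, and 20
// blocks were reported missing, then stats.freed is 1000 - 900 + 20 = 120,
// i.e. 120 blocks were marked used in the on-disk bitmap although no node
// references them.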


status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
				control->status = B_ENTRY_NOT_FOUND;
				return B_ENTRY_NOT_FOUND;
			}

			// get an iterator for the next directory

#ifdef UNSAFE_GET_VNODE
			RecursiveLocker locker(fVolume->Lock());
#endif
			Vnode vnode(fVolume, cookie->current);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			if (!inode->IsContainer()) {
				FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			BPlusTree *tree;
			if (inode->GetTree(&tree) != B_OK) {
				FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current)));
				continue;
			}

			cookie->parent = inode;
			cookie->parent_mode = inode->Mode();

			cookie->iterator = new TreeIterator(tree);
			if (cookie->iterator == NULL)
				RETURN_ERROR(B_NO_MEMORY);

			// the inode must stay locked in memory until the iterator
			// is freed
			vnode.Keep();

			// check the inode of the directory
			control->errors = 0;
			control->status = CheckInode(inode, control);

			if (inode->GetName(control->name) < B_OK)
				strcpy(control->name, "(node has no name)");

			control->inode = inode->ID();
			control->mode = inode->Mode();

			return B_OK;
		}

		char name[B_FILE_NAME_LENGTH];
		uint16 length;
		vnode_id id;

		status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id);
		if (status == B_ENTRY_NOT_FOUND) {
			// there are no more entries in this iterator, free it and go on
			delete cookie->iterator;
			cookie->iterator = NULL;

			// unlock the directory's inode from memory
			put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));

			continue;
		} else if (status == B_OK) {
			// ignore "." and ".." entries
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;

			// fill in the control data as soon as we have it
			strlcpy(control->name, name, B_FILE_NAME_LENGTH);
			control->inode = id;
			control->errors = 0;

			Vnode vnode(fVolume, id);
			Inode *inode;
			if (vnode.Get(&inode) < B_OK) {
				FATAL(("Could not open inode ID %Ld!\n", id));
				control->errors |= BFS_COULD_NOT_OPEN;
				control->status = B_ERROR;
				return B_OK;
			}

			// check if the inode's name is the same as in the b+tree
			if (inode->IsRegularNode()) {
				SimpleLocker locker(inode->SmallDataLock());
				NodeGetter node(fVolume, inode);

				const char *localName = inode->Name(node.Node());
				if (localName == NULL || strcmp(localName, name)) {
					control->errors |= BFS_NAMES_DONT_MATCH;
					FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName));
				}
			}

			control->mode = inode->Mode();

			// Check for the correct mode of the node (if the mode of the
			// file doesn't fit its parent, there is a serious problem)
			if (((cookie->parent_mode & S_ATTR_DIR) != 0 && !inode->IsAttribute())
				|| ((cookie->parent_mode & S_INDEX_DIR) != 0 && !inode->IsIndex())
				|| ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) == S_DIRECTORY
					&& (inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR)) != 0)) {
				FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n",
					inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber()));

				// if we are allowed to fix errors, we should remove the file
				if ((control->flags & BFS_REMOVE_WRONG_TYPES) != 0
					&& (control->flags & BFS_FIX_BITMAP_ERRORS) != 0) {
#ifdef UNSAFE_GET_VNODE
					RecursiveLocker locker(fVolume->Lock());
#endif
					// it's safe to start a transaction, because Inode::Remove()
					// won't touch the block bitmap (which we hold the lock for)
					// if we set the INODE_DONT_FREE_SPACE flag - since we fix
					// the bitmap anyway
					Transaction transaction(fVolume, cookie->parent->BlockNumber());

					inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE);
					status = cookie->parent->Remove(transaction, name, NULL, inode->IsContainer());
					if (status == B_OK)
						transaction.Done();
				} else
					status = B_ERROR;

				control->errors |= BFS_WRONG_TYPE;
				control->status = status;
				return B_OK;
			}

			// if the inode has an attribute directory, push it on the stack
			if (!inode->Attributes().IsZero())
				cookie->stack.Push(inode->Attributes());

			// push the directory on the stack so that it will be scanned
			// later
			if (inode->IsContainer() && !inode->IsIndex())
				cookie->stack.Push(inode->BlockRun());
			else {
				// check it now
				control->status = CheckInode(inode, control);

				return B_OK;
			}
		}
	}
	// is never reached
}


bool
BlockAllocator::CheckBitmapIsUsedAt(off_t block) const
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32 bit resolution
	if (index >= size / 4)
		return false;

	return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f));
}


void
BlockAllocator::SetCheckBitmapAt(off_t block)
{
	size_t size = BitmapSize();
	uint32 index = block / 32;	// 32 bit resolution
	if (index >= size / 4)
		return;

	fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}
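

// Example for the indexing above (illustrative): block 100 is tracked by
// bit 100 & 0x1f == 4 in word 100 / 32 == 3 of fCheckBitmap. Unlike the
// regular allocation path, which reads the on-disk bitmap one block at a
// time, the check bitmap covers the whole volume in memory.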


status_t
BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control, bool allocated)
{
	if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
		|| run.Start() > fGroups[run.AllocationGroup()].fNumBits
		|| uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits
		|| run.Length() == 0) {
		PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length()));
		if (control == NULL)
			return B_BAD_DATA;

		control->errors |= BFS_INVALID_BLOCK_RUN;
		return B_OK;
	}

	uint32 bitsPerBlock = fVolume->BlockSize() << 3;
	uint32 block = run.Start() / bitsPerBlock;
	uint32 pos = run.Start() % bitsPerBlock;
	int32 length = 0;
	off_t firstMissing = -1, firstSet = -1;
	off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();

	AllocationBlock cached(fVolume);

	for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
		if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
			RETURN_ERROR(B_IO_ERROR);

		if (pos >= cached.NumBlockBits()) {
			// something very strange has happened...
			RETURN_ERROR(B_ERROR);
		}

		while (length < run.Length() && pos < cached.NumBlockBits()) {
			if (cached.IsUsed(pos) != allocated) {
				if (control == NULL) {
					PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated (pos = %ld, length = %ld)!\n",
						type, run.AllocationGroup(), run.Start(), run.Length(), pos, length));
					return B_BAD_DATA;
				}
				if (firstMissing == -1) {
					firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
					control->errors |= BFS_MISSING_BLOCKS;
				}
				control->stats.missing++;
			} else if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are %sallocated!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing,
					firstGroupBlock + pos + block * bitsPerBlock - 1, allocated ? "not " : ""));
				firstMissing = -1;
			}

			if (fCheckBitmap != NULL) {
				// Set the block in the check bitmap as well, but have a
				// look if it is already allocated first
				uint32 offset = pos + block * bitsPerBlock;
				if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
					if (firstSet == -1) {
						firstSet = firstGroupBlock + offset;
						control->errors |= BFS_BLOCKS_ALREADY_SET;
					}
					control->stats.already_set++;
				} else {
					if (firstSet != -1) {
						FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n",
							type, run.AllocationGroup(), run.Start(), run.Length(), firstSet,
							firstGroupBlock + offset - 1));
						firstSet = -1;
					}
					SetCheckBitmapAt(firstGroupBlock + offset);
				}
			}
			length++;
			pos++;
		}

		if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
			if (firstMissing != -1) {
				PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing,
					firstGroupBlock + pos + block * bitsPerBlock - 1));
			}
			if (firstSet != -1) {
				FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n",
					type, run.AllocationGroup(), run.Start(), run.Length(), firstSet,
					firstGroupBlock + pos + block * bitsPerBlock - 1));
			}
		}
	}

	return B_OK;
}
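

// CheckBlockRun() is used both ways: with allocated == true it verifies
// that a run is completely marked as used (this is what the DEBUG version
// of Free() does before freeing a run), and Free() calls it again with
// allocated == false afterwards to verify that the run is now completely
// cleared. The default arguments are assumed to be declared in the header.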
"not " : "")); 1120 firstMissing = -1; 1121 } 1122 1123 if (fCheckBitmap != NULL) { 1124 // Set the block in the check bitmap as well, but have a look if it 1125 // is already allocated first 1126 uint32 offset = pos + block * bitsPerBlock; 1127 if (CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1128 if (firstSet == -1) { 1129 firstSet = firstGroupBlock + offset; 1130 control->errors |= BFS_BLOCKS_ALREADY_SET; 1131 } 1132 control->stats.already_set++; 1133 } else { 1134 if (firstSet != -1) { 1135 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", 1136 type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1)); 1137 firstSet = -1; 1138 } 1139 SetCheckBitmapAt(firstGroupBlock + offset); 1140 } 1141 } 1142 length++; 1143 pos++; 1144 } 1145 1146 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1147 if (firstMissing != -1) 1148 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1149 if (firstSet != -1) 1150 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1151 } 1152 } 1153 1154 return B_OK; 1155 } 1156 1157 1158 status_t 1159 BlockAllocator::CheckInode(Inode *inode, check_control *control) 1160 { 1161 if (control != NULL && fCheckBitmap == NULL) 1162 return B_NO_INIT; 1163 if (inode == NULL) 1164 return B_BAD_VALUE; 1165 1166 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1167 if (status < B_OK) 1168 return status; 1169 1170 data_stream *data = &inode->Node().data; 1171 1172 // check the direct range 1173 1174 if (data->max_direct_range) { 1175 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1176 if (data->direct[i].IsZero()) 1177 break; 1178 1179 status = CheckBlockRun(data->direct[i], "direct", control); 1180 if (status < B_OK) 1181 return status; 1182 } 1183 } 1184 1185 CachedBlock cached(fVolume); 1186 1187 // check the indirect range 1188 1189 if (data->max_indirect_range) { 1190 status = CheckBlockRun(data->indirect, "indirect", control); 1191 if (status < B_OK) 1192 return status; 1193 1194 off_t block = fVolume->ToBlock(data->indirect); 1195 1196 for (int32 i = 0; i < data->indirect.Length(); i++) { 1197 block_run *runs = (block_run *)cached.SetTo(block + i); 1198 if (runs == NULL) 1199 RETURN_ERROR(B_IO_ERROR); 1200 1201 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1202 int32 index = 0; 1203 for (; index < runsPerBlock; index++) { 1204 if (runs[index].IsZero()) 1205 break; 1206 1207 status = CheckBlockRun(runs[index], "indirect->run", control); 1208 if (status < B_OK) 1209 return status; 1210 } 1211 if (index < runsPerBlock) 1212 break; 1213 } 1214 } 1215 1216 // check the double indirect range 1217 1218 if (data->max_double_indirect_range) { 1219 status = CheckBlockRun(data->double_indirect, "double indirect", control); 1220 if (status < B_OK) 1221 return status; 1222 1223 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1224 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1225 1226 CachedBlock cachedDirect(fVolume); 1227 int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift()) 1228 / sizeof(block_run); 1229 1230 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) { 1231 // get the indirect array block 1232 
	// check the double indirect range

	if (data->max_double_indirect_range) {
		status = CheckBlockRun(data->double_indirect, "double indirect", control);
		if (status < B_OK)
			return status;

		int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);

		CachedBlock cachedDirect(fVolume);
		int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift())
			/ sizeof(block_run);

		for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) {
			// get the indirect array block
			block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect)
				+ indirectIndex / runsPerBlock);
			if (array == NULL)
				return B_IO_ERROR;

			block_run indirect = array[indirectIndex % runsPerBlock];
			// are we finished yet?
			if (indirect.IsZero())
				return B_OK;

			status = CheckBlockRun(indirect, "double indirect->runs", control);
			if (status < B_OK)
				return status;

			int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run);

			for (int32 index = 0; index < maxIndex; ) {
				block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect)
					+ index / runsPerBlock);
				if (runs == NULL)
					return B_IO_ERROR;

				do {
					// are we finished yet?
					if (runs[index % runsPerBlock].IsZero())
						return B_OK;

					status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control);
					if (status < B_OK)
						return status;
				} while ((++index % runsPerBlock) != 0);
					// stay within the block of runs fetched above; the
					// outer loop fetches the next one
			}
		}
	}

	return B_OK;
}