/* BlockAllocator - block bitmap handling and allocation policies
 *
 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */


#include "Debug.h"
#include "BlockAllocator.h"
#include "Volume.h"
#include "Inode.h"
#include "BPlusTree.h"
#include "bfs_control.h"

#include "system_dependencies.h"

#ifdef USER
#	define spawn_kernel_thread spawn_thread
#endif


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size nearby a specific position
// - allocating an unsharp range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead we are reading in every block when it's used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// have some real world tests.
41 42 43 struct check_cookie { 44 check_cookie() {} 45 46 block_run current; 47 Inode *parent; 48 mode_t parent_mode; 49 Stack<block_run> stack; 50 TreeIterator *iterator; 51 }; 52 53 54 class AllocationBlock : public CachedBlock { 55 public: 56 AllocationBlock(Volume *volume); 57 58 void Allocate(uint16 start, uint16 numBlocks); 59 void Free(uint16 start, uint16 numBlocks); 60 inline bool IsUsed(uint16 block); 61 62 status_t SetTo(AllocationGroup &group, uint16 block); 63 status_t SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block); 64 65 uint32 NumBlockBits() const { return fNumBits; } 66 uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; } 67 68 private: 69 uint32 fNumBits; 70 #ifdef DEBUG 71 bool fWritable; 72 #endif 73 }; 74 75 76 class AllocationGroup { 77 public: 78 AllocationGroup(); 79 80 void AddFreeRange(int32 start, int32 blocks); 81 bool IsFull() const { return fFreeBits == 0; } 82 83 status_t Allocate(Transaction &transaction, uint16 start, int32 length); 84 status_t Free(Transaction &transaction, uint16 start, int32 length); 85 86 uint32 NumBits() const { return fNumBits; } 87 uint32 NumBlocks() const { return fNumBlocks; } 88 int32 Start() const { return fStart; } 89 90 private: 91 friend class BlockAllocator; 92 93 uint32 fNumBits; 94 uint32 fNumBlocks; 95 int32 fStart; 96 int32 fFirstFree, fLargest, fLargestFirst; 97 // ToDo: fLargest & fLargestFirst are not maintained (and therefore used) yet! 
98 int32 fFreeBits; 99 }; 100 101 102 AllocationBlock::AllocationBlock(Volume *volume) 103 : CachedBlock(volume) 104 { 105 } 106 107 108 status_t 109 AllocationBlock::SetTo(AllocationGroup &group, uint16 block) 110 { 111 // 8 blocks per byte 112 fNumBits = fVolume->BlockSize() << 3; 113 // the last group may have less bits than the others 114 if ((block + 1) * fNumBits > group.NumBits()) 115 fNumBits = group.NumBits() % fNumBits; 116 117 #ifdef DEBUG 118 fWritable = false; 119 #endif 120 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 121 } 122 123 124 status_t 125 AllocationBlock::SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block) 126 { 127 // 8 blocks per byte 128 fNumBits = fVolume->BlockSize() << 3; 129 // the last group may have less bits in the last block 130 if ((block + 1) * fNumBits > group.NumBits()) 131 fNumBits = group.NumBits() % fNumBits; 132 133 #ifdef DEBUG 134 fWritable = true; 135 #endif 136 return CachedBlock::SetToWritable(transaction, group.Start() + block) != NULL 137 ? 
B_OK : B_ERROR; 138 } 139 140 141 bool 142 AllocationBlock::IsUsed(uint16 block) 143 { 144 if (block > fNumBits) 145 return true; 146 147 // the block bitmap is accessed in 32-bit blocks 148 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 149 } 150 151 152 void 153 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 154 { 155 ASSERT(start < fNumBits); 156 ASSERT(uint32(start + numBlocks) <= fNumBits); 157 #ifdef DEBUG 158 ASSERT(fWritable); 159 #endif 160 161 if (uint32(start + numBlocks) > fNumBits) { 162 FATAL(("Allocation::Allocate(): tried to allocate too many blocks: %u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits)); 163 DIE(("Allocation::Allocate(): tried to allocate too many blocks")); 164 } 165 166 int32 block = start >> 5; 167 168 while (numBlocks > 0) { 169 uint32 mask = 0; 170 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 171 mask |= 1UL << (i % 32); 172 173 #ifdef DEBUG 174 // check for already set blocks 175 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) { 176 FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 177 DEBUGGER(("blocks already set!")); 178 } 179 #endif 180 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 181 start = 0; 182 } 183 } 184 185 186 void 187 AllocationBlock::Free(uint16 start, uint16 numBlocks) 188 { 189 ASSERT(start < fNumBits); 190 ASSERT(uint32(start + numBlocks) <= fNumBits); 191 #ifdef DEBUG 192 ASSERT(fWritable); 193 #endif 194 195 if (uint32(start + numBlocks) > fNumBits) { 196 FATAL(("Allocation::Free(): tried to free too many blocks: %u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits)); 197 DIE(("Allocation::Free(): tried to free too many blocks")); 198 } 199 200 int32 block = start >> 5; 201 202 while (numBlocks > 0) { 203 uint32 mask = 0; 204 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 205 mask |= 1UL << (i % 32); 206 207 Block(block++) &= 
HOST_ENDIAN_TO_BFS_INT32(~mask); 208 start = 0; 209 } 210 } 211 212 213 // #pragma mark - 214 215 216 /** The allocation groups are created and initialized in 217 * BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 218 * respectively. 219 */ 220 221 AllocationGroup::AllocationGroup() 222 : 223 fFirstFree(-1), 224 fLargest(-1), 225 fLargestFirst(-1), 226 fFreeBits(0) 227 { 228 } 229 230 231 void 232 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 233 { 234 //D(if (blocks > 512) 235 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 236 237 if (fFirstFree == -1) 238 fFirstFree = start; 239 240 if (fLargest < blocks) { 241 fLargest = blocks; 242 fLargestFirst = start; 243 } 244 245 fFreeBits += blocks; 246 } 247 248 249 /** Allocates the specified run in the allocation group. 250 * Doesn't check if the run is valid or already allocated 251 * partially, nor does it maintain the free ranges hints 252 * or the volume's used blocks count. 253 * It only does the low-level work of allocating some bits 254 * in the block bitmap. 255 * Assumes that the block bitmap lock is hold. 
256 */ 257 258 status_t 259 AllocationGroup::Allocate(Transaction &transaction, uint16 start, int32 length) 260 { 261 if (start > fNumBits) 262 return B_ERROR; 263 264 // Update the allocation group info 265 // ToDo: this info will be incorrect if something goes wrong later 266 // Note, the fFirstFree block doesn't have to be really free 267 if (start == fFirstFree) 268 fFirstFree = start + length; 269 fFreeBits -= length; 270 271 Volume *volume = transaction.GetVolume(); 272 273 // calculate block in the block bitmap and position within 274 uint32 bitsPerBlock = volume->BlockSize() << 3; 275 uint32 block = start / bitsPerBlock; 276 start = start % bitsPerBlock; 277 278 AllocationBlock cached(volume); 279 280 while (length > 0) { 281 if (cached.SetToWritable(transaction, *this, block) < B_OK) 282 RETURN_ERROR(B_IO_ERROR); 283 284 uint32 numBlocks = length; 285 if (start + numBlocks > cached.NumBlockBits()) 286 numBlocks = cached.NumBlockBits() - start; 287 288 cached.Allocate(start, numBlocks); 289 290 length -= numBlocks; 291 start = 0; 292 block++; 293 } 294 295 return B_OK; 296 } 297 298 299 /** Frees the specified run in the allocation group. 300 * Doesn't check if the run is valid or was not completely 301 * allocated, nor does it maintain the free ranges hints 302 * or the volume's used blocks count. 303 * It only does the low-level work of freeing some bits 304 * in the block bitmap. 305 * Assumes that the block bitmap lock is hold. 
306 */ 307 308 status_t 309 AllocationGroup::Free(Transaction &transaction, uint16 start, int32 length) 310 { 311 if (start > fNumBits) 312 return B_ERROR; 313 314 // Update the allocation group info 315 // ToDo: this info will be incorrect if something goes wrong later 316 if (fFirstFree > start) 317 fFirstFree = start; 318 fFreeBits += length; 319 320 Volume *volume = transaction.GetVolume(); 321 322 // calculate block in the block bitmap and position within 323 uint32 bitsPerBlock = volume->BlockSize() << 3; 324 uint32 block = start / bitsPerBlock; 325 start = start % bitsPerBlock; 326 327 AllocationBlock cached(volume); 328 329 while (length > 0) { 330 if (cached.SetToWritable(transaction, *this, block) < B_OK) 331 RETURN_ERROR(B_IO_ERROR); 332 333 uint16 freeLength = length; 334 if (uint32(start + length) > cached.NumBlockBits()) 335 freeLength = cached.NumBlockBits() - start; 336 337 cached.Free(start, freeLength); 338 339 length -= freeLength; 340 start = 0; 341 block++; 342 } 343 return B_OK; 344 } 345 346 347 // #pragma mark - 348 349 350 BlockAllocator::BlockAllocator(Volume *volume) 351 : 352 fVolume(volume), 353 fLock("bfs allocator"), 354 fGroups(NULL), 355 fCheckBitmap(NULL) 356 { 357 } 358 359 360 BlockAllocator::~BlockAllocator() 361 { 362 delete[] fGroups; 363 } 364 365 366 status_t 367 BlockAllocator::Initialize(bool full) 368 { 369 if (fLock.InitCheck() < B_OK) 370 return B_ERROR; 371 372 fNumGroups = fVolume->AllocationGroups(); 373 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 374 fGroups = new AllocationGroup[fNumGroups]; 375 if (fGroups == NULL) 376 return B_NO_MEMORY; 377 378 if (!full) 379 return B_OK; 380 381 fLock.Lock(); 382 // the lock will be released by the initialize() function 383 384 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 385 "bfs block allocator", B_LOW_PRIORITY, (void *)this); 386 if (id < B_OK) 387 return _Initialize(this); 388 389 return resume_thread(id); 390 } 391 
392 393 status_t 394 BlockAllocator::InitializeAndClearBitmap(Transaction &transaction) 395 { 396 status_t status = Initialize(false); 397 if (status < B_OK) 398 return status; 399 400 uint32 blocks = fBlocksPerGroup; 401 uint32 numBits = 8 * blocks * fVolume->BlockSize(); 402 uint32 blockShift = fVolume->BlockShift(); 403 404 uint32 *buffer = (uint32 *)malloc(numBits >> 3); 405 if (buffer == NULL) 406 RETURN_ERROR(B_NO_MEMORY); 407 408 memset(buffer, 0, numBits >> 3); 409 410 off_t offset = 1; 411 412 // initialize the AllocationGroup objects and clear the on-disk bitmap 413 414 for (int32 i = 0; i < fNumGroups; i++) { 415 if (write_pos(fVolume->Device(), offset << blockShift, buffer, blocks << blockShift) < B_OK) 416 return B_ERROR; 417 418 // the last allocation group may contain less blocks than the others 419 if (i == fNumGroups - 1) { 420 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 421 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) >> (blockShift + 3)); 422 } else { 423 fGroups[i].fNumBits = numBits; 424 fGroups[i].fNumBlocks = blocks; 425 } 426 fGroups[i].fStart = offset; 427 fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0; 428 fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits; 429 430 offset += blocks; 431 } 432 free(buffer); 433 434 // reserve the boot block, the log area, and the block bitmap itself 435 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 436 437 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 438 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 439 return B_ERROR; 440 } 441 fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 442 443 return B_OK; 444 } 445 446 447 status_t 448 BlockAllocator::_Initialize(BlockAllocator *allocator) 449 { 450 // The lock must already be held at this point 451 452 Volume *volume = allocator->fVolume; 453 uint32 blocks = allocator->fBlocksPerGroup; 454 uint32 numBits = 8 * blocks * 
volume->BlockSize(); 455 uint32 blockShift = volume->BlockShift(); 456 off_t freeBlocks = 0; 457 458 uint32 *buffer = (uint32 *)malloc(numBits >> 3); 459 if (buffer == NULL) { 460 allocator->fLock.Unlock(); 461 RETURN_ERROR(B_NO_MEMORY); 462 } 463 464 AllocationGroup *groups = allocator->fGroups; 465 off_t offset = 1; 466 int32 num = allocator->fNumGroups; 467 468 for (int32 i = 0; i < num; i++) { 469 if (read_pos(volume->Device(), offset << blockShift, buffer, 470 blocks << blockShift) < B_OK) 471 break; 472 473 // the last allocation group may contain less blocks than the others 474 if (i == num - 1) { 475 groups[i].fNumBits = volume->NumBlocks() - i * numBits; 476 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) >> (blockShift + 3)); 477 } else { 478 groups[i].fNumBits = numBits; 479 groups[i].fNumBlocks = blocks; 480 } 481 groups[i].fStart = offset; 482 483 // finds all free ranges in this allocation group 484 int32 start = -1, range = 0; 485 int32 size = groups[i].fNumBits, num = 0; 486 487 for (int32 k = 0;k < (size >> 2);k++) { 488 for (int32 j = 0; j < 32 && num < size; j++, num++) { 489 if (buffer[k] & (1UL << j)) { 490 if (range > 0) { 491 groups[i].AddFreeRange(start, range); 492 range = 0; 493 } 494 } else if (range++ == 0) 495 start = num; 496 } 497 } 498 if (range) 499 groups[i].AddFreeRange(start, range); 500 501 freeBlocks += groups[i].fFreeBits; 502 503 offset += blocks; 504 } 505 free(buffer); 506 507 // check if block bitmap and log area are reserved 508 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 509 if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) { 510 Transaction transaction(volume, 0); 511 if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 512 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 513 volume->Panic(); 514 } else { 515 FATAL(("space for block bitmap or log area was not reserved!\n")); 516 517 transaction.Done(); 518 } 519 } 520 521 off_t 
usedBlocks = volume->NumBlocks() - freeBlocks; 522 if (volume->UsedBlocks() != usedBlocks) { 523 // If the disk in a dirty state at mount time, it's 524 // normal that the values don't match 525 INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks)); 526 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 527 } 528 529 allocator->fLock.Unlock(); 530 return B_OK; 531 } 532 533 534 status_t 535 BlockAllocator::AllocateBlocks(Transaction &transaction, int32 group, uint16 start, 536 uint16 maximum, uint16 minimum, block_run &run) 537 { 538 if (maximum == 0) 539 return B_BAD_VALUE; 540 541 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum)); 542 543 AllocationBlock cached(fVolume); 544 Locker lock(fLock); 545 546 // the first scan through all allocation groups will look for the 547 // wanted maximum of blocks, the second scan will just look to 548 // satisfy the minimal requirement 549 uint16 numBlocks = maximum; 550 551 for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) { 552 group = group % fNumGroups; 553 554 if (start >= fGroups[group].NumBits() || fGroups[group].IsFull()) 555 continue; 556 557 if (i >= fNumGroups) { 558 // if the minimum is the same as the maximum, it's not necessary to 559 // search for in the allocation groups a second time 560 if (maximum == minimum) 561 return B_DEVICE_FULL; 562 563 numBlocks = minimum; 564 } 565 566 // The wanted maximum is smaller than the largest free block in the group 567 // or already smaller than the minimum 568 // ToDo: disabled because it's currently not maintained after the first allocation 569 //if (numBlocks > fGroups[group].fLargest) 570 // continue; 571 572 if (start < fGroups[group].fFirstFree) 573 start = fGroups[group].fFirstFree; 574 575 // there may be more than one block per allocation group - and 576 // we iterate through it to find a place for the allocation. 
577 // (one allocation can't exceed one allocation group) 578 579 uint32 block = start / (fVolume->BlockSize() << 3); 580 int32 range = 0, rangeStart = 0; 581 582 for (; block < fGroups[group].NumBlocks(); block++) { 583 if (cached.SetTo(fGroups[group], block) < B_OK) 584 RETURN_ERROR(B_ERROR); 585 586 // find a block large enough to hold the allocation 587 for (uint32 bit = start % cached.NumBlockBits(); 588 bit < cached.NumBlockBits(); bit++) { 589 if (!cached.IsUsed(bit)) { 590 if (range == 0) { 591 // start new range 592 rangeStart = block * cached.NumBlockBits() + bit; 593 } 594 595 // have we found a range large enough to hold numBlocks? 596 if (++range >= maximum) 597 break; 598 } else if (i >= fNumGroups && range >= minimum) { 599 // we have found a block larger than the required minimum (second pass) 600 break; 601 } else { 602 // end of a range 603 range = 0; 604 } 605 } 606 607 // ToDo: we could also remember a "largest free block that fits the minimal 608 // requirement" in the group, and use that - this would avoid the need 609 // for a second run 610 611 // if we found a suitable block, mark the blocks as in use, and write 612 // the updated block bitmap back to disk 613 if (range >= numBlocks) { 614 // adjust allocation size 615 if (numBlocks < maximum) 616 numBlocks = range; 617 618 if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK) 619 RETURN_ERROR(B_IO_ERROR); 620 621 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group); 622 run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart); 623 run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks); 624 625 fVolume->SuperBlock().used_blocks = 626 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks); 627 // We are not writing back the disk's super block - it's 628 // either done by the journaling code, or when the disk 629 // is unmounted. 630 // If the value is not correct at mount time, it will be 631 // fixed anyway. 
632 633 return B_OK; 634 } 635 636 // start from the beginning of the next block 637 start = 0; 638 } 639 } 640 return B_DEVICE_FULL; 641 } 642 643 644 status_t 645 BlockAllocator::AllocateForInode(Transaction &transaction, const block_run *parent, 646 mode_t type, block_run &run) 647 { 648 // apply some allocation policies here (AllocateBlocks() will break them 649 // if necessary) - we will start with those described in Dominic Giampaolo's 650 // "Practical File System Design", and see how good they work 651 652 // files are going in the same allocation group as its parent, sub-directories 653 // will be inserted 8 allocation groups after the one of the parent 654 uint16 group = parent->AllocationGroup(); 655 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 656 group += 8; 657 658 return AllocateBlocks(transaction, group, 0, 1, 1, run); 659 } 660 661 662 status_t 663 BlockAllocator::Allocate(Transaction &transaction, Inode *inode, off_t numBlocks, 664 block_run &run, uint16 minimum) 665 { 666 if (numBlocks <= 0) 667 return B_ERROR; 668 669 // one block_run can't hold more data than there is in one allocation group 670 if (numBlocks > fGroups[0].NumBits()) 671 numBlocks = fGroups[0].NumBits(); 672 673 // since block_run.length is uint16, the largest number of blocks that 674 // can be covered by a block_run is 65535 675 // ToDo: if we drop compatibility, couldn't we do this any better? 676 // There are basically two possibilities: 677 // a) since a length of zero doesn't have any sense, take that for 65536 - 678 // but that could cause many problems (bugs) in other areas 679 // b) reduce the maximum amount of blocks per block_run, so that the remaining 680 // number of free blocks can be used in a useful manner (like 4 blocks) - 681 // but that would also reduce the maximum file size 682 // c) have BlockRun::Length() return (length + 1). 
	if (numBlocks > MAX_BLOCK_RUN_LENGTH)
		numBlocks = MAX_BLOCK_RUN_LENGTH;

	// apply some allocation policies here (AllocateBlocks() will break them
	// if necessary)
	uint16 group = inode->BlockRun().AllocationGroup();
	uint16 start = 0;

	// are there already allocated blocks? (then just try to allocate near the last one)
	if (inode->Size() > 0) {
		const data_stream &data = inode->Node().data;
		// ToDo: we currently don't care for when the data stream
		// is already grown into the indirect ranges
		if (data.max_double_indirect_range == 0
			&& data.max_indirect_range == 0) {
			// Since size > 0, there must be a valid block run in this stream
			int32 last = 0;
			for (; last < NUM_DIRECT_BLOCKS - 1; last++)
				if (data.direct[last + 1].IsZero())
					break;

			// aim right behind the last direct run of the stream
			group = data.direct[last].AllocationGroup();
			start = data.direct[last].Start() + data.direct[last].Length();
		}
	} else if (inode->IsContainer() || inode->IsSymLink()) {
		// directory and symbolic link data will go in the same allocation
		// group as the inode is in but after the inode data
		start = inode->BlockRun().Start();
	} else {
		// file data will start in the next allocation group
		group = inode->BlockRun().AllocationGroup() + 1;
	}

	return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}


/**	Validates the given run against its allocation group and the reserved
 *	area, then clears its bits in the block bitmap and updates the in-memory
 *	used blocks count. Takes the allocator lock for the duration.
 */
status_t
BlockAllocator::Free(Transaction &transaction, block_run run)
{
	Locker lock(fLock);

	int32 group = run.AllocationGroup();
	uint16 start = run.Start();
	uint16 length = run.Length();

	FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length));

	// doesn't use Volume::IsValidBlockRun() here because it can check better
	// against the group size (the last group may have a different length)
	if (group < 0 || group >= fNumGroups
		|| start > fGroups[group].NumBits()
		|| uint32(start + length) > fGroups[group].NumBits()
		|| length == 0) {
		FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, start, length));
		DEBUGGER(("tried to free invalid block_run"));
		return B_BAD_VALUE;
	}
	// check if someone tries to free reserved areas at the beginning of the drive
	if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
		FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, start, length));
		DEBUGGER(("tried to free reserved block"));
		return B_BAD_VALUE;
	}
#ifdef DEBUG
	// paranoia: the run must currently be fully allocated
	if (CheckBlockRun(run) < B_OK)
		return B_BAD_DATA;
#endif

	if (fGroups[group].Free(transaction, start, length) < B_OK)
		RETURN_ERROR(B_IO_ERROR);

#ifdef DEBUG
	// paranoia: the run must now be fully free again
	if (CheckBlockRun(run, NULL, NULL, false) < B_OK)
		DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n"));
#endif

	// in-memory only; written back by the journal or at unmount
	fVolume->SuperBlock().used_blocks =
		HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
	return B_OK;
}


// Returns the size of the on-disk block bitmap in bytes.
size_t
BlockAllocator::BitmapSize() const
{
	return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
}


// #pragma mark -
// Functions to check the validity of the bitmap - they are used from
// the "chkbfs" command


// Returns true if "control" is a usable check_control structure
// (non-NULL and carrying the right magic value).
bool
BlockAllocator::_IsValidCheckControl(check_control *control)
{
	if (control == NULL
		|| control->magic != BFS_IOCTL_CHECK_MAGIC) {
		FATAL(("invalid check_control (%p)!\n", control));
		return false;
	}

	return true;
}


/**	Begins a "chkbfs" session: takes the allocator lock (held until
 *	StopChecking()), allocates the shadow check bitmap with the reserved
 *	area pre-marked, and seeds the traversal cookie with the root and
 *	indices directories.
 */
status_t
BlockAllocator::StartChecking(check_control *control)
{
	if (!_IsValidCheckControl(control))
		return B_BAD_VALUE;

	status_t status = fLock.Lock();
		// the lock stays held for the whole check session
	if (status < B_OK)
		return status;

	size_t size = BitmapSize();
	fCheckBitmap = (uint32 *)malloc(size);
	if (fCheckBitmap == NULL) {
		fLock.Unlock();
		return B_NO_MEMORY;
	}

	check_cookie *cookie = new check_cookie();
	if (cookie == NULL) {
		free(fCheckBitmap);
		fCheckBitmap = NULL;
		fLock.Unlock();

		return B_NO_MEMORY;
	}

	// initialize bitmap
	memset(fCheckBitmap, 0, size);
	// pre-mark the boot block, log, and bitmap blocks as in use
	for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;) {
		_SetCheckBitmapAt(block);
	}

	cookie->stack.Push(fVolume->Root());
	cookie->stack.Push(fVolume->Indices());
	cookie->iterator = NULL;
	control->cookie = cookie;

	fCheckCookie = cookie;
		// to be able to restore nicely if "chkbfs" exited abnormally

	// ToDo: check reserved area in bitmap!

	return B_OK;
}


/**	Ends a "chkbfs" session: releases any pending iterator and vnode, and -
 *	if the traversal ran to completion and fixing is requested - writes the
 *	shadow check bitmap over the on-disk bitmap. Frees the check state and
 *	releases the allocator lock taken by StartChecking().
 */
status_t
BlockAllocator::StopChecking(check_control *control)
{
	check_cookie *cookie;
	if (control == NULL)
		cookie = fCheckCookie;
			// abnormal exit: fall back to the remembered cookie
	else
		cookie = (check_cookie *)control->cookie;

	if (cookie == NULL)
		return B_ERROR;

	if (cookie->iterator != NULL) {
		delete cookie->iterator;
		cookie->iterator = NULL;

		// the current directory inode is still locked in memory
		put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current));
	}

	// if CheckNextNode() could completely work through, we can
	// fix any damages of the bitmap
	if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
		// calculate the number of used blocks in the check bitmap
		size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup;
		off_t usedBlocks = 0LL;

		// ToDo: update the allocation groups used blocks info
		for (uint32 i = size >> 2; i-- > 0;) {
			uint32 compare = 1;
			for (int16 j = 0; j < 32; j++, compare <<= 1) {
				if (compare & fCheckBitmap[i])
					usedBlocks++;
			}
		}

		control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing;
		if (control->stats.freed < 0)
			control->stats.freed = 0;

		// Should we fix errors? Were there any errors we can fix?
		if (control->flags & BFS_FIX_BITMAP_ERRORS
			&& (control->stats.freed != 0 || control->stats.missing != 0)) {
			// if so, write the check bitmap back over the original one,
			// and use transactions here to play safe - we even use several
			// transactions, so that we don't blow the maximum log size
			// on large disks; since we don't need to make this atomic
			fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

			int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
			int32 blockSize = fVolume->BlockSize();

			// write in batches of 512 bitmap blocks per transaction
			for (int32 i = 0; i < blocksInBitmap; i += 512) {
				Transaction transaction(fVolume, 1 + i);

				int32 blocksToWrite = 512;
				if (blocksToWrite + i > blocksInBitmap)
					blocksToWrite = blocksInBitmap - i;

				status_t status = transaction.WriteBlocks(1 + i,
					(uint8 *)fCheckBitmap + i * blockSize, blocksToWrite);
				if (status < B_OK) {
					FATAL(("error writing bitmap: %s\n", strerror(status)));
					break;
				}
				transaction.Done();
			}
		}
	} else
		FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

	free(fCheckBitmap);
	fCheckBitmap = NULL;
	fCheckCookie = NULL;
	delete cookie;
	fLock.Unlock();

	return B_OK;
}


/**	Advances the "chkbfs" traversal by one node: walks the directory stack
 *	and b+tree iterators stored in the cookie, validating each inode it
 *	encounters. Returns B_ENTRY_NOT_FOUND once the traversal is complete;
 *	per-node results are reported through "control".
 */
status_t
BlockAllocator::CheckNextNode(check_control *control)
{
	if (!_IsValidCheckControl(control))
		return B_BAD_VALUE;

	check_cookie *cookie = (check_cookie *)control->cookie;

	while (true) {
		if (cookie->iterator == NULL) {
			if (!cookie->stack.Pop(&cookie->current)) {
				// no more runs on the stack, we are obviously finished!
930 control->status = B_ENTRY_NOT_FOUND; 931 return B_ENTRY_NOT_FOUND; 932 } 933 934 // get iterator for the next directory 935 936 Vnode vnode(fVolume, cookie->current); 937 Inode *inode; 938 if (vnode.Get(&inode) < B_OK) { 939 FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current))); 940 continue; 941 } 942 943 if (!inode->IsContainer()) { 944 FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current))); 945 continue; 946 } 947 948 BPlusTree *tree; 949 if (inode->GetTree(&tree) != B_OK) { 950 FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current))); 951 continue; 952 } 953 954 cookie->parent = inode; 955 cookie->parent_mode = inode->Mode(); 956 957 cookie->iterator = new TreeIterator(tree); 958 if (cookie->iterator == NULL) 959 RETURN_ERROR(B_NO_MEMORY); 960 961 // the inode must stay locked in memory until the iterator is freed 962 vnode.Keep(); 963 964 // check the inode of the directory 965 control->errors = 0; 966 control->status = CheckInode(inode, control); 967 968 if (inode->GetName(control->name) < B_OK) 969 strcpy(control->name, "(node has no name)"); 970 971 control->inode = inode->ID(); 972 control->mode = inode->Mode(); 973 974 return B_OK; 975 } 976 977 char name[B_FILE_NAME_LENGTH]; 978 uint16 length; 979 vnode_id id; 980 981 status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); 982 if (status == B_ENTRY_NOT_FOUND) { 983 // there are no more entries in this iterator, free it and go on 984 delete cookie->iterator; 985 cookie->iterator = NULL; 986 987 // unlock the directory's inode from memory 988 put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current)); 989 990 continue; 991 } else if (status == B_OK) { 992 // ignore "." and ".." 
entries 993 if (!strcmp(name, ".") || !strcmp(name, "..")) 994 continue; 995 996 // fill in the control data as soon as we have them 997 strlcpy(control->name, name, B_FILE_NAME_LENGTH); 998 control->inode = id; 999 control->errors = 0; 1000 1001 Vnode vnode(fVolume, id); 1002 Inode *inode; 1003 if (vnode.Get(&inode) < B_OK) { 1004 FATAL(("Could not open inode ID %Ld!\n", id)); 1005 control->errors |= BFS_COULD_NOT_OPEN; 1006 control->status = B_ERROR; 1007 return B_OK; 1008 } 1009 1010 // check if the inode's name is the same as in the b+tree 1011 if (inode->IsRegularNode()) { 1012 SimpleLocker locker(inode->SmallDataLock()); 1013 NodeGetter node(fVolume, inode); 1014 1015 const char *localName = inode->Name(node.Node()); 1016 if (localName == NULL || strcmp(localName, name)) { 1017 control->errors |= BFS_NAMES_DONT_MATCH; 1018 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName)); 1019 } 1020 } 1021 1022 control->mode = inode->Mode(); 1023 1024 // Check for the correct mode of the node (if the mode of the 1025 // file don't fit to its parent, there is a serious problem) 1026 if (((cookie->parent_mode & S_ATTR_DIR) != 0 && !inode->IsAttribute()) 1027 || ((cookie->parent_mode & S_INDEX_DIR) != 0 && !inode->IsIndex()) 1028 || ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) 1029 == S_DIRECTORY 1030 && (inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR)) != 0)) { 1031 FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n", 1032 inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber())); 1033 1034 // if we are allowed to fix errors, we should remove the file 1035 if (control->flags & BFS_REMOVE_WRONG_TYPES 1036 && control->flags & BFS_FIX_BITMAP_ERRORS) { 1037 // it's safe to start a transaction, because Inode::Remove() 1038 // won't touch the block bitmap (which we hold the lock for) 1039 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1040 // the bitmap anyway 1041 Transaction 
transaction(fVolume, cookie->parent->BlockNumber()); 1042 1043 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1044 status = cookie->parent->Remove(transaction, name, NULL, inode->IsContainer()); 1045 if (status == B_OK) 1046 transaction.Done(); 1047 } else 1048 status = B_ERROR; 1049 1050 control->errors |= BFS_WRONG_TYPE; 1051 control->status = status; 1052 return B_OK; 1053 } 1054 1055 // If the inode has an attribute directory, push it on the stack 1056 if (!inode->Attributes().IsZero()) 1057 cookie->stack.Push(inode->Attributes()); 1058 1059 // push the directory on the stack so that it will be scanned later 1060 if (inode->IsContainer() && !inode->IsIndex()) 1061 cookie->stack.Push(inode->BlockRun()); 1062 else { 1063 // check it now 1064 control->status = CheckInode(inode, control); 1065 1066 return B_OK; 1067 } 1068 } 1069 } 1070 // is never reached 1071 } 1072 1073 1074 bool 1075 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1076 { 1077 size_t size = BitmapSize(); 1078 uint32 index = block / 32; // 32bit resolution 1079 if (index > size / 4) 1080 return false; 1081 1082 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f)); 1083 } 1084 1085 1086 void 1087 BlockAllocator::_SetCheckBitmapAt(off_t block) 1088 { 1089 size_t size = BitmapSize(); 1090 uint32 index = block / 32; // 32bit resolution 1091 if (index > size / 4) 1092 return; 1093 1094 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1095 } 1096 1097 1098 status_t 1099 BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control, bool allocated) 1100 { 1101 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1102 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1103 || uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits 1104 || run.length == 0) { 1105 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), 
run.Length())); 1106 if (control == NULL) 1107 return B_BAD_DATA; 1108 1109 control->errors |= BFS_INVALID_BLOCK_RUN; 1110 return B_OK; 1111 } 1112 1113 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1114 uint32 block = run.Start() / bitsPerBlock; 1115 uint32 pos = run.Start() % bitsPerBlock; 1116 int32 length = 0; 1117 off_t firstMissing = -1, firstSet = -1; 1118 off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1119 1120 AllocationBlock cached(fVolume); 1121 1122 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1123 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1124 RETURN_ERROR(B_IO_ERROR); 1125 1126 if (pos >= cached.NumBlockBits()) { 1127 // something very strange has happened... 1128 RETURN_ERROR(B_ERROR); 1129 } 1130 1131 while (length < run.Length() && pos < cached.NumBlockBits()) { 1132 if (cached.IsUsed(pos) != allocated) { 1133 if (control == NULL) { 1134 PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated (pos = %ld, length = %ld)!\n", 1135 type, run.AllocationGroup(), run.Start(), run.Length(), pos, length)); 1136 return B_BAD_DATA; 1137 } 1138 if (firstMissing == -1) { 1139 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1140 control->errors |= BFS_MISSING_BLOCKS; 1141 } 1142 control->stats.missing++; 1143 } else if (firstMissing != -1) { 1144 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are %sallocated!\n", 1145 type, run.allocation_group, run.start, run.length, firstMissing, 1146 firstGroupBlock + pos + block * bitsPerBlock - 1, allocated ? 
"not " : "")); 1147 firstMissing = -1; 1148 } 1149 1150 if (fCheckBitmap != NULL) { 1151 // Set the block in the check bitmap as well, but have a look if it 1152 // is already allocated first 1153 uint32 offset = pos + block * bitsPerBlock; 1154 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1155 if (firstSet == -1) { 1156 firstSet = firstGroupBlock + offset; 1157 control->errors |= BFS_BLOCKS_ALREADY_SET; 1158 } 1159 control->stats.already_set++; 1160 } else { 1161 if (firstSet != -1) { 1162 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are already set!\n", 1163 type, (int)run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1)); 1164 firstSet = -1; 1165 } 1166 _SetCheckBitmapAt(firstGroupBlock + offset); 1167 } 1168 } 1169 length++; 1170 pos++; 1171 } 1172 1173 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1174 if (firstMissing != -1) 1175 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1176 if (firstSet != -1) 1177 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are already set!\n", type, (int)run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1178 } 1179 } 1180 1181 return B_OK; 1182 } 1183 1184 1185 status_t 1186 BlockAllocator::CheckInode(Inode *inode, check_control *control) 1187 { 1188 if (control != NULL && fCheckBitmap == NULL) 1189 return B_NO_INIT; 1190 if (inode == NULL) 1191 return B_BAD_VALUE; 1192 1193 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1194 if (status < B_OK) 1195 return status; 1196 1197 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1198 // symlinks may not have a valid data stream 1199 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1200 return B_BAD_DATA; 1201 1202 return B_OK; 1203 } 1204 
1205 data_stream *data = &inode->Node().data; 1206 1207 // check the direct range 1208 1209 if (data->max_direct_range) { 1210 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1211 if (data->direct[i].IsZero()) 1212 break; 1213 1214 status = CheckBlockRun(data->direct[i], "direct", control); 1215 if (status < B_OK) 1216 return status; 1217 } 1218 } 1219 1220 CachedBlock cached(fVolume); 1221 1222 // check the indirect range 1223 1224 if (data->max_indirect_range) { 1225 status = CheckBlockRun(data->indirect, "indirect", control); 1226 if (status < B_OK) 1227 return status; 1228 1229 off_t block = fVolume->ToBlock(data->indirect); 1230 1231 for (int32 i = 0; i < data->indirect.Length(); i++) { 1232 block_run *runs = (block_run *)cached.SetTo(block + i); 1233 if (runs == NULL) 1234 RETURN_ERROR(B_IO_ERROR); 1235 1236 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1237 int32 index = 0; 1238 for (; index < runsPerBlock; index++) { 1239 if (runs[index].IsZero()) 1240 break; 1241 1242 status = CheckBlockRun(runs[index], "indirect->run", control); 1243 if (status < B_OK) 1244 return status; 1245 } 1246 if (index < runsPerBlock) 1247 break; 1248 } 1249 } 1250 1251 // check the double indirect range 1252 1253 if (data->max_double_indirect_range) { 1254 status = CheckBlockRun(data->double_indirect, "double indirect", control); 1255 if (status < B_OK) 1256 return status; 1257 1258 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1259 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1260 1261 CachedBlock cachedDirect(fVolume); 1262 int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift()) 1263 / sizeof(block_run); 1264 1265 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) { 1266 // get the indirect array block 1267 block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) 1268 + indirectIndex / runsPerBlock); 1269 if (array == NULL) 1270 return B_IO_ERROR; 
1271 1272 block_run indirect = array[indirectIndex % runsPerBlock]; 1273 // are we finished yet? 1274 if (indirect.IsZero()) 1275 return B_OK; 1276 1277 status = CheckBlockRun(indirect, "double indirect->runs", control); 1278 if (status < B_OK) 1279 return status; 1280 1281 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run); 1282 1283 for (int32 index = 0; index < maxIndex; ) { 1284 block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect) 1285 + index / runsPerBlock); 1286 if (runs == NULL) 1287 return B_IO_ERROR; 1288 1289 do { 1290 // are we finished yet? 1291 if (runs[index % runsPerBlock].IsZero()) 1292 return B_OK; 1293 1294 status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control); 1295 if (status < B_OK) 1296 return status; 1297 } while ((++index % runsPerArray) != 0); 1298 } 1299 } 1300 } 1301 1302 return B_OK; 1303 } 1304 1305