1 /* 2 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! block bitmap handling and allocation policies 7 8 9 #include "Debug.h" 10 #include "BlockAllocator.h" 11 #include "Volume.h" 12 #include "Inode.h" 13 #include "BPlusTree.h" 14 #include "bfs_control.h" 15 16 #include "system_dependencies.h" 17 18 19 // Things the BlockAllocator should do: 20 21 // - find a range of blocks of a certain size nearby a specific position 22 // - allocating an unsharp range of blocks for pre-allocation 23 // - free blocks 24 // - know how to deal with each allocation, special handling for directories, 25 // files, symlinks, etc. (type sensitive allocation policies) 26 27 // What makes the code complicated is the fact that we are not just reading 28 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 29 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 30 // of partitions will grow even more - so that's not an option. 31 // Instead we are reading in every block when it's used - since an allocation 32 // group can span several blocks in the block bitmap, the AllocationBlock 33 // class is there to make handling those easier. 34 35 // The current implementation is only slightly optimized and could probably 36 // be improved a lot. Furthermore, the allocation policies used here should 37 // have some real world tests. 38 39 40 struct check_cookie { 41 check_cookie() {} 42 43 block_run current; 44 Inode *parent; 45 mode_t parent_mode; 46 Stack<block_run> stack; 47 TreeIterator *iterator; 48 }; 49 50 51 class AllocationBlock : public CachedBlock { 52 public: 53 AllocationBlock(Volume *volume); 54 55 void Allocate(uint16 start, uint16 numBlocks); 56 void Free(uint16 start, uint16 numBlocks); 57 inline bool IsUsed(uint16 block); 58 59 status_t SetTo(AllocationGroup &group, uint16 block); 60 status_t SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block); 61 62 uint32 NumBlockBits() const { return fNumBits; } 63 uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; } 64 65 private: 66 uint32 fNumBits; 67 #ifdef DEBUG 68 bool fWritable; 69 #endif 70 }; 71 72 73 class AllocationGroup { 74 public: 75 AllocationGroup(); 76 77 void AddFreeRange(int32 start, int32 blocks); 78 bool IsFull() const { return fFreeBits == 0; } 79 80 status_t Allocate(Transaction &transaction, uint16 start, int32 length); 81 status_t Free(Transaction &transaction, uint16 start, int32 length); 82 83 uint32 NumBits() const { return fNumBits; } 84 uint32 NumBlocks() const { return fNumBlocks; } 85 int32 Start() const { return fStart; } 86 87 private: 88 friend class BlockAllocator; 89 90 uint32 fNumBits; 91 uint32 fNumBlocks; 92 int32 fStart; 93 int32 fFirstFree, fLargest, fLargestFirst; 94 // ToDo: fLargest & fLargestFirst are not maintained (and therefore used) yet! 95 int32 fFreeBits; 96 }; 97 98 99 AllocationBlock::AllocationBlock(Volume *volume) 100 : CachedBlock(volume) 101 { 102 } 103 104 105 status_t 106 AllocationBlock::SetTo(AllocationGroup &group, uint16 block) 107 { 108 // 8 blocks per byte 109 fNumBits = fVolume->BlockSize() << 3; 110 // the last group may have less bits than the others 111 if ((block + 1) * fNumBits > group.NumBits()) 112 fNumBits = group.NumBits() % fNumBits; 113 114 #ifdef DEBUG 115 fWritable = false; 116 #endif 117 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 118 } 119 120 121 status_t 122 AllocationBlock::SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block) 123 { 124 // 8 blocks per byte 125 fNumBits = fVolume->BlockSize() << 3; 126 // the last group may have less bits in the last block 127 if ((block + 1) * fNumBits > group.NumBits()) 128 fNumBits = group.NumBits() % fNumBits; 129 130 #ifdef DEBUG 131 fWritable = true; 132 #endif 133 return CachedBlock::SetToWritable(transaction, group.Start() + block) != NULL 134 ? B_OK : B_ERROR; 135 } 136 137 138 bool 139 AllocationBlock::IsUsed(uint16 block) 140 { 141 if (block > fNumBits) 142 return true; 143 144 // the block bitmap is accessed in 32-bit blocks 145 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 146 } 147 148 149 void 150 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 151 { 152 ASSERT(start < fNumBits); 153 ASSERT(uint32(start + numBlocks) <= fNumBits); 154 #ifdef DEBUG 155 ASSERT(fWritable); 156 #endif 157 158 if (uint32(start + numBlocks) > fNumBits) { 159 FATAL(("Allocation::Allocate(): tried to allocate too many blocks: %u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits)); 160 DIE(("Allocation::Allocate(): tried to allocate too many blocks")); 161 } 162 163 int32 block = start >> 5; 164 165 while (numBlocks > 0) { 166 uint32 mask = 0; 167 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 168 mask |= 1UL << (i % 32); 169 170 #ifdef DEBUG 171 // check for already set blocks 172 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) { 173 FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 174 DEBUGGER(("blocks already set!")); 175 } 176 #endif 177 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 178 start = 0; 179 } 180 } 181 182 183 void 184 AllocationBlock::Free(uint16 start, uint16 numBlocks) 185 { 186 ASSERT(start < fNumBits); 187 ASSERT(uint32(start + numBlocks) <= fNumBits); 188 #ifdef DEBUG 189 ASSERT(fWritable); 190 #endif 191 192 if (uint32(start + numBlocks) > fNumBits) { 193 FATAL(("Allocation::Free(): tried to free too many blocks: %u (numBlocks = %u)!\n", numBlocks, (unsigned)fNumBits)); 194 DIE(("Allocation::Free(): tried to free too many blocks")); 195 } 196 197 int32 block = start >> 5; 198 199 while (numBlocks > 0) { 200 uint32 mask = 0; 201 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 202 mask |= 1UL << (i % 32); 203 204 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 205 start = 0; 206 } 207 } 208 209 210 // #pragma mark - 211 212 213 /** The allocation groups are created and initialized in 214 * BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 215 * respectively. 216 */ 217 218 AllocationGroup::AllocationGroup() 219 : 220 fFirstFree(-1), 221 fLargest(-1), 222 fLargestFirst(-1), 223 fFreeBits(0) 224 { 225 } 226 227 228 void 229 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 230 { 231 //D(if (blocks > 512) 232 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 233 234 if (fFirstFree == -1) 235 fFirstFree = start; 236 237 if (fLargest < blocks) { 238 fLargest = blocks; 239 fLargestFirst = start; 240 } 241 242 fFreeBits += blocks; 243 } 244 245 246 /** Allocates the specified run in the allocation group. 247 * Doesn't check if the run is valid or already allocated 248 * partially, nor does it maintain the free ranges hints 249 * or the volume's used blocks count. 250 * It only does the low-level work of allocating some bits 251 * in the block bitmap. 252 * Assumes that the block bitmap lock is hold. 253 */ 254 255 status_t 256 AllocationGroup::Allocate(Transaction &transaction, uint16 start, int32 length) 257 { 258 if (start > fNumBits) 259 return B_ERROR; 260 261 // Update the allocation group info 262 // ToDo: this info will be incorrect if something goes wrong later 263 // Note, the fFirstFree block doesn't have to be really free 264 if (start == fFirstFree) 265 fFirstFree = start + length; 266 fFreeBits -= length; 267 268 Volume *volume = transaction.GetVolume(); 269 270 // calculate block in the block bitmap and position within 271 uint32 bitsPerBlock = volume->BlockSize() << 3; 272 uint32 block = start / bitsPerBlock; 273 start = start % bitsPerBlock; 274 275 AllocationBlock cached(volume); 276 277 while (length > 0) { 278 if (cached.SetToWritable(transaction, *this, block) < B_OK) 279 RETURN_ERROR(B_IO_ERROR); 280 281 uint32 numBlocks = length; 282 if (start + numBlocks > cached.NumBlockBits()) 283 numBlocks = cached.NumBlockBits() - start; 284 285 cached.Allocate(start, numBlocks); 286 287 length -= numBlocks; 288 start = 0; 289 block++; 290 } 291 292 return B_OK; 293 } 294 295 296 /** Frees the specified run in the allocation group. 297 * Doesn't check if the run is valid or was not completely 298 * allocated, nor does it maintain the free ranges hints 299 * or the volume's used blocks count. 300 * It only does the low-level work of freeing some bits 301 * in the block bitmap. 302 * Assumes that the block bitmap lock is hold. 303 */ 304 305 status_t 306 AllocationGroup::Free(Transaction &transaction, uint16 start, int32 length) 307 { 308 if (start > fNumBits) 309 return B_ERROR; 310 311 // Update the allocation group info 312 // ToDo: this info will be incorrect if something goes wrong later 313 if (fFirstFree > start) 314 fFirstFree = start; 315 fFreeBits += length; 316 317 Volume *volume = transaction.GetVolume(); 318 319 // calculate block in the block bitmap and position within 320 uint32 bitsPerBlock = volume->BlockSize() << 3; 321 uint32 block = start / bitsPerBlock; 322 start = start % bitsPerBlock; 323 324 AllocationBlock cached(volume); 325 326 while (length > 0) { 327 if (cached.SetToWritable(transaction, *this, block) < B_OK) 328 RETURN_ERROR(B_IO_ERROR); 329 330 uint16 freeLength = length; 331 if (uint32(start + length) > cached.NumBlockBits()) 332 freeLength = cached.NumBlockBits() - start; 333 334 cached.Free(start, freeLength); 335 336 length -= freeLength; 337 start = 0; 338 block++; 339 } 340 return B_OK; 341 } 342 343 344 // #pragma mark - 345 346 347 BlockAllocator::BlockAllocator(Volume *volume) 348 : 349 fVolume(volume), 350 fLock("bfs allocator"), 351 fGroups(NULL), 352 fCheckBitmap(NULL) 353 { 354 } 355 356 357 BlockAllocator::~BlockAllocator() 358 { 359 delete[] fGroups; 360 } 361 362 363 status_t 364 BlockAllocator::Initialize(bool full) 365 { 366 if (fLock.InitCheck() < B_OK) 367 return B_ERROR; 368 369 fNumGroups = fVolume->AllocationGroups(); 370 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 371 fGroups = new AllocationGroup[fNumGroups]; 372 if (fGroups == NULL) 373 return B_NO_MEMORY; 374 375 if (!full) 376 return B_OK; 377 378 fLock.Lock(); 379 // the lock will be released by the initialize() function 380 381 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 382 "bfs block allocator", B_LOW_PRIORITY, (void *)this); 383 if (id < B_OK) 384 return _Initialize(this); 385 386 return resume_thread(id); 387 } 388 389 390 status_t 391 BlockAllocator::InitializeAndClearBitmap(Transaction &transaction) 392 { 393 status_t status = Initialize(false); 394 if (status < B_OK) 395 return status; 396 397 uint32 blocks = fBlocksPerGroup; 398 uint32 numBits = 8 * blocks * fVolume->BlockSize(); 399 uint32 blockShift = fVolume->BlockShift(); 400 401 uint32 *buffer = (uint32 *)malloc(numBits >> 3); 402 if (buffer == NULL) 403 RETURN_ERROR(B_NO_MEMORY); 404 405 memset(buffer, 0, numBits >> 3); 406 407 off_t offset = 1; 408 409 // initialize the AllocationGroup objects and clear the on-disk bitmap 410 411 for (int32 i = 0; i < fNumGroups; i++) { 412 if (write_pos(fVolume->Device(), offset << blockShift, buffer, blocks << blockShift) < B_OK) 413 return B_ERROR; 414 415 // the last allocation group may contain less blocks than the others 416 if (i == fNumGroups - 1) { 417 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 418 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) >> (blockShift + 3)); 419 } else { 420 fGroups[i].fNumBits = numBits; 421 fGroups[i].fNumBlocks = blocks; 422 } 423 fGroups[i].fStart = offset; 424 fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0; 425 fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits; 426 427 offset += blocks; 428 } 429 free(buffer); 430 431 // reserve the boot block, the log area, and the block bitmap itself 432 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 433 434 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 435 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 436 return B_ERROR; 437 } 438 fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 439 440 return B_OK; 441 } 442 443 444 status_t 445 BlockAllocator::_Initialize(BlockAllocator *allocator) 446 { 447 // The lock must already be held at this point 448 449 Volume *volume = allocator->fVolume; 450 uint32 blocks = allocator->fBlocksPerGroup; 451 uint32 blockShift = volume->BlockShift(); 452 off_t freeBlocks = 0; 453 454 uint32 *buffer = (uint32 *)malloc(blocks << blockShift); 455 if (buffer == NULL) { 456 allocator->fLock.Unlock(); 457 RETURN_ERROR(B_NO_MEMORY); 458 } 459 460 AllocationGroup *groups = allocator->fGroups; 461 off_t offset = 1; 462 uint32 bitsPerGroup = 8 * (blocks << blockShift); 463 int32 numGroups = allocator->fNumGroups; 464 465 for (int32 i = 0; i < numGroups; i++) { 466 if (read_pos(volume->Device(), offset << blockShift, buffer, 467 blocks << blockShift) < B_OK) 468 break; 469 470 // the last allocation group may contain less blocks than the others 471 if (i == numGroups - 1) { 472 groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup; 473 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) >> (blockShift + 3)); 474 } else { 475 groups[i].fNumBits = bitsPerGroup; 476 groups[i].fNumBlocks = blocks; 477 } 478 groups[i].fStart = offset; 479 480 // finds all free ranges in this allocation group 481 int32 start = -1, range = 0; 482 int32 numBits = groups[i].fNumBits, bit = 0; 483 int32 count = (numBits + 31) / 32; 484 485 for (int32 k = 0; k < count; k++) { 486 for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) { 487 if (buffer[k] & (1UL << j)) { 488 // block is in use 489 if (range > 0) { 490 groups[i].AddFreeRange(start, range); 491 range = 0; 492 } 493 } else if (range++ == 0) { 494 // block is free, start new free range 495 start = bit; 496 } 497 } 498 } 499 if (range) 500 groups[i].AddFreeRange(start, range); 501 502 freeBlocks += groups[i].fFreeBits; 503 504 offset += blocks; 505 } 506 free(buffer); 507 508 // check if block bitmap and log area are reserved 509 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 510 if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) { 511 Transaction transaction(volume, 0); 512 if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 513 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 514 volume->Panic(); 515 } else { 516 FATAL(("space for block bitmap or log area was not reserved!\n")); 517 518 transaction.Done(); 519 } 520 } 521 522 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 523 if (volume->UsedBlocks() != usedBlocks) { 524 // If the disk in a dirty state at mount time, it's 525 // normal that the values don't match 526 INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks)); 527 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 528 } 529 530 allocator->fLock.Unlock(); 531 return B_OK; 532 } 533 534 535 status_t 536 BlockAllocator::AllocateBlocks(Transaction &transaction, int32 group, uint16 start, 537 uint16 maximum, uint16 minimum, block_run &run) 538 { 539 if (maximum == 0) 540 return B_BAD_VALUE; 541 542 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum)); 543 544 AllocationBlock cached(fVolume); 545 Locker lock(fLock); 546 547 // the first scan through all allocation groups will look for the 548 // wanted maximum of blocks, the second scan will just look to 549 // satisfy the minimal requirement 550 uint16 numBlocks = maximum; 551 552 for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) { 553 group = group % fNumGroups; 554 555 if (start >= fGroups[group].NumBits() || fGroups[group].IsFull()) 556 continue; 557 558 if (i >= fNumGroups) { 559 // if the minimum is the same as the maximum, it's not necessary to 560 // search for in the allocation groups a second time 561 if (maximum == minimum) 562 return B_DEVICE_FULL; 563 564 numBlocks = minimum; 565 } 566 567 // The wanted maximum is smaller than the largest free block in the group 568 // or already smaller than the minimum 569 // ToDo: disabled because it's currently not maintained after the first allocation 570 //if (numBlocks > fGroups[group].fLargest) 571 // continue; 572 573 if (start < fGroups[group].fFirstFree) 574 start = fGroups[group].fFirstFree; 575 576 // there may be more than one block per allocation group - and 577 // we iterate through it to find a place for the allocation. 578 // (one allocation can't exceed one allocation group) 579 580 uint32 block = start / (fVolume->BlockSize() << 3); 581 int32 range = 0, rangeStart = 0; 582 583 for (; block < fGroups[group].NumBlocks(); block++) { 584 if (cached.SetTo(fGroups[group], block) < B_OK) 585 RETURN_ERROR(B_ERROR); 586 587 // find a block large enough to hold the allocation 588 for (uint32 bit = start % cached.NumBlockBits(); 589 bit < cached.NumBlockBits(); bit++) { 590 if (!cached.IsUsed(bit)) { 591 if (range == 0) { 592 // start new range 593 rangeStart = block * cached.NumBlockBits() + bit; 594 } 595 596 // have we found a range large enough to hold numBlocks? 597 if (++range >= maximum) 598 break; 599 } else if (i >= fNumGroups && range >= minimum) { 600 // we have found a block larger than the required minimum (second pass) 601 break; 602 } else { 603 // end of a range 604 range = 0; 605 } 606 } 607 608 // ToDo: we could also remember a "largest free block that fits the minimal 609 // requirement" in the group, and use that - this would avoid the need 610 // for a second run 611 612 // if we found a suitable block, mark the blocks as in use, and write 613 // the updated block bitmap back to disk 614 if (range >= numBlocks) { 615 // adjust allocation size 616 if (numBlocks < maximum) 617 numBlocks = range; 618 619 if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK) 620 RETURN_ERROR(B_IO_ERROR); 621 622 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group); 623 run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart); 624 run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks); 625 626 fVolume->SuperBlock().used_blocks = 627 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks); 628 // We are not writing back the disk's super block - it's 629 // either done by the journaling code, or when the disk 630 // is unmounted. 631 // If the value is not correct at mount time, it will be 632 // fixed anyway. 633 634 return B_OK; 635 } 636 637 // start from the beginning of the next block 638 start = 0; 639 } 640 } 641 return B_DEVICE_FULL; 642 } 643 644 645 status_t 646 BlockAllocator::AllocateForInode(Transaction &transaction, const block_run *parent, 647 mode_t type, block_run &run) 648 { 649 // apply some allocation policies here (AllocateBlocks() will break them 650 // if necessary) - we will start with those described in Dominic Giampaolo's 651 // "Practical File System Design", and see how good they work 652 653 // files are going in the same allocation group as its parent, sub-directories 654 // will be inserted 8 allocation groups after the one of the parent 655 uint16 group = parent->AllocationGroup(); 656 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 657 group += 8; 658 659 return AllocateBlocks(transaction, group, 0, 1, 1, run); 660 } 661 662 663 status_t 664 BlockAllocator::Allocate(Transaction &transaction, Inode *inode, off_t numBlocks, 665 block_run &run, uint16 minimum) 666 { 667 if (numBlocks <= 0) 668 return B_ERROR; 669 670 // one block_run can't hold more data than there is in one allocation group 671 if (numBlocks > fGroups[0].NumBits()) 672 numBlocks = fGroups[0].NumBits(); 673 674 // since block_run.length is uint16, the largest number of blocks that 675 // can be covered by a block_run is 65535 676 // ToDo: if we drop compatibility, couldn't we do this any better? 677 // There are basically two possibilities: 678 // a) since a length of zero doesn't have any sense, take that for 65536 - 679 // but that could cause many problems (bugs) in other areas 680 // b) reduce the maximum amount of blocks per block_run, so that the remaining 681 // number of free blocks can be used in a useful manner (like 4 blocks) - 682 // but that would also reduce the maximum file size 683 // c) have BlockRun::Length() return (length + 1). 684 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 685 numBlocks = MAX_BLOCK_RUN_LENGTH; 686 687 // apply some allocation policies here (AllocateBlocks() will break them 688 // if necessary) 689 uint16 group = inode->BlockRun().AllocationGroup(); 690 uint16 start = 0; 691 692 // are there already allocated blocks? (then just try to allocate near the last one) 693 if (inode->Size() > 0) { 694 const data_stream &data = inode->Node().data; 695 // ToDo: we currently don't care for when the data stream 696 // is already grown into the indirect ranges 697 if (data.max_double_indirect_range == 0 698 && data.max_indirect_range == 0) { 699 // Since size > 0, there must be a valid block run in this stream 700 int32 last = 0; 701 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 702 if (data.direct[last + 1].IsZero()) 703 break; 704 705 group = data.direct[last].AllocationGroup(); 706 start = data.direct[last].Start() + data.direct[last].Length(); 707 } 708 } else if (inode->IsContainer() || inode->IsSymLink()) { 709 // directory and symbolic link data will go in the same allocation 710 // group as the inode is in but after the inode data 711 start = inode->BlockRun().Start(); 712 } else { 713 // file data will start in the next allocation group 714 group = inode->BlockRun().AllocationGroup() + 1; 715 } 716 717 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 718 } 719 720 721 status_t 722 BlockAllocator::Free(Transaction &transaction, block_run run) 723 { 724 Locker lock(fLock); 725 726 int32 group = run.AllocationGroup(); 727 uint16 start = run.Start(); 728 uint16 length = run.Length(); 729 730 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length)); 731 732 // doesn't use Volume::IsValidBlockRun() here because it can check better 733 // against the group size (the last group may have a different length) 734 if (group < 0 || group >= fNumGroups 735 || start > fGroups[group].NumBits() 736 || uint32(start + length) > fGroups[group].NumBits() 737 || length == 0) { 738 FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, start, length)); 739 DEBUGGER(("tried to free invalid block_run")); 740 return B_BAD_VALUE; 741 } 742 // check if someone tries to free reserved areas at the beginning of the drive 743 if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 744 FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, start, length)); 745 DEBUGGER(("tried to free reserved block")); 746 return B_BAD_VALUE; 747 } 748 #ifdef DEBUG 749 if (CheckBlockRun(run) < B_OK) 750 return B_BAD_DATA; 751 #endif 752 753 if (fGroups[group].Free(transaction, start, length) < B_OK) 754 RETURN_ERROR(B_IO_ERROR); 755 756 #ifdef DEBUG 757 if (CheckBlockRun(run, NULL, NULL, false) < B_OK) 758 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n")); 759 #endif 760 761 fVolume->SuperBlock().used_blocks = 762 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 763 return B_OK; 764 } 765 766 767 size_t 768 BlockAllocator::BitmapSize() const 769 { 770 return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 771 } 772 773 774 // #pragma mark - 775 // Functions to check the validity of the bitmap - they are used from 776 // the "chkbfs" command 777 778 779 bool 780 BlockAllocator::_IsValidCheckControl(check_control *control) 781 { 782 if (control == NULL 783 || control->magic != BFS_IOCTL_CHECK_MAGIC) { 784 FATAL(("invalid check_control (%p)!\n", control)); 785 return false; 786 } 787 788 return true; 789 } 790 791 792 status_t 793 BlockAllocator::StartChecking(check_control *control) 794 { 795 if (!_IsValidCheckControl(control)) 796 return B_BAD_VALUE; 797 798 status_t status = fLock.Lock(); 799 if (status < B_OK) 800 return status; 801 802 size_t size = BitmapSize(); 803 fCheckBitmap = (uint32 *)malloc(size); 804 if (fCheckBitmap == NULL) { 805 fLock.Unlock(); 806 return B_NO_MEMORY; 807 } 808 809 check_cookie *cookie = new check_cookie(); 810 if (cookie == NULL) { 811 free(fCheckBitmap); 812 fCheckBitmap = NULL; 813 fLock.Unlock(); 814 815 return B_NO_MEMORY; 816 } 817 818 // initialize bitmap 819 memset(fCheckBitmap, 0, size); 820 for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;) { 821 _SetCheckBitmapAt(block); 822 } 823 824 cookie->stack.Push(fVolume->Root()); 825 cookie->stack.Push(fVolume->Indices()); 826 cookie->iterator = NULL; 827 control->cookie = cookie; 828 829 fCheckCookie = cookie; 830 // to be able to restore nicely if "chkbfs" exited abnormally 831 832 // ToDo: check reserved area in bitmap! 833 834 return B_OK; 835 } 836 837 838 status_t 839 BlockAllocator::StopChecking(check_control *control) 840 { 841 check_cookie *cookie; 842 if (control == NULL) 843 cookie = fCheckCookie; 844 else 845 cookie = (check_cookie *)control->cookie; 846 847 if (cookie == NULL) 848 return B_ERROR; 849 850 if (cookie->iterator != NULL) { 851 delete cookie->iterator; 852 cookie->iterator = NULL; 853 854 // the current directory inode is still locked in memory 855 put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current)); 856 } 857 858 // if CheckNextNode() could completely work through, we can 859 // fix any damages of the bitmap 860 if (control != NULL && control->status == B_ENTRY_NOT_FOUND) { 861 // calculate the number of used blocks in the check bitmap 862 size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 863 off_t usedBlocks = 0LL; 864 865 // ToDo: update the allocation groups used blocks info 866 for (uint32 i = size >> 2; i-- > 0;) { 867 uint32 compare = 1; 868 for (int16 j = 0; j < 32; j++, compare <<= 1) { 869 if (compare & fCheckBitmap[i]) 870 usedBlocks++; 871 } 872 } 873 874 control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing; 875 if (control->stats.freed < 0) 876 control->stats.freed = 0; 877 878 // Should we fix errors? Were there any errors we can fix? 879 if (control->flags & BFS_FIX_BITMAP_ERRORS 880 && (control->stats.freed != 0 || control->stats.missing != 0)) { 881 // if so, write the check bitmap back over the original one, 882 // and use transactions here to play safe - we even use several 883 // transactions, so that we don't blow the maximum log size 884 // on large disks; since we don't need to make this atomic 885 fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 886 887 int32 blocksInBitmap = fNumGroups * fBlocksPerGroup; 888 int32 blockSize = fVolume->BlockSize(); 889 890 for (int32 i = 0; i < blocksInBitmap; i += 512) { 891 Transaction transaction(fVolume, 1 + i); 892 893 int32 blocksToWrite = 512; 894 if (blocksToWrite + i > blocksInBitmap) 895 blocksToWrite = blocksInBitmap - i; 896 897 status_t status = transaction.WriteBlocks(1 + i, 898 (uint8 *)fCheckBitmap + i * blockSize, blocksToWrite); 899 if (status < B_OK) { 900 FATAL(("error writing bitmap: %s\n", strerror(status))); 901 break; 902 } 903 transaction.Done(); 904 } 905 } 906 } else 907 FATAL(("BlockAllocator::CheckNextNode() didn't run through\n")); 908 909 free(fCheckBitmap); 910 fCheckBitmap = NULL; 911 fCheckCookie = NULL; 912 delete cookie; 913 fLock.Unlock(); 914 915 return B_OK; 916 } 917 918 919 status_t 920 BlockAllocator::CheckNextNode(check_control *control) 921 { 922 if (!_IsValidCheckControl(control)) 923 return B_BAD_VALUE; 924 925 check_cookie *cookie = (check_cookie *)control->cookie; 926 927 while (true) { 928 if (cookie->iterator == NULL) { 929 if (!cookie->stack.Pop(&cookie->current)) { 930 // no more runs on the stack, we are obviously finished! 931 control->status = B_ENTRY_NOT_FOUND; 932 return B_ENTRY_NOT_FOUND; 933 } 934 935 // get iterator for the next directory 936 937 Vnode vnode(fVolume, cookie->current); 938 Inode *inode; 939 if (vnode.Get(&inode) < B_OK) { 940 FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current))); 941 continue; 942 } 943 944 if (!inode->IsContainer()) { 945 FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current))); 946 continue; 947 } 948 949 BPlusTree *tree; 950 if (inode->GetTree(&tree) != B_OK) { 951 FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current))); 952 continue; 953 } 954 955 cookie->parent = inode; 956 cookie->parent_mode = inode->Mode(); 957 958 cookie->iterator = new TreeIterator(tree); 959 if (cookie->iterator == NULL) 960 RETURN_ERROR(B_NO_MEMORY); 961 962 // the inode must stay locked in memory until the iterator is freed 963 vnode.Keep(); 964 965 // check the inode of the directory 966 control->errors = 0; 967 control->status = CheckInode(inode, control); 968 969 if (inode->GetName(control->name) < B_OK) 970 strcpy(control->name, "(node has no name)"); 971 972 control->inode = inode->ID(); 973 control->mode = inode->Mode(); 974 975 return B_OK; 976 } 977 978 char name[B_FILE_NAME_LENGTH]; 979 uint16 length; 980 ino_t id; 981 982 status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); 983 if (status == B_ENTRY_NOT_FOUND) { 984 // there are no more entries in this iterator, free it and go on 985 delete cookie->iterator; 986 cookie->iterator = NULL; 987 988 // unlock the directory's inode from memory 989 put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current)); 990 991 continue; 992 } else if (status == B_OK) { 993 // ignore "." and ".." entries 994 if (!strcmp(name, ".") || !strcmp(name, "..")) 995 continue; 996 997 // fill in the control data as soon as we have them 998 strlcpy(control->name, name, B_FILE_NAME_LENGTH); 999 control->inode = id; 1000 control->errors = 0; 1001 1002 Vnode vnode(fVolume, id); 1003 Inode *inode; 1004 if (vnode.Get(&inode) < B_OK) { 1005 FATAL(("Could not open inode ID %Ld!\n", id)); 1006 control->errors |= BFS_COULD_NOT_OPEN; 1007 control->status = B_ERROR; 1008 return B_OK; 1009 } 1010 1011 // check if the inode's name is the same as in the b+tree 1012 if (inode->IsRegularNode()) { 1013 SimpleLocker locker(inode->SmallDataLock()); 1014 NodeGetter node(fVolume, inode); 1015 1016 const char *localName = inode->Name(node.Node()); 1017 if (localName == NULL || strcmp(localName, name)) { 1018 control->errors |= BFS_NAMES_DONT_MATCH; 1019 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName)); 1020 } 1021 } 1022 1023 control->mode = inode->Mode(); 1024 1025 // Check for the correct mode of the node (if the mode of the 1026 // file don't fit to its parent, there is a serious problem) 1027 if (((cookie->parent_mode & S_ATTR_DIR) != 0 && !inode->IsAttribute()) 1028 || ((cookie->parent_mode & S_INDEX_DIR) != 0 && !inode->IsIndex()) 1029 || ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) 1030 == S_DIRECTORY 1031 && (inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR)) != 0)) { 1032 FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n", 1033 inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber())); 1034 1035 // if we are allowed to fix errors, we should remove the file 1036 if (control->flags & BFS_REMOVE_WRONG_TYPES 1037 && control->flags & BFS_FIX_BITMAP_ERRORS) { 1038 // it's safe to start a transaction, because Inode::Remove() 1039 // won't touch the block bitmap (which we hold the lock for) 1040 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1041 // the bitmap anyway 1042 Transaction transaction(fVolume, cookie->parent->BlockNumber()); 1043 1044 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1045 status = cookie->parent->Remove(transaction, name, NULL, inode->IsContainer()); 1046 if (status == B_OK) 1047 transaction.Done(); 1048 } else 1049 status = B_ERROR; 1050 1051 control->errors |= BFS_WRONG_TYPE; 1052 control->status = status; 1053 return B_OK; 1054 } 1055 1056 // If the inode has an attribute directory, push it on the stack 1057 if (!inode->Attributes().IsZero()) 1058 cookie->stack.Push(inode->Attributes()); 1059 1060 // push the directory on the stack so that it will be scanned later 1061 if (inode->IsContainer() && !inode->IsIndex()) 1062 cookie->stack.Push(inode->BlockRun()); 1063 else { 1064 // check it now 1065 control->status = CheckInode(inode, control); 1066 1067 return B_OK; 1068 } 1069 } 1070 } 1071 // is never reached 1072 } 1073 1074 1075 bool 1076 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1077 { 1078 size_t size = BitmapSize(); 1079 uint32 index = block / 32; // 32bit resolution 1080 if (index > size / 4) 1081 return false; 1082 1083 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f)); 1084 } 1085 1086 1087 void 1088 BlockAllocator::_SetCheckBitmapAt(off_t block) 1089 { 1090 size_t size = BitmapSize(); 1091 uint32 index = block / 32; // 32bit resolution 1092 if (index > size / 4) 1093 return; 1094 1095 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1096 } 1097 1098 1099 status_t 1100 BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control, bool allocated) 1101 { 1102 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1103 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1104 || uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits 1105 || run.length == 0) { 1106 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length())); 1107 if (control == NULL) 1108 return B_BAD_DATA; 1109 1110 control->errors |= BFS_INVALID_BLOCK_RUN; 1111 return B_OK; 1112 } 1113 1114 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1115 uint32 block = run.Start() / bitsPerBlock; 1116 uint32 pos = run.Start() % bitsPerBlock; 1117 int32 length = 0; 1118 off_t firstMissing = -1, firstSet = -1; 1119 off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1120 1121 AllocationBlock cached(fVolume); 1122 1123 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1124 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1125 RETURN_ERROR(B_IO_ERROR); 1126 1127 if (pos >= cached.NumBlockBits()) { 1128 // something very strange has happened... 1129 RETURN_ERROR(B_ERROR); 1130 } 1131 1132 while (length < run.Length() && pos < cached.NumBlockBits()) { 1133 if (cached.IsUsed(pos) != allocated) { 1134 if (control == NULL) { 1135 PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated (pos = %ld, length = %ld)!\n", 1136 type, run.AllocationGroup(), run.Start(), run.Length(), pos, length)); 1137 return B_BAD_DATA; 1138 } 1139 if (firstMissing == -1) { 1140 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1141 control->errors |= BFS_MISSING_BLOCKS; 1142 } 1143 control->stats.missing++; 1144 } else if (firstMissing != -1) { 1145 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are %sallocated!\n", 1146 type, run.allocation_group, run.start, run.length, firstMissing, 1147 firstGroupBlock + pos + block * bitsPerBlock - 1, allocated ? "not " : "")); 1148 firstMissing = -1; 1149 } 1150 1151 if (fCheckBitmap != NULL) { 1152 // Set the block in the check bitmap as well, but have a look if it 1153 // is already allocated first 1154 uint32 offset = pos + block * bitsPerBlock; 1155 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1156 if (firstSet == -1) { 1157 firstSet = firstGroupBlock + offset; 1158 control->errors |= BFS_BLOCKS_ALREADY_SET; 1159 } 1160 control->stats.already_set++; 1161 } else { 1162 if (firstSet != -1) { 1163 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are already set!\n", 1164 type, (int)run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1)); 1165 firstSet = -1; 1166 } 1167 _SetCheckBitmapAt(firstGroupBlock + offset); 1168 } 1169 } 1170 length++; 1171 pos++; 1172 } 1173 1174 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1175 if (firstMissing != -1) 1176 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1177 if (firstSet != -1) 1178 FATAL(("%s: block_run(%d, %u, %u): blocks %Ld - %Ld are already set!\n", type, (int)run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1179 } 1180 } 1181 1182 return B_OK; 1183 } 1184 1185 1186 status_t 1187 BlockAllocator::CheckInode(Inode *inode, check_control *control) 1188 { 1189 if (control != NULL && fCheckBitmap == NULL) 1190 return B_NO_INIT; 1191 if (inode == NULL) 1192 return B_BAD_VALUE; 1193 1194 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1195 if (status < B_OK) 1196 return status; 1197 1198 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1199 // symlinks may not have a valid data stream 1200 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1201 return B_BAD_DATA; 1202 1203 return B_OK; 1204 } 1205 1206 data_stream *data = &inode->Node().data; 1207 1208 // check the direct range 1209 1210 if (data->max_direct_range) { 1211 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1212 if (data->direct[i].IsZero()) 1213 break; 1214 1215 status = CheckBlockRun(data->direct[i], "direct", control); 1216 if (status < B_OK) 1217 return status; 1218 } 1219 } 1220 1221 CachedBlock cached(fVolume); 1222 1223 // check the indirect range 1224 1225 if (data->max_indirect_range) { 1226 status = CheckBlockRun(data->indirect, "indirect", control); 1227 if (status < B_OK) 1228 return status; 1229 1230 off_t block = fVolume->ToBlock(data->indirect); 1231 1232 for (int32 i = 0; i < data->indirect.Length(); i++) { 1233 block_run *runs = (block_run *)cached.SetTo(block + i); 1234 if (runs == NULL) 1235 RETURN_ERROR(B_IO_ERROR); 1236 1237 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1238 int32 index = 0; 1239 for (; index < runsPerBlock; index++) { 1240 if (runs[index].IsZero()) 1241 break; 1242 1243 status = CheckBlockRun(runs[index], "indirect->run", control); 1244 if (status < B_OK) 1245 return status; 1246 } 1247 if (index < runsPerBlock) 1248 break; 1249 } 1250 } 1251 1252 // check the double indirect range 1253 1254 if (data->max_double_indirect_range) { 1255 status = CheckBlockRun(data->double_indirect, "double indirect", control); 1256 if (status < B_OK) 1257 return status; 1258 1259 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1260 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1261 1262 CachedBlock cachedDirect(fVolume); 1263 int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift()) 1264 / sizeof(block_run); 1265 1266 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) { 1267 // get the indirect array block 1268 block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) 1269 + indirectIndex / runsPerBlock); 1270 if (array == NULL) 1271 return B_IO_ERROR; 1272 1273 block_run indirect = array[indirectIndex % runsPerBlock]; 1274 // are we finished yet? 1275 if (indirect.IsZero()) 1276 return B_OK; 1277 1278 status = CheckBlockRun(indirect, "double indirect->runs", control); 1279 if (status < B_OK) 1280 return status; 1281 1282 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run); 1283 1284 for (int32 index = 0; index < maxIndex; ) { 1285 block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect) 1286 + index / runsPerBlock); 1287 if (runs == NULL) 1288 return B_IO_ERROR; 1289 1290 do { 1291 // are we finished yet? 1292 if (runs[index % runsPerBlock].IsZero()) 1293 return B_OK; 1294 1295 status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control); 1296 if (status < B_OK) 1297 return status; 1298 } while ((++index % runsPerArray) != 0); 1299 } 1300 } 1301 } 1302 1303 return B_OK; 1304 } 1305 1306