1 /* BlockAllocator - block bitmap handling and allocation policies 2 * 3 * Copyright 2001-2006, Axel Dörfler, axeld@pinc-software.de. 4 * This file may be used under the terms of the MIT License. 5 */ 6 7 8 #include "Debug.h" 9 #include "BlockAllocator.h" 10 #include "Volume.h" 11 #include "Inode.h" 12 #include "BPlusTree.h" 13 #include "bfs_control.h" 14 15 #include <util/Stack.h> 16 #include <util/kernel_cpp.h> 17 #include <stdlib.h> 18 19 #ifdef USER 20 # define spawn_kernel_thread spawn_thread 21 #endif 22 23 24 // Things the BlockAllocator should do: 25 26 // - find a range of blocks of a certain size nearby a specific position 27 // - allocating an unsharp range of blocks for pre-allocation 28 // - free blocks 29 // - know how to deal with each allocation, special handling for directories, 30 // files, symlinks, etc. (type sensitive allocation policies) 31 32 // What makes the code complicated is the fact that we are not just reading 33 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 34 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 35 // of partitions will grow even more - so that's not an option. 36 // Instead we are reading in every block when it's used - since an allocation 37 // group can span several blocks in the block bitmap, the AllocationBlock 38 // class is there to make handling those easier. 39 40 // The current implementation is only slightly optimized and could probably 41 // be improved a lot. Furthermore, the allocation policies used here should 42 // have some real world tests. 43 44 45 struct check_cookie { 46 check_cookie() {} 47 48 block_run current; 49 Inode *parent; 50 mode_t parent_mode; 51 Stack<block_run> stack; 52 TreeIterator *iterator; 53 }; 54 55 56 class AllocationBlock : public CachedBlock { 57 public: 58 AllocationBlock(Volume *volume); 59 60 void Allocate(uint16 start, uint16 numBlocks); 61 void Free(uint16 start, uint16 numBlocks); 62 inline bool IsUsed(uint16 block); 63 64 status_t SetTo(AllocationGroup &group, uint16 block); 65 status_t SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block); 66 67 uint32 NumBlockBits() const { return fNumBits; } 68 uint32 &Block(int32 index) { return ((uint32 *)fBlock)[index]; } 69 70 private: 71 uint32 fNumBits; 72 #ifdef DEBUG 73 bool fWritable; 74 #endif 75 }; 76 77 78 class AllocationGroup { 79 public: 80 AllocationGroup(); 81 82 void AddFreeRange(int32 start, int32 blocks); 83 bool IsFull() const { return fFreeBits == 0; } 84 85 status_t Allocate(Transaction &transaction, uint16 start, int32 length); 86 status_t Free(Transaction &transaction, uint16 start, int32 length); 87 88 uint32 NumBits() const { return fNumBits; } 89 uint32 NumBlocks() const { return fNumBlocks; } 90 int32 Start() const { return fStart; } 91 92 private: 93 friend class BlockAllocator; 94 95 uint32 fNumBits; 96 uint32 fNumBlocks; 97 int32 fStart; 98 int32 fFirstFree, fLargest, fLargestFirst; 99 // ToDo: fLargest & fLargestFirst are not maintained (and therefore used) yet! 100 int32 fFreeBits; 101 }; 102 103 104 AllocationBlock::AllocationBlock(Volume *volume) 105 : CachedBlock(volume) 106 { 107 } 108 109 110 status_t 111 AllocationBlock::SetTo(AllocationGroup &group, uint16 block) 112 { 113 // 8 blocks per byte 114 fNumBits = fVolume->BlockSize() << 3; 115 // the last group may have less bits than the others 116 if ((block + 1) * fNumBits > group.NumBits()) 117 fNumBits = group.NumBits() % fNumBits; 118 119 #ifdef DEBUG 120 fWritable = false; 121 #endif 122 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 123 } 124 125 126 status_t 127 AllocationBlock::SetToWritable(Transaction &transaction, AllocationGroup &group, uint16 block) 128 { 129 // 8 blocks per byte 130 fNumBits = fVolume->BlockSize() << 3; 131 // the last group may have less bits in the last block 132 if ((block + 1) * fNumBits > group.NumBits()) 133 fNumBits = group.NumBits() % fNumBits; 134 135 #ifdef DEBUG 136 fWritable = true; 137 #endif 138 return CachedBlock::SetToWritable(transaction, group.Start() + block) != NULL 139 ? B_OK : B_ERROR; 140 } 141 142 143 bool 144 AllocationBlock::IsUsed(uint16 block) 145 { 146 if (block > fNumBits) 147 return true; 148 149 // the block bitmap is accessed in 32-bit blocks 150 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 151 } 152 153 154 void 155 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 156 { 157 ASSERT(start < fNumBits); 158 ASSERT(uint32(start + numBlocks) <= fNumBits); 159 #ifdef DEBUG 160 ASSERT(fWritable); 161 #endif 162 163 if (uint32(start + numBlocks) > fNumBits) { 164 FATAL(("Allocation::Allocate(): tried to allocate too many blocks: %u (numBlocks = %lu)!\n", numBlocks, fNumBits)); 165 DIE(("Allocation::Allocate(): tried to allocate too many blocks")); 166 } 167 168 int32 block = start >> 5; 169 170 while (numBlocks > 0) { 171 uint32 mask = 0; 172 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 173 mask |= 1UL << (i % 32); 174 175 #ifdef DEBUG 176 // check for already set blocks 177 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32 *)fBlock)[block]) { 178 FATAL(("AllocationBlock::Allocate(): some blocks are already allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 179 DEBUGGER(("blocks already set!")); 180 } 181 #endif 182 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 183 start = 0; 184 } 185 } 186 187 188 void 189 AllocationBlock::Free(uint16 start, uint16 numBlocks) 190 { 191 ASSERT(start < fNumBits); 192 ASSERT(uint32(start + numBlocks) <= fNumBits); 193 #ifdef DEBUG 194 ASSERT(fWritable); 195 #endif 196 197 if (uint32(start + numBlocks) > fNumBits) { 198 FATAL(("Allocation::Free(): tried to free too many blocks: %u (numBlocks = %lu)!\n", numBlocks, fNumBits)); 199 DIE(("Allocation::Free(): tried to free too many blocks")); 200 } 201 202 int32 block = start >> 5; 203 204 while (numBlocks > 0) { 205 uint32 mask = 0; 206 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 207 mask |= 1UL << (i % 32); 208 209 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 210 start = 0; 211 } 212 } 213 214 215 // #pragma mark - 216 217 218 /** The allocation groups are created and initialized in 219 * BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 220 * respectively. 221 */ 222 223 AllocationGroup::AllocationGroup() 224 : 225 fFirstFree(-1), 226 fLargest(-1), 227 fLargestFirst(-1), 228 fFreeBits(0) 229 { 230 } 231 232 233 void 234 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 235 { 236 //D(if (blocks > 512) 237 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 238 239 if (fFirstFree == -1) 240 fFirstFree = start; 241 242 if (fLargest < blocks) { 243 fLargest = blocks; 244 fLargestFirst = start; 245 } 246 247 fFreeBits += blocks; 248 } 249 250 251 /** Allocates the specified run in the allocation group. 252 * Doesn't check if the run is valid or already allocated 253 * partially, nor does it maintain the free ranges hints 254 * or the volume's used blocks count. 255 * It only does the low-level work of allocating some bits 256 * in the block bitmap. 257 * Assumes that the block bitmap lock is hold. 258 */ 259 260 status_t 261 AllocationGroup::Allocate(Transaction &transaction, uint16 start, int32 length) 262 { 263 if (start > fNumBits) 264 return B_ERROR; 265 266 // Update the allocation group info 267 // ToDo: this info will be incorrect if something goes wrong later 268 // Note, the fFirstFree block doesn't have to be really free 269 if (start == fFirstFree) 270 fFirstFree = start + length; 271 fFreeBits -= length; 272 273 Volume *volume = transaction.GetVolume(); 274 275 // calculate block in the block bitmap and position within 276 uint32 bitsPerBlock = volume->BlockSize() << 3; 277 uint32 block = start / bitsPerBlock; 278 start = start % bitsPerBlock; 279 280 AllocationBlock cached(volume); 281 282 while (length > 0) { 283 if (cached.SetToWritable(transaction, *this, block) < B_OK) 284 RETURN_ERROR(B_IO_ERROR); 285 286 uint32 numBlocks = length; 287 if (start + numBlocks > cached.NumBlockBits()) 288 numBlocks = cached.NumBlockBits() - start; 289 290 cached.Allocate(start, numBlocks); 291 292 length -= numBlocks; 293 start = 0; 294 block++; 295 } 296 297 return B_OK; 298 } 299 300 301 /** Frees the specified run in the allocation group. 302 * Doesn't check if the run is valid or was not completely 303 * allocated, nor does it maintain the free ranges hints 304 * or the volume's used blocks count. 305 * It only does the low-level work of freeing some bits 306 * in the block bitmap. 307 * Assumes that the block bitmap lock is hold. 308 */ 309 310 status_t 311 AllocationGroup::Free(Transaction &transaction, uint16 start, int32 length) 312 { 313 if (start > fNumBits) 314 return B_ERROR; 315 316 // Update the allocation group info 317 // ToDo: this info will be incorrect if something goes wrong later 318 if (fFirstFree > start) 319 fFirstFree = start; 320 fFreeBits += length; 321 322 Volume *volume = transaction.GetVolume(); 323 324 // calculate block in the block bitmap and position within 325 uint32 bitsPerBlock = volume->BlockSize() << 3; 326 uint32 block = start / bitsPerBlock; 327 start = start % bitsPerBlock; 328 329 AllocationBlock cached(volume); 330 331 while (length > 0) { 332 if (cached.SetToWritable(transaction, *this, block) < B_OK) 333 RETURN_ERROR(B_IO_ERROR); 334 335 uint16 freeLength = length; 336 if (uint32(start + length) > cached.NumBlockBits()) 337 freeLength = cached.NumBlockBits() - start; 338 339 cached.Free(start, freeLength); 340 341 length -= freeLength; 342 start = 0; 343 block++; 344 } 345 return B_OK; 346 } 347 348 349 // #pragma mark - 350 351 352 BlockAllocator::BlockAllocator(Volume *volume) 353 : 354 fVolume(volume), 355 fLock("bfs allocator"), 356 fGroups(NULL), 357 fCheckBitmap(NULL) 358 { 359 } 360 361 362 BlockAllocator::~BlockAllocator() 363 { 364 delete[] fGroups; 365 } 366 367 368 status_t 369 BlockAllocator::Initialize(bool full) 370 { 371 if (fLock.InitCheck() < B_OK) 372 return B_ERROR; 373 374 fNumGroups = fVolume->AllocationGroups(); 375 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 376 fGroups = new AllocationGroup[fNumGroups]; 377 if (fGroups == NULL) 378 return B_NO_MEMORY; 379 380 if (!full) 381 return B_OK; 382 383 fLock.Lock(); 384 // the lock will be released by the initialize() function 385 386 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 387 "bfs block allocator", B_LOW_PRIORITY, (void *)this); 388 if (id < B_OK) 389 return _Initialize(this); 390 391 return resume_thread(id); 392 } 393 394 395 status_t 396 BlockAllocator::InitializeAndClearBitmap(Transaction &transaction) 397 { 398 status_t status = Initialize(false); 399 if (status < B_OK) 400 return status; 401 402 uint32 blocks = fBlocksPerGroup; 403 uint32 numBits = 8 * blocks * fVolume->BlockSize(); 404 uint32 blockShift = fVolume->BlockShift(); 405 406 uint32 *buffer = (uint32 *)malloc(numBits >> 3); 407 if (buffer == NULL) 408 RETURN_ERROR(B_NO_MEMORY); 409 410 memset(buffer, 0, numBits >> 3); 411 412 off_t offset = 1; 413 414 // initialize the AllocationGroup objects and clear the on-disk bitmap 415 416 for (int32 i = 0; i < fNumGroups; i++) { 417 if (write_pos(fVolume->Device(), offset << blockShift, buffer, blocks << blockShift) < B_OK) 418 return B_ERROR; 419 420 // the last allocation group may contain less blocks than the others 421 if (i == fNumGroups - 1) { 422 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 423 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) >> (blockShift + 3)); 424 } else { 425 fGroups[i].fNumBits = numBits; 426 fGroups[i].fNumBlocks = blocks; 427 } 428 fGroups[i].fStart = offset; 429 fGroups[i].fFirstFree = fGroups[i].fLargestFirst = 0; 430 fGroups[i].fFreeBits = fGroups[i].fLargest = fGroups[i].fNumBits; 431 432 offset += blocks; 433 } 434 free(buffer); 435 436 // reserve the boot block, the log area, and the block bitmap itself 437 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 438 439 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 440 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 441 return B_ERROR; 442 } 443 fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 444 445 return B_OK; 446 } 447 448 449 status_t 450 BlockAllocator::_Initialize(BlockAllocator *allocator) 451 { 452 // The lock must already be held at this point 453 454 Volume *volume = allocator->fVolume; 455 uint32 blocks = allocator->fBlocksPerGroup; 456 uint32 numBits = 8 * blocks * volume->BlockSize(); 457 uint32 blockShift = volume->BlockShift(); 458 off_t freeBlocks = 0; 459 460 uint32 *buffer = (uint32 *)malloc(numBits >> 3); 461 if (buffer == NULL) { 462 allocator->fLock.Unlock(); 463 RETURN_ERROR(B_NO_MEMORY); 464 } 465 466 AllocationGroup *groups = allocator->fGroups; 467 off_t offset = 1; 468 int32 num = allocator->fNumGroups; 469 470 for (int32 i = 0; i < num; i++) { 471 if (read_pos(volume->Device(), offset << blockShift, buffer, 472 blocks << blockShift) < B_OK) 473 break; 474 475 // the last allocation group may contain less blocks than the others 476 if (i == num - 1) { 477 groups[i].fNumBits = volume->NumBlocks() - i * numBits; 478 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) >> (blockShift + 3)); 479 } else { 480 groups[i].fNumBits = numBits; 481 groups[i].fNumBlocks = blocks; 482 } 483 groups[i].fStart = offset; 484 485 // finds all free ranges in this allocation group 486 int32 start = -1, range = 0; 487 int32 size = groups[i].fNumBits, num = 0; 488 489 for (int32 k = 0;k < (size >> 2);k++) { 490 for (int32 j = 0; j < 32 && num < size; j++, num++) { 491 if (buffer[k] & (1UL << j)) { 492 if (range > 0) { 493 groups[i].AddFreeRange(start, range); 494 range = 0; 495 } 496 } else if (range++ == 0) 497 start = num; 498 } 499 } 500 if (range) 501 groups[i].AddFreeRange(start, range); 502 503 freeBlocks += groups[i].fFreeBits; 504 505 offset += blocks; 506 } 507 free(buffer); 508 509 // check if block bitmap and log area are reserved 510 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 511 if (allocator->CheckBlockRun(block_run::Run(0, 0, reservedBlocks)) < B_OK) { 512 Transaction transaction(volume, 0); 513 if (groups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 514 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 515 volume->Panic(); 516 } else { 517 FATAL(("space for block bitmap or log area was not reserved!\n")); 518 519 transaction.Done(); 520 } 521 } 522 523 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 524 if (volume->UsedBlocks() != usedBlocks) { 525 // If the disk in a dirty state at mount time, it's 526 // normal that the values don't match 527 INFORM(("volume reports %Ld used blocks, correct is %Ld\n", volume->UsedBlocks(), usedBlocks)); 528 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 529 } 530 531 allocator->fLock.Unlock(); 532 return B_OK; 533 } 534 535 536 status_t 537 BlockAllocator::AllocateBlocks(Transaction &transaction, int32 group, uint16 start, 538 uint16 maximum, uint16 minimum, block_run &run) 539 { 540 if (maximum == 0) 541 return B_BAD_VALUE; 542 543 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", group, start, maximum, minimum)); 544 545 AllocationBlock cached(fVolume); 546 Locker lock(fLock); 547 548 // the first scan through all allocation groups will look for the 549 // wanted maximum of blocks, the second scan will just look to 550 // satisfy the minimal requirement 551 uint16 numBlocks = maximum; 552 553 for (int32 i = 0; i < fNumGroups * 2; i++, group++, start = 0) { 554 group = group % fNumGroups; 555 556 if (start >= fGroups[group].NumBits() || fGroups[group].IsFull()) 557 continue; 558 559 if (i >= fNumGroups) { 560 // if the minimum is the same as the maximum, it's not necessary to 561 // search for in the allocation groups a second time 562 if (maximum == minimum) 563 return B_DEVICE_FULL; 564 565 numBlocks = minimum; 566 } 567 568 // The wanted maximum is smaller than the largest free block in the group 569 // or already smaller than the minimum 570 // ToDo: disabled because it's currently not maintained after the first allocation 571 //if (numBlocks > fGroups[group].fLargest) 572 // continue; 573 574 if (start < fGroups[group].fFirstFree) 575 start = fGroups[group].fFirstFree; 576 577 // there may be more than one block per allocation group - and 578 // we iterate through it to find a place for the allocation. 579 // (one allocation can't exceed one allocation group) 580 581 uint32 block = start / (fVolume->BlockSize() << 3); 582 int32 range = 0, rangeStart = 0; 583 584 for (; block < fGroups[group].NumBlocks(); block++) { 585 if (cached.SetTo(fGroups[group], block) < B_OK) 586 RETURN_ERROR(B_ERROR); 587 588 // find a block large enough to hold the allocation 589 for (uint32 bit = start % cached.NumBlockBits(); 590 bit < cached.NumBlockBits(); bit++) { 591 if (!cached.IsUsed(bit)) { 592 if (range == 0) { 593 // start new range 594 rangeStart = block * cached.NumBlockBits() + bit; 595 } 596 597 // have we found a range large enough to hold numBlocks? 598 if (++range >= maximum) 599 break; 600 } else if (i >= fNumGroups && range >= minimum) { 601 // we have found a block larger than the required minimum (second pass) 602 break; 603 } else { 604 // end of a range 605 range = 0; 606 } 607 } 608 609 // ToDo: we could also remember a "largest free block that fits the minimal 610 // requirement" in the group, and use that - this would avoid the need 611 // for a second run 612 613 // if we found a suitable block, mark the blocks as in use, and write 614 // the updated block bitmap back to disk 615 if (range >= numBlocks) { 616 // adjust allocation size 617 if (numBlocks < maximum) 618 numBlocks = range; 619 620 if (fGroups[group].Allocate(transaction, rangeStart, numBlocks) < B_OK) 621 RETURN_ERROR(B_IO_ERROR); 622 623 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(group); 624 run.start = HOST_ENDIAN_TO_BFS_INT16(rangeStart); 625 run.length = HOST_ENDIAN_TO_BFS_INT16(numBlocks); 626 627 fVolume->SuperBlock().used_blocks = 628 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + numBlocks); 629 // We are not writing back the disk's super block - it's 630 // either done by the journaling code, or when the disk 631 // is unmounted. 632 // If the value is not correct at mount time, it will be 633 // fixed anyway. 634 635 return B_OK; 636 } 637 638 // start from the beginning of the next block 639 start = 0; 640 } 641 } 642 return B_DEVICE_FULL; 643 } 644 645 646 status_t 647 BlockAllocator::AllocateForInode(Transaction &transaction, const block_run *parent, 648 mode_t type, block_run &run) 649 { 650 // apply some allocation policies here (AllocateBlocks() will break them 651 // if necessary) - we will start with those described in Dominic Giampaolo's 652 // "Practical File System Design", and see how good they work 653 654 // files are going in the same allocation group as its parent, sub-directories 655 // will be inserted 8 allocation groups after the one of the parent 656 uint16 group = parent->AllocationGroup(); 657 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 658 group += 8; 659 660 return AllocateBlocks(transaction, group, 0, 1, 1, run); 661 } 662 663 664 status_t 665 BlockAllocator::Allocate(Transaction &transaction, Inode *inode, off_t numBlocks, 666 block_run &run, uint16 minimum) 667 { 668 if (numBlocks <= 0) 669 return B_ERROR; 670 671 // one block_run can't hold more data than there is in one allocation group 672 if (numBlocks > fGroups[0].NumBits()) 673 numBlocks = fGroups[0].NumBits(); 674 675 // since block_run.length is uint16, the largest number of blocks that 676 // can be covered by a block_run is 65535 677 // ToDo: if we drop compatibility, couldn't we do this any better? 678 // There are basically two possibilities: 679 // a) since a length of zero doesn't have any sense, take that for 65536 - 680 // but that could cause many problems (bugs) in other areas 681 // b) reduce the maximum amount of blocks per block_run, so that the remaining 682 // number of free blocks can be used in a useful manner (like 4 blocks) - 683 // but that would also reduce the maximum file size 684 // c) have BlockRun::Length() return (length + 1). 685 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 686 numBlocks = MAX_BLOCK_RUN_LENGTH; 687 688 // apply some allocation policies here (AllocateBlocks() will break them 689 // if necessary) 690 uint16 group = inode->BlockRun().AllocationGroup(); 691 uint16 start = 0; 692 693 // are there already allocated blocks? (then just try to allocate near the last one) 694 if (inode->Size() > 0) { 695 const data_stream &data = inode->Node().data; 696 // ToDo: we currently don't care for when the data stream 697 // is already grown into the indirect ranges 698 if (data.max_double_indirect_range == 0 699 && data.max_indirect_range == 0) { 700 // Since size > 0, there must be a valid block run in this stream 701 int32 last = 0; 702 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 703 if (data.direct[last + 1].IsZero()) 704 break; 705 706 group = data.direct[last].AllocationGroup(); 707 start = data.direct[last].Start() + data.direct[last].Length(); 708 } 709 } else if (inode->IsContainer() || inode->IsSymLink()) { 710 // directory and symbolic link data will go in the same allocation 711 // group as the inode is in but after the inode data 712 start = inode->BlockRun().Start(); 713 } else { 714 // file data will start in the next allocation group 715 group = inode->BlockRun().AllocationGroup() + 1; 716 } 717 718 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 719 } 720 721 722 status_t 723 BlockAllocator::Free(Transaction &transaction, block_run run) 724 { 725 Locker lock(fLock); 726 727 int32 group = run.AllocationGroup(); 728 uint16 start = run.Start(); 729 uint16 length = run.Length(); 730 731 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, length)); 732 733 // doesn't use Volume::IsValidBlockRun() here because it can check better 734 // against the group size (the last group may have a different length) 735 if (group < 0 || group >= fNumGroups 736 || start > fGroups[group].NumBits() 737 || uint32(start + length) > fGroups[group].NumBits() 738 || length == 0) { 739 FATAL(("tried to free an invalid block_run (%ld, %u, %u)\n", group, start, length)); 740 DEBUGGER(("tried to free invalid block_run")); 741 return B_BAD_VALUE; 742 } 743 // check if someone tries to free reserved areas at the beginning of the drive 744 if (group == 0 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 745 FATAL(("tried to free a reserved block_run (%ld, %u, %u)\n", group, start, length)); 746 DEBUGGER(("tried to free reserved block")); 747 return B_BAD_VALUE; 748 } 749 #ifdef DEBUG 750 if (CheckBlockRun(run) < B_OK) 751 return B_BAD_DATA; 752 #endif 753 754 if (fGroups[group].Free(transaction, start, length) < B_OK) 755 RETURN_ERROR(B_IO_ERROR); 756 757 #ifdef DEBUG 758 if (CheckBlockRun(run, NULL, NULL, false) < B_OK) 759 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just freed)\n")); 760 #endif 761 762 fVolume->SuperBlock().used_blocks = 763 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 764 return B_OK; 765 } 766 767 768 size_t 769 BlockAllocator::BitmapSize() const 770 { 771 return fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 772 } 773 774 775 // #pragma mark - 776 // Functions to check the validity of the bitmap - they are used from 777 // the "chkbfs" command 778 779 780 bool 781 BlockAllocator::_IsValidCheckControl(check_control *control) 782 { 783 if (control == NULL 784 || control->magic != BFS_IOCTL_CHECK_MAGIC) { 785 FATAL(("invalid check_control (%p)!\n", control)); 786 return false; 787 } 788 789 return true; 790 } 791 792 793 status_t 794 BlockAllocator::StartChecking(check_control *control) 795 { 796 if (!_IsValidCheckControl(control)) 797 return B_BAD_VALUE; 798 799 status_t status = fLock.Lock(); 800 if (status < B_OK) 801 return status; 802 803 size_t size = BitmapSize(); 804 fCheckBitmap = (uint32 *)malloc(size); 805 if (fCheckBitmap == NULL) { 806 fLock.Unlock(); 807 return B_NO_MEMORY; 808 } 809 810 check_cookie *cookie = new check_cookie(); 811 if (cookie == NULL) { 812 free(fCheckBitmap); 813 fCheckBitmap = NULL; 814 fLock.Unlock(); 815 816 return B_NO_MEMORY; 817 } 818 819 // initialize bitmap 820 memset(fCheckBitmap, 0, size); 821 for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); block-- > 0;) { 822 _SetCheckBitmapAt(block); 823 } 824 825 cookie->stack.Push(fVolume->Root()); 826 cookie->stack.Push(fVolume->Indices()); 827 cookie->iterator = NULL; 828 control->cookie = cookie; 829 830 fCheckCookie = cookie; 831 // to be able to restore nicely if "chkbfs" exited abnormally 832 833 // ToDo: check reserved area in bitmap! 834 835 return B_OK; 836 } 837 838 839 status_t 840 BlockAllocator::StopChecking(check_control *control) 841 { 842 check_cookie *cookie; 843 if (control == NULL) 844 cookie = fCheckCookie; 845 else 846 cookie = (check_cookie *)control->cookie; 847 848 if (cookie == NULL) 849 return B_ERROR; 850 851 if (cookie->iterator != NULL) { 852 delete cookie->iterator; 853 cookie->iterator = NULL; 854 855 // the current directory inode is still locked in memory 856 put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current)); 857 } 858 859 // if CheckNextNode() could completely work through, we can 860 // fix any damages of the bitmap 861 if (control != NULL && control->status == B_ENTRY_NOT_FOUND) { 862 // calculate the number of used blocks in the check bitmap 863 size_t size = fVolume->BlockSize() * fNumGroups * fBlocksPerGroup; 864 off_t usedBlocks = 0LL; 865 866 // ToDo: update the allocation groups used blocks info 867 for (uint32 i = size >> 2; i-- > 0;) { 868 uint32 compare = 1; 869 for (int16 j = 0; j < 32; j++, compare <<= 1) { 870 if (compare & fCheckBitmap[i]) 871 usedBlocks++; 872 } 873 } 874 875 control->stats.freed = fVolume->UsedBlocks() - usedBlocks + control->stats.missing; 876 if (control->stats.freed < 0) 877 control->stats.freed = 0; 878 879 // Should we fix errors? Were there any errors we can fix? 880 if (control->flags & BFS_FIX_BITMAP_ERRORS 881 && (control->stats.freed != 0 || control->stats.missing != 0)) { 882 // if so, write the check bitmap back over the original one, 883 // and use transactions here to play safe - we even use several 884 // transactions, so that we don't blow the maximum log size 885 // on large disks; since we don't need to make this atomic 886 fVolume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 887 888 int32 blocksInBitmap = fNumGroups * fBlocksPerGroup; 889 int32 blockSize = fVolume->BlockSize(); 890 891 for (int32 i = 0; i < blocksInBitmap; i += 512) { 892 Transaction transaction(fVolume, 1 + i); 893 894 int32 blocksToWrite = 512; 895 if (blocksToWrite + i > blocksInBitmap) 896 blocksToWrite = blocksInBitmap - i; 897 898 status_t status = transaction.WriteBlocks(1 + i, 899 (uint8 *)fCheckBitmap + i * blockSize, blocksToWrite); 900 if (status < B_OK) { 901 FATAL(("error writing bitmap: %s\n", strerror(status))); 902 break; 903 } 904 transaction.Done(); 905 } 906 } 907 } else 908 FATAL(("BlockAllocator::CheckNextNode() didn't run through\n")); 909 910 free(fCheckBitmap); 911 fCheckBitmap = NULL; 912 fCheckCookie = NULL; 913 delete cookie; 914 fLock.Unlock(); 915 916 return B_OK; 917 } 918 919 920 status_t 921 BlockAllocator::CheckNextNode(check_control *control) 922 { 923 if (!_IsValidCheckControl(control)) 924 return B_BAD_VALUE; 925 926 check_cookie *cookie = (check_cookie *)control->cookie; 927 928 while (true) { 929 if (cookie->iterator == NULL) { 930 if (!cookie->stack.Pop(&cookie->current)) { 931 // no more runs on the stack, we are obviously finished! 932 control->status = B_ENTRY_NOT_FOUND; 933 return B_ENTRY_NOT_FOUND; 934 } 935 936 // get iterator for the next directory 937 938 #ifdef UNSAFE_GET_VNODE 939 RecursiveLocker locker(fVolume->Lock()); 940 #endif 941 Vnode vnode(fVolume, cookie->current); 942 Inode *inode; 943 if (vnode.Get(&inode) < B_OK) { 944 FATAL(("check: Could not open inode at %Ld\n", fVolume->ToBlock(cookie->current))); 945 continue; 946 } 947 948 if (!inode->IsContainer()) { 949 FATAL(("check: inode at %Ld should have been a directory\n", fVolume->ToBlock(cookie->current))); 950 continue; 951 } 952 953 BPlusTree *tree; 954 if (inode->GetTree(&tree) != B_OK) { 955 FATAL(("check: could not open b+tree from inode at %Ld\n", fVolume->ToBlock(cookie->current))); 956 continue; 957 } 958 959 cookie->parent = inode; 960 cookie->parent_mode = inode->Mode(); 961 962 cookie->iterator = new TreeIterator(tree); 963 if (cookie->iterator == NULL) 964 RETURN_ERROR(B_NO_MEMORY); 965 966 // the inode must stay locked in memory until the iterator is freed 967 vnode.Keep(); 968 969 // check the inode of the directory 970 control->errors = 0; 971 control->status = CheckInode(inode, control); 972 973 if (inode->GetName(control->name) < B_OK) 974 strcpy(control->name, "(node has no name)"); 975 976 control->inode = inode->ID(); 977 control->mode = inode->Mode(); 978 979 return B_OK; 980 } 981 982 char name[B_FILE_NAME_LENGTH]; 983 uint16 length; 984 vnode_id id; 985 986 status_t status = cookie->iterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); 987 if (status == B_ENTRY_NOT_FOUND) { 988 // there are no more entries in this iterator, free it and go on 989 delete cookie->iterator; 990 cookie->iterator = NULL; 991 992 // unlock the directory's inode from memory 993 put_vnode(fVolume->ID(), fVolume->ToVnode(cookie->current)); 994 995 continue; 996 } else if (status == B_OK) { 997 // ignore "." and ".." entries 998 if (!strcmp(name, ".") || !strcmp(name, "..")) 999 continue; 1000 1001 // fill in the control data as soon as we have them 1002 strlcpy(control->name, name, B_FILE_NAME_LENGTH); 1003 control->inode = id; 1004 control->errors = 0; 1005 1006 Vnode vnode(fVolume, id); 1007 Inode *inode; 1008 if (vnode.Get(&inode) < B_OK) { 1009 FATAL(("Could not open inode ID %Ld!\n", id)); 1010 control->errors |= BFS_COULD_NOT_OPEN; 1011 control->status = B_ERROR; 1012 return B_OK; 1013 } 1014 1015 // check if the inode's name is the same as in the b+tree 1016 if (inode->IsRegularNode()) { 1017 SimpleLocker locker(inode->SmallDataLock()); 1018 NodeGetter node(fVolume, inode); 1019 1020 const char *localName = inode->Name(node.Node()); 1021 if (localName == NULL || strcmp(localName, name)) { 1022 control->errors |= BFS_NAMES_DONT_MATCH; 1023 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, localName)); 1024 } 1025 } 1026 1027 control->mode = inode->Mode(); 1028 1029 // Check for the correct mode of the node (if the mode of the 1030 // file don't fit to its parent, there is a serious problem) 1031 if (((cookie->parent_mode & S_ATTR_DIR) != 0 && !inode->IsAttribute()) 1032 || ((cookie->parent_mode & S_INDEX_DIR) != 0 && !inode->IsIndex()) 1033 || ((cookie->parent_mode & (S_DIRECTORY | S_ATTR_DIR | S_INDEX_DIR)) 1034 == S_DIRECTORY 1035 && (inode->Mode() & (S_ATTR | S_ATTR_DIR | S_INDEX_DIR)) != 0)) { 1036 FATAL(("inode at %Ld is of wrong type: %o (parent %o at %Ld)!\n", 1037 inode->BlockNumber(), inode->Mode(), cookie->parent_mode, cookie->parent->BlockNumber())); 1038 1039 // if we are allowed to fix errors, we should remove the file 1040 if (control->flags & BFS_REMOVE_WRONG_TYPES 1041 && control->flags & BFS_FIX_BITMAP_ERRORS) { 1042 #ifdef UNSAFE_GET_VNODE 1043 RecursiveLocker locker(fVolume->Lock()); 1044 #endif 1045 // it's safe to start a transaction, because Inode::Remove() 1046 // won't touch the block bitmap (which we hold the lock for) 1047 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1048 // the bitmap anyway 1049 Transaction transaction(fVolume, cookie->parent->BlockNumber()); 1050 1051 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1052 status = cookie->parent->Remove(transaction, name, NULL, inode->IsContainer()); 1053 if (status == B_OK) 1054 transaction.Done(); 1055 } else 1056 status = B_ERROR; 1057 1058 control->errors |= BFS_WRONG_TYPE; 1059 control->status = status; 1060 return B_OK; 1061 } 1062 1063 // If the inode has an attribute directory, push it on the stack 1064 if (!inode->Attributes().IsZero()) 1065 cookie->stack.Push(inode->Attributes()); 1066 1067 // push the directory on the stack so that it will be scanned later 1068 if (inode->IsContainer() && !inode->IsIndex()) 1069 cookie->stack.Push(inode->BlockRun()); 1070 else { 1071 // check it now 1072 control->status = CheckInode(inode, control); 1073 1074 return B_OK; 1075 } 1076 } 1077 } 1078 // is never reached 1079 } 1080 1081 1082 bool 1083 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1084 { 1085 size_t size = BitmapSize(); 1086 uint32 index = block / 32; // 32bit resolution 1087 if (index > size / 4) 1088 return false; 1089 1090 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) & (1UL << (block & 0x1f)); 1091 } 1092 1093 1094 void 1095 BlockAllocator::_SetCheckBitmapAt(off_t block) 1096 { 1097 size_t size = BitmapSize(); 1098 uint32 index = block / 32; // 32bit resolution 1099 if (index > size / 4) 1100 return; 1101 1102 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1103 } 1104 1105 1106 status_t 1107 BlockAllocator::CheckBlockRun(block_run run, const char *type, check_control *control, bool allocated) 1108 { 1109 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1110 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1111 || uint32(run.Start() + run.Length()) > fGroups[run.AllocationGroup()].fNumBits 1112 || run.length == 0) { 1113 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, run.AllocationGroup(), run.Start(), run.Length())); 1114 if (control == NULL) 1115 return B_BAD_DATA; 1116 1117 control->errors |= BFS_INVALID_BLOCK_RUN; 1118 return B_OK; 1119 } 1120 1121 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1122 uint32 block = run.Start() / bitsPerBlock; 1123 uint32 pos = run.Start() % bitsPerBlock; 1124 int32 length = 0; 1125 off_t firstMissing = -1, firstSet = -1; 1126 off_t firstGroupBlock = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1127 1128 AllocationBlock cached(fVolume); 1129 1130 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1131 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1132 RETURN_ERROR(B_IO_ERROR); 1133 1134 if (pos >= cached.NumBlockBits()) { 1135 // something very strange has happened... 1136 RETURN_ERROR(B_ERROR); 1137 } 1138 1139 while (length < run.Length() && pos < cached.NumBlockBits()) { 1140 if (cached.IsUsed(pos) != allocated) { 1141 if (control == NULL) { 1142 PRINT(("%s: block_run(%ld, %u, %u) is only partially allocated (pos = %ld, length = %ld)!\n", 1143 type, run.AllocationGroup(), run.Start(), run.Length(), pos, length)); 1144 return B_BAD_DATA; 1145 } 1146 if (firstMissing == -1) { 1147 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1148 control->errors |= BFS_MISSING_BLOCKS; 1149 } 1150 control->stats.missing++; 1151 } else if (firstMissing != -1) { 1152 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are %sallocated!\n", 1153 type, run.allocation_group, run.start, run.length, firstMissing, 1154 firstGroupBlock + pos + block * bitsPerBlock - 1, allocated ? "not " : "")); 1155 firstMissing = -1; 1156 } 1157 1158 if (fCheckBitmap != NULL) { 1159 // Set the block in the check bitmap as well, but have a look if it 1160 // is already allocated first 1161 uint32 offset = pos + block * bitsPerBlock; 1162 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1163 if (firstSet == -1) { 1164 firstSet = firstGroupBlock + offset; 1165 control->errors |= BFS_BLOCKS_ALREADY_SET; 1166 } 1167 control->stats.already_set++; 1168 } else { 1169 if (firstSet != -1) { 1170 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", 1171 type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + offset - 1)); 1172 firstSet = -1; 1173 } 1174 _SetCheckBitmapAt(firstGroupBlock + offset); 1175 } 1176 } 1177 length++; 1178 pos++; 1179 } 1180 1181 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1182 if (firstMissing != -1) 1183 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not allocated!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstMissing, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1184 if (firstSet != -1) 1185 FATAL(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are already set!\n", type, run.AllocationGroup(), run.Start(), run.Length(), firstSet, firstGroupBlock + pos + block * bitsPerBlock - 1)); 1186 } 1187 } 1188 1189 return B_OK; 1190 } 1191 1192 1193 status_t 1194 BlockAllocator::CheckInode(Inode *inode, check_control *control) 1195 { 1196 if (control != NULL && fCheckBitmap == NULL) 1197 return B_NO_INIT; 1198 if (inode == NULL) 1199 return B_BAD_VALUE; 1200 1201 status_t status = CheckBlockRun(inode->BlockRun(), "inode", control); 1202 if (status < B_OK) 1203 return status; 1204 1205 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1206 // symlinks may not have a valid data stream 1207 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1208 return B_BAD_DATA; 1209 1210 return B_OK; 1211 } 1212 1213 data_stream *data = &inode->Node().data; 1214 1215 // check the direct range 1216 1217 if (data->max_direct_range) { 1218 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1219 if (data->direct[i].IsZero()) 1220 break; 1221 1222 status = CheckBlockRun(data->direct[i], "direct", control); 1223 if (status < B_OK) 1224 return status; 1225 } 1226 } 1227 1228 CachedBlock cached(fVolume); 1229 1230 // check the indirect range 1231 1232 if (data->max_indirect_range) { 1233 status = CheckBlockRun(data->indirect, "indirect", control); 1234 if (status < B_OK) 1235 return status; 1236 1237 off_t block = fVolume->ToBlock(data->indirect); 1238 1239 for (int32 i = 0; i < data->indirect.Length(); i++) { 1240 block_run *runs = (block_run *)cached.SetTo(block + i); 1241 if (runs == NULL) 1242 RETURN_ERROR(B_IO_ERROR); 1243 1244 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1245 int32 index = 0; 1246 for (; index < runsPerBlock; index++) { 1247 if (runs[index].IsZero()) 1248 break; 1249 1250 status = CheckBlockRun(runs[index], "indirect->run", control); 1251 if (status < B_OK) 1252 return status; 1253 } 1254 if (index < runsPerBlock) 1255 break; 1256 } 1257 } 1258 1259 // check the double indirect range 1260 1261 if (data->max_double_indirect_range) { 1262 status = CheckBlockRun(data->double_indirect, "double indirect", control); 1263 if (status < B_OK) 1264 return status; 1265 1266 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1267 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1268 1269 CachedBlock cachedDirect(fVolume); 1270 int32 maxIndirectIndex = (data->double_indirect.Length() << fVolume->BlockShift()) 1271 / sizeof(block_run); 1272 1273 for (int32 indirectIndex = 0; indirectIndex < maxIndirectIndex; indirectIndex++) { 1274 // get the indirect array block 1275 block_run *array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) 1276 + indirectIndex / runsPerBlock); 1277 if (array == NULL) 1278 return B_IO_ERROR; 1279 1280 block_run indirect = array[indirectIndex % runsPerBlock]; 1281 // are we finished yet? 1282 if (indirect.IsZero()) 1283 return B_OK; 1284 1285 status = CheckBlockRun(indirect, "double indirect->runs", control); 1286 if (status < B_OK) 1287 return status; 1288 1289 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) / sizeof(block_run); 1290 1291 for (int32 index = 0; index < maxIndex; ) { 1292 block_run *runs = (block_run *)cachedDirect.SetTo(fVolume->ToBlock(indirect) 1293 + index / runsPerBlock); 1294 if (runs == NULL) 1295 return B_IO_ERROR; 1296 1297 do { 1298 // are we finished yet? 1299 if (runs[index % runsPerBlock].IsZero()) 1300 return B_OK; 1301 1302 status = CheckBlockRun(runs[index % runsPerBlock], "double indirect->runs->run", control); 1303 if (status < B_OK) 1304 return status; 1305 } while ((++index % runsPerArray) != 0); 1306 } 1307 } 1308 } 1309 1310 return B_OK; 1311 } 1312 1313