1 /* 2 * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 7 //! superblock, mounting, etc. 8 9 10 #include "Attribute.h" 11 #include "CheckVisitor.h" 12 #include "Debug.h" 13 #include "file_systems/DeviceOpener.h" 14 #include "Inode.h" 15 #include "Journal.h" 16 #include "Query.h" 17 #include "Volume.h" 18 19 20 static const int32 kDesiredAllocationGroups = 56; 21 // This is the number of allocation groups that will be tried 22 // to be given for newly initialized disks. 23 // That's only relevant for smaller disks, though, since any 24 // of today's disk sizes already reach the maximum length 25 // of an allocation group (65536 blocks). 26 // It seems to create appropriate numbers for smaller disks 27 // with this setting, though (i.e. you can create a 400 MB 28 // file on a 1 GB disk without the need for double indirect 29 // blocks). 30 31 32 // #pragma mark - 33 34 35 bool 36 disk_super_block::IsValid() const 37 { 38 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 39 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 40 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 41 || (int32)block_size != inode_size 42 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 43 || (1UL << BlockShift()) != BlockSize() 44 || AllocationGroups() < 1 45 || AllocationGroupShift() < 1 46 || BlocksPerAllocationGroup() < 1 47 || NumBlocks() < 10 48 || AllocationGroups() != divide_roundup(NumBlocks(), 49 1L << AllocationGroupShift())) 50 return false; 51 52 return true; 53 } 54 55 56 void 57 disk_super_block::Initialize(const char* diskName, off_t numBlocks, 58 uint32 blockSize) 59 { 60 memset(this, 0, sizeof(disk_super_block)); 61 62 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 63 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 64 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 65 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN); 66 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN); 67 68 strlcpy(name, diskName, sizeof(name)); 69 70 int32 blockShift = 9; 71 while ((1UL << blockShift) < blockSize) { 72 blockShift++; 73 } 74 75 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 76 block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift); 77 78 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks); 79 used_blocks = 0; 80 81 // Get the minimum ag_shift (that's determined by the block size) 82 83 int32 bitsPerBlock = blockSize << 3; 84 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 85 int32 bitmapBlocksPerGroup = 1; 86 int32 groupShift = 13; 87 88 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 89 groupShift++; 90 } 91 92 // Many allocation groups help applying allocation policies, but if 93 // they are too small, we will need to many block_runs to cover large 94 // files (see above to get an explanation of the kDesiredAllocationGroups 95 // constant). 96 97 int32 numGroups; 98 99 while (true) { 100 numGroups = (bitmapBlocks + bitmapBlocksPerGroup - 1) / bitmapBlocksPerGroup; 101 if (numGroups > kDesiredAllocationGroups) { 102 if (groupShift == 16) 103 break; 104 105 groupShift++; 106 bitmapBlocksPerGroup *= 2; 107 } else 108 break; 109 } 110 111 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups); 112 // blocks_per_ag holds the number of bitmap blocks that are in each allocation group 113 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(bitmapBlocksPerGroup); 114 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift); 115 } 116 117 118 // #pragma mark - 119 120 121 Volume::Volume(fs_volume* volume) 122 : 123 fVolume(volume), 124 fBlockAllocator(this), 125 fRootNode(NULL), 126 fIndicesNode(NULL), 127 fDirtyCachedBlocks(0), 128 fFlags(0), 129 fCheckingThread(-1), 130 fCheckVisitor(NULL) 131 { 132 mutex_init(&fLock, "bfs volume"); 133 mutex_init(&fQueryLock, "bfs queries"); 134 } 135 136 137 Volume::~Volume() 138 { 139 mutex_destroy(&fQueryLock); 140 mutex_destroy(&fLock); 141 } 142 143 144 bool 145 Volume::IsValidSuperBlock() const 146 { 147 return fSuperBlock.IsValid(); 148 } 149 150 151 /*! Checks whether the given block number may be the location of an inode block. 152 */ 153 bool 154 Volume::IsValidInodeBlock(off_t block) const 155 { 156 return block > fSuperBlock.LogEnd() && block < NumBlocks(); 157 } 158 159 160 void 161 Volume::Panic() 162 { 163 FATAL(("Disk corrupted... switch to read-only mode!\n")); 164 fFlags |= VOLUME_READ_ONLY; 165 #if KDEBUG 166 kernel_debugger("BFS panics!"); 167 #endif 168 } 169 170 171 status_t 172 Volume::Mount(const char* deviceName, uint32 flags) 173 { 174 // TODO: validate the FS in write mode as well! 175 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 176 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 177 // in big endian mode, we only mount read-only for now 178 flags |= B_MOUNT_READ_ONLY; 179 #endif 180 181 DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0 182 ? O_RDONLY : O_RDWR); 183 fDevice = opener.Device(); 184 if (fDevice < B_OK) 185 RETURN_ERROR(fDevice); 186 187 if (opener.IsReadOnly()) 188 fFlags |= VOLUME_READ_ONLY; 189 190 // read the superblock 191 if (Identify(fDevice, &fSuperBlock) != B_OK) { 192 FATAL(("invalid superblock!\n")); 193 return B_BAD_VALUE; 194 } 195 196 // initialize short hands to the superblock (to save byte swapping) 197 fBlockSize = fSuperBlock.BlockSize(); 198 fBlockShift = fSuperBlock.BlockShift(); 199 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 200 201 // check if the device size is large enough to hold the file system 202 off_t diskSize; 203 if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK) 204 RETURN_ERROR(B_ERROR); 205 if (diskSize < (NumBlocks() << BlockShift())) { 206 FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%" 207 B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift())); 208 RETURN_ERROR(B_BAD_VALUE); 209 } 210 211 // set the current log pointers, so that journaling will work correctly 212 fLogStart = fSuperBlock.LogStart(); 213 fLogEnd = fSuperBlock.LogEnd(); 214 215 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 216 return B_ERROR; 217 218 fJournal = new(std::nothrow) Journal(this); 219 if (fJournal == NULL) 220 return B_NO_MEMORY; 221 222 status_t status = fJournal->InitCheck(); 223 if (status < B_OK) { 224 FATAL(("could not initialize journal: %s!\n", strerror(status))); 225 return status; 226 } 227 228 // replaying the log is the first thing we will do on this disk 229 status = fJournal->ReplayLog(); 230 if (status != B_OK) { 231 FATAL(("Replaying log failed, data may be corrupted, volume " 232 "read-only.\n")); 233 fFlags |= VOLUME_READ_ONLY; 234 // TODO: if this is the boot volume, Bootscript will assume this 235 // is a CD... 236 // TODO: it would be nice to have a user visible alert instead 237 // of letting him just find this in the syslog. 238 } 239 240 status = fBlockAllocator.Initialize(); 241 if (status != B_OK) { 242 FATAL(("could not initialize block bitmap allocator!\n")); 243 return status; 244 } 245 246 fRootNode = new(std::nothrow) Inode(this, ToVnode(Root())); 247 if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) { 248 status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode, 249 &gBFSVnodeOps, fRootNode->Mode(), 0); 250 if (status == B_OK) { 251 // try to get indices root dir 252 253 if (!Indices().IsZero()) { 254 fIndicesNode = new(std::nothrow) Inode(this, 255 ToVnode(Indices())); 256 } 257 258 if (fIndicesNode == NULL 259 || fIndicesNode->InitCheck() < B_OK 260 || !fIndicesNode->IsContainer()) { 261 INFORM(("bfs: volume doesn't have indices!\n")); 262 263 if (fIndicesNode) { 264 // if this is the case, the index root node is gone bad, 265 // and BFS switch to read-only mode 266 fFlags |= VOLUME_READ_ONLY; 267 delete fIndicesNode; 268 fIndicesNode = NULL; 269 } 270 } else { 271 // we don't use the vnode layer to access the indices node 272 } 273 } else { 274 FATAL(("could not create root node: publish_vnode() failed!\n")); 275 delete fRootNode; 276 return status; 277 } 278 } else { 279 status = B_BAD_VALUE; 280 FATAL(("could not create root node!\n")); 281 282 // We need to wait for the block allocator to finish 283 fBlockAllocator.Uninitialize(); 284 return status; 285 } 286 287 // all went fine 288 opener.Keep(); 289 return B_OK; 290 } 291 292 293 status_t 294 Volume::Unmount() 295 { 296 put_vnode(fVolume, ToVnode(Root())); 297 298 fBlockAllocator.Uninitialize(); 299 300 // This will also flush the log & all blocks to disk 301 delete fJournal; 302 fJournal = NULL; 303 304 delete fIndicesNode; 305 306 block_cache_delete(fBlockCache, !IsReadOnly()); 307 close(fDevice); 308 309 return B_OK; 310 } 311 312 313 status_t 314 Volume::Sync() 315 { 316 return fJournal->FlushLogAndBlocks(); 317 } 318 319 320 status_t 321 Volume::ValidateBlockRun(block_run run) 322 { 323 if (run.AllocationGroup() < 0 324 || run.AllocationGroup() > (int32)AllocationGroups() 325 || run.Start() > (1UL << AllocationGroupShift()) 326 || run.length == 0 327 || uint32(run.Length() + run.Start()) 328 > (1UL << AllocationGroupShift())) { 329 Panic(); 330 FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(), 331 run.Start(), run.Length())); 332 return B_BAD_DATA; 333 } 334 return B_OK; 335 } 336 337 338 block_run 339 Volume::ToBlockRun(off_t block) const 340 { 341 block_run run; 342 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32( 343 block >> AllocationGroupShift()); 344 run.start = HOST_ENDIAN_TO_BFS_INT16( 345 block & ((1LL << AllocationGroupShift()) - 1)); 346 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 347 return run; 348 } 349 350 351 status_t 352 Volume::CreateIndicesRoot(Transaction& transaction) 353 { 354 off_t id; 355 status_t status = Inode::Create(transaction, NULL, NULL, 356 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id, 357 &fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE); 358 if (status < B_OK) 359 RETURN_ERROR(status); 360 361 fSuperBlock.indices = ToBlockRun(id); 362 return WriteSuperBlock(); 363 } 364 365 366 status_t 367 Volume::CreateVolumeID(Transaction& transaction) 368 { 369 Attribute attr(fRootNode); 370 status_t status; 371 attr_cookie* cookie; 372 status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie); 373 if (status == B_OK) { 374 static bool seeded = false; 375 if (!seeded) { 376 // seed the random number generator for the be:volume_id attribute. 377 srand(time(NULL)); 378 seeded = true; 379 } 380 uint64_t id; 381 size_t length = sizeof(id); 382 id = ((uint64_t)rand() << 32) | rand(); 383 attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL); 384 } 385 return status; 386 } 387 388 389 390 status_t 391 Volume::AllocateForInode(Transaction& transaction, const Inode* parent, 392 mode_t type, block_run& run) 393 { 394 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), 395 type, run); 396 } 397 398 399 status_t 400 Volume::WriteSuperBlock() 401 { 402 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) 403 != sizeof(disk_super_block)) 404 return B_IO_ERROR; 405 406 return B_OK; 407 } 408 409 410 void 411 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type, 412 const uint8* oldKey, size_t oldLength, const uint8* newKey, 413 size_t newLength) 414 { 415 MutexLocker _(fQueryLock); 416 417 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator(); 418 while (iterator.HasNext()) { 419 Query* query = iterator.Next(); 420 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, 421 newLength); 422 } 423 } 424 425 426 void 427 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID, 428 const char* oldName, ino_t newDirectoryID, const char* newName) 429 { 430 MutexLocker _(fQueryLock); 431 432 size_t oldLength = strlen(oldName); 433 size_t newLength = strlen(newName); 434 435 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator(); 436 while (iterator.HasNext()) { 437 Query* query = iterator.Next(); 438 query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength, 439 newDirectoryID, newName, newLength); 440 } 441 } 442 443 444 /*! Checks if there is a live query whose results depend on the presence 445 or value of the specified attribute. 446 Don't use it if you already have all the data together to evaluate 447 the queries - it wouldn't safe you anything in this case. 448 */ 449 bool 450 Volume::CheckForLiveQuery(const char* attribute) 451 { 452 // TODO: check for a live query that depends on the specified attribute 453 return true; 454 } 455 456 457 void 458 Volume::AddQuery(Query* query) 459 { 460 MutexLocker _(fQueryLock); 461 fQueries.Add(query); 462 } 463 464 465 void 466 Volume::RemoveQuery(Query* query) 467 { 468 MutexLocker _(fQueryLock); 469 fQueries.Remove(query); 470 } 471 472 473 status_t 474 Volume::CreateCheckVisitor() 475 { 476 if (fCheckVisitor != NULL) 477 return B_BUSY; 478 479 fCheckVisitor = new(std::nothrow) ::CheckVisitor(this); 480 if (fCheckVisitor == NULL) 481 return B_NO_MEMORY; 482 483 return B_OK; 484 } 485 486 487 void 488 Volume::DeleteCheckVisitor() 489 { 490 delete fCheckVisitor; 491 fCheckVisitor = NULL; 492 } 493 494 495 // #pragma mark - Disk scanning and initialization 496 497 498 /*static*/ status_t 499 Volume::CheckSuperBlock(const uint8* data, uint32* _offset) 500 { 501 disk_super_block* superBlock = (disk_super_block*)(data + 512); 502 if (superBlock->IsValid()) { 503 if (_offset != NULL) 504 *_offset = 512; 505 return B_OK; 506 } 507 508 #ifndef BFS_LITTLE_ENDIAN_ONLY 509 // For PPC, the superblock might be located at offset 0 510 superBlock = (disk_super_block*)data; 511 if (superBlock->IsValid()) { 512 if (_offset != NULL) 513 *_offset = 0; 514 return B_OK; 515 } 516 #endif 517 518 return B_BAD_VALUE; 519 } 520 521 522 /*static*/ status_t 523 Volume::Identify(int fd, disk_super_block* superBlock) 524 { 525 uint8 buffer[1024]; 526 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 527 return B_IO_ERROR; 528 529 uint32 offset; 530 if (CheckSuperBlock(buffer, &offset) != B_OK) 531 return B_BAD_VALUE; 532 533 memcpy(superBlock, buffer + offset, sizeof(disk_super_block)); 534 return B_OK; 535 } 536 537 538 status_t 539 Volume::Initialize(int fd, const char* name, uint32 blockSize, 540 uint32 flags) 541 { 542 // although there is no really good reason for it, we won't 543 // accept '/' in disk names (mkbfs does this, too - and since 544 // Tracker names mounted volumes like their name) 545 if (strchr(name, '/') != NULL) 546 return B_BAD_VALUE; 547 548 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 549 && blockSize != 8192) 550 return B_BAD_VALUE; 551 552 DeviceOpener opener(fd, O_RDWR); 553 if (opener.Device() < B_OK) 554 return B_BAD_VALUE; 555 556 if (opener.IsReadOnly()) 557 return B_READ_ONLY_DEVICE; 558 559 fDevice = opener.Device(); 560 561 uint32 deviceBlockSize; 562 off_t deviceSize; 563 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 564 return B_ERROR; 565 566 off_t numBlocks = deviceSize / blockSize; 567 568 // create valid superblock 569 570 fSuperBlock.Initialize(name, numBlocks, blockSize); 571 572 // initialize short hands to the superblock (to save byte swapping) 573 fBlockSize = fSuperBlock.BlockSize(); 574 fBlockShift = fSuperBlock.BlockShift(); 575 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 576 577 // determine log size depending on the size of the volume 578 off_t logSize = 2048; 579 if (numBlocks <= 20480) 580 logSize = 512; 581 if (deviceSize > 1LL * 1024 * 1024 * 1024) 582 logSize = 4096; 583 584 // since the allocator has not been initialized yet, we 585 // cannot use BlockAllocator::BitmapSize() here 586 off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8); 587 588 fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1); 589 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize); 590 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64( 591 ToBlock(Log())); 592 593 // set the current log pointers, so that journaling will work correctly 594 fLogStart = fSuperBlock.LogStart(); 595 fLogEnd = fSuperBlock.LogEnd(); 596 597 if (!IsValidSuperBlock()) 598 RETURN_ERROR(B_ERROR); 599 600 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 601 return B_ERROR; 602 603 fJournal = new(std::nothrow) Journal(this); 604 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 605 RETURN_ERROR(B_ERROR); 606 607 // ready to write data to disk 608 609 Transaction transaction(this, 0); 610 611 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 612 RETURN_ERROR(B_ERROR); 613 614 off_t id; 615 status_t status = Inode::Create(transaction, NULL, NULL, 616 S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode); 617 if (status < B_OK) 618 RETURN_ERROR(status); 619 620 fSuperBlock.root_dir = ToBlockRun(id); 621 622 if ((flags & VOLUME_NO_INDICES) == 0) { 623 // The indices root directory will be created automatically 624 // when the standard indices are created (or any other). 625 Index index(this); 626 status = index.Create(transaction, "name", B_STRING_TYPE); 627 if (status < B_OK) 628 return status; 629 630 status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE); 631 if (status < B_OK) 632 return status; 633 634 status = index.Create(transaction, "last_modified", B_INT64_TYPE); 635 if (status < B_OK) 636 return status; 637 638 status = index.Create(transaction, "size", B_INT64_TYPE); 639 if (status < B_OK) 640 return status; 641 } 642 643 status = CreateVolumeID(transaction); 644 if (status < B_OK) 645 return status; 646 647 status = _EraseUnusedBootBlock(); 648 if (status < B_OK) 649 return status; 650 651 status = WriteSuperBlock(); 652 if (status < B_OK) 653 return status; 654 655 status = transaction.Done(); 656 if (status < B_OK) 657 return status; 658 659 Sync(); 660 opener.RemoveCache(true); 661 return B_OK; 662 } 663 664 665 /*! Erase the first boot block, as we don't use it and there 666 * might be leftovers from other file systems. This can cause 667 * confusion for identifying the partition if not erased. 668 */ 669 status_t 670 Volume::_EraseUnusedBootBlock() 671 { 672 const int32 blockSize = 512; 673 const char emptySector[blockSize] = { 0 }; 674 // Erase boot block if any 675 if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize) 676 return B_IO_ERROR; 677 // Erase ext2 superblock if any 678 if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize) 679 return B_IO_ERROR; 680 681 return B_OK; 682 } 683