1 /* 2 * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 7 //! superblock, mounting, etc. 8 9 10 #include "Attribute.h" 11 #include "CheckVisitor.h" 12 #include "Debug.h" 13 #include "file_systems/DeviceOpener.h" 14 #include "Inode.h" 15 #include "Journal.h" 16 #include "Query.h" 17 #include "Volume.h" 18 19 20 static const int32 kDesiredAllocationGroups = 56; 21 // This is the number of allocation groups that will be tried 22 // to be given for newly initialized disks. 23 // That's only relevant for smaller disks, though, since any 24 // of today's disk sizes already reach the maximum length 25 // of an allocation group (65536 blocks). 26 // It seems to create appropriate numbers for smaller disks 27 // with this setting, though (i.e. you can create a 400 MB 28 // file on a 1 GB disk without the need for double indirect 29 // blocks). 30 31 32 // #pragma mark - 33 34 35 bool 36 disk_super_block::IsValid() const 37 { 38 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 39 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 40 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 41 || (int32)block_size != inode_size 42 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 43 || (1UL << BlockShift()) != BlockSize() 44 || AllocationGroups() < 1 45 || AllocationGroupShift() < 1 46 || BlocksPerAllocationGroup() < 1 47 || NumBlocks() < 10 48 || AllocationGroups() != divide_roundup(NumBlocks(), 49 1L << AllocationGroupShift())) 50 return false; 51 52 return true; 53 } 54 55 56 void 57 disk_super_block::Initialize(const char* diskName, off_t numBlocks, 58 uint32 blockSize) 59 { 60 memset(this, 0, sizeof(disk_super_block)); 61 62 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 63 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 64 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 65 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN); 66 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN); 67 68 strlcpy(name, diskName, sizeof(name)); 69 70 int32 blockShift = 9; 71 while ((1UL << blockShift) < blockSize) { 72 blockShift++; 73 } 74 75 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 76 block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift); 77 78 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks); 79 used_blocks = 0; 80 81 // Get the minimum ag_shift (that's determined by the block size) 82 83 int32 bitsPerBlock = blockSize << 3; 84 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 85 int32 blocksPerGroup = 1; 86 int32 groupShift = 13; 87 88 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 89 groupShift++; 90 } 91 92 // Many allocation groups help applying allocation policies, but if 93 // they are too small, we will need to many block_runs to cover large 94 // files (see above to get an explanation of the kDesiredAllocationGroups 95 // constant). 96 97 int32 numGroups; 98 99 while (true) { 100 numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup; 101 if (numGroups > kDesiredAllocationGroups) { 102 if (groupShift == 16) 103 break; 104 105 groupShift++; 106 blocksPerGroup *= 2; 107 } else 108 break; 109 } 110 111 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups); 112 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup); 113 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift); 114 } 115 116 117 // #pragma mark - 118 119 120 Volume::Volume(fs_volume* volume) 121 : 122 fVolume(volume), 123 fBlockAllocator(this), 124 fRootNode(NULL), 125 fIndicesNode(NULL), 126 fDirtyCachedBlocks(0), 127 fFlags(0), 128 fCheckingThread(-1), 129 fCheckVisitor(NULL) 130 { 131 mutex_init(&fLock, "bfs volume"); 132 mutex_init(&fQueryLock, "bfs queries"); 133 } 134 135 136 Volume::~Volume() 137 { 138 mutex_destroy(&fQueryLock); 139 mutex_destroy(&fLock); 140 } 141 142 143 bool 144 Volume::IsValidSuperBlock() const 145 { 146 return fSuperBlock.IsValid(); 147 } 148 149 150 /*! Checks whether the given block number may be the location of an inode block. 151 */ 152 bool 153 Volume::IsValidInodeBlock(off_t block) const 154 { 155 return block > fSuperBlock.LogEnd() && block < NumBlocks(); 156 } 157 158 159 void 160 Volume::Panic() 161 { 162 FATAL(("Disk corrupted... switch to read-only mode!\n")); 163 fFlags |= VOLUME_READ_ONLY; 164 #if KDEBUG 165 kernel_debugger("BFS panics!"); 166 #endif 167 } 168 169 170 status_t 171 Volume::Mount(const char* deviceName, uint32 flags) 172 { 173 // TODO: validate the FS in write mode as well! 174 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 175 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 176 // in big endian mode, we only mount read-only for now 177 flags |= B_MOUNT_READ_ONLY; 178 #endif 179 180 DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0 181 ? O_RDONLY : O_RDWR); 182 fDevice = opener.Device(); 183 if (fDevice < B_OK) 184 RETURN_ERROR(fDevice); 185 186 if (opener.IsReadOnly()) 187 fFlags |= VOLUME_READ_ONLY; 188 189 // read the superblock 190 if (Identify(fDevice, &fSuperBlock) != B_OK) { 191 FATAL(("invalid superblock!\n")); 192 return B_BAD_VALUE; 193 } 194 195 // initialize short hands to the superblock (to save byte swapping) 196 fBlockSize = fSuperBlock.BlockSize(); 197 fBlockShift = fSuperBlock.BlockShift(); 198 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 199 200 // check if the device size is large enough to hold the file system 201 off_t diskSize; 202 if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK) 203 RETURN_ERROR(B_ERROR); 204 if (diskSize < (NumBlocks() << BlockShift())) { 205 FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%" 206 B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift())); 207 RETURN_ERROR(B_BAD_VALUE); 208 } 209 210 // set the current log pointers, so that journaling will work correctly 211 fLogStart = fSuperBlock.LogStart(); 212 fLogEnd = fSuperBlock.LogEnd(); 213 214 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 215 return B_ERROR; 216 217 fJournal = new(std::nothrow) Journal(this); 218 if (fJournal == NULL) 219 return B_NO_MEMORY; 220 221 status_t status = fJournal->InitCheck(); 222 if (status < B_OK) { 223 FATAL(("could not initialize journal: %s!\n", strerror(status))); 224 return status; 225 } 226 227 // replaying the log is the first thing we will do on this disk 228 status = fJournal->ReplayLog(); 229 if (status != B_OK) { 230 FATAL(("Replaying log failed, data may be corrupted, volume " 231 "read-only.\n")); 232 fFlags |= VOLUME_READ_ONLY; 233 // TODO: if this is the boot volume, Bootscript will assume this 234 // is a CD... 235 // TODO: it would be nice to have a user visible alert instead 236 // of letting him just find this in the syslog. 237 } 238 239 status = fBlockAllocator.Initialize(); 240 if (status != B_OK) { 241 FATAL(("could not initialize block bitmap allocator!\n")); 242 return status; 243 } 244 245 fRootNode = new(std::nothrow) Inode(this, ToVnode(Root())); 246 if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) { 247 status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode, 248 &gBFSVnodeOps, fRootNode->Mode(), 0); 249 if (status == B_OK) { 250 // try to get indices root dir 251 252 if (!Indices().IsZero()) { 253 fIndicesNode = new(std::nothrow) Inode(this, 254 ToVnode(Indices())); 255 } 256 257 if (fIndicesNode == NULL 258 || fIndicesNode->InitCheck() < B_OK 259 || !fIndicesNode->IsContainer()) { 260 INFORM(("bfs: volume doesn't have indices!\n")); 261 262 if (fIndicesNode) { 263 // if this is the case, the index root node is gone bad, 264 // and BFS switch to read-only mode 265 fFlags |= VOLUME_READ_ONLY; 266 delete fIndicesNode; 267 fIndicesNode = NULL; 268 } 269 } else { 270 // we don't use the vnode layer to access the indices node 271 } 272 } else { 273 FATAL(("could not create root node: publish_vnode() failed!\n")); 274 delete fRootNode; 275 return status; 276 } 277 } else { 278 status = B_BAD_VALUE; 279 FATAL(("could not create root node!\n")); 280 281 // We need to wait for the block allocator to finish 282 fBlockAllocator.Uninitialize(); 283 return status; 284 } 285 286 // all went fine 287 opener.Keep(); 288 return B_OK; 289 } 290 291 292 status_t 293 Volume::Unmount() 294 { 295 put_vnode(fVolume, ToVnode(Root())); 296 297 fBlockAllocator.Uninitialize(); 298 299 // This will also flush the log & all blocks to disk 300 delete fJournal; 301 fJournal = NULL; 302 303 delete fIndicesNode; 304 305 block_cache_delete(fBlockCache, !IsReadOnly()); 306 close(fDevice); 307 308 return B_OK; 309 } 310 311 312 status_t 313 Volume::Sync() 314 { 315 return fJournal->FlushLogAndBlocks(); 316 } 317 318 319 status_t 320 Volume::ValidateBlockRun(block_run run) 321 { 322 if (run.AllocationGroup() < 0 323 || run.AllocationGroup() > (int32)AllocationGroups() 324 || run.Start() > (1UL << AllocationGroupShift()) 325 || run.length == 0 326 || uint32(run.Length() + run.Start()) 327 > (1UL << AllocationGroupShift())) { 328 Panic(); 329 FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(), 330 run.Start(), run.Length())); 331 return B_BAD_DATA; 332 } 333 return B_OK; 334 } 335 336 337 block_run 338 Volume::ToBlockRun(off_t block) const 339 { 340 block_run run; 341 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32( 342 block >> AllocationGroupShift()); 343 run.start = HOST_ENDIAN_TO_BFS_INT16( 344 block & ((1LL << AllocationGroupShift()) - 1)); 345 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 346 return run; 347 } 348 349 350 status_t 351 Volume::CreateIndicesRoot(Transaction& transaction) 352 { 353 off_t id; 354 status_t status = Inode::Create(transaction, NULL, NULL, 355 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id, 356 &fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE); 357 if (status < B_OK) 358 RETURN_ERROR(status); 359 360 fSuperBlock.indices = ToBlockRun(id); 361 return WriteSuperBlock(); 362 } 363 364 365 status_t 366 Volume::CreateVolumeID(Transaction& transaction) 367 { 368 Attribute attr(fRootNode); 369 status_t status; 370 attr_cookie* cookie; 371 status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie); 372 if (status == B_OK) { 373 static bool seeded = false; 374 if (!seeded) { 375 // seed the random number generator for the be:volume_id attribute. 376 srand(time(NULL)); 377 seeded = true; 378 } 379 uint64_t id; 380 size_t length = sizeof(id); 381 id = ((uint64_t)rand() << 32) | rand(); 382 attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL); 383 } 384 return status; 385 } 386 387 388 389 status_t 390 Volume::AllocateForInode(Transaction& transaction, const Inode* parent, 391 mode_t type, block_run& run) 392 { 393 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), 394 type, run); 395 } 396 397 398 status_t 399 Volume::WriteSuperBlock() 400 { 401 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) 402 != sizeof(disk_super_block)) 403 return B_IO_ERROR; 404 405 return B_OK; 406 } 407 408 409 void 410 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type, 411 const uint8* oldKey, size_t oldLength, const uint8* newKey, 412 size_t newLength) 413 { 414 MutexLocker _(fQueryLock); 415 416 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator(); 417 while (iterator.HasNext()) { 418 Query* query = iterator.Next(); 419 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, 420 newLength); 421 } 422 } 423 424 425 void 426 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID, 427 const char* oldName, ino_t newDirectoryID, const char* newName) 428 { 429 MutexLocker _(fQueryLock); 430 431 size_t oldLength = strlen(oldName); 432 size_t newLength = strlen(newName); 433 434 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator(); 435 while (iterator.HasNext()) { 436 Query* query = iterator.Next(); 437 query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength, 438 newDirectoryID, newName, newLength); 439 } 440 } 441 442 443 /*! Checks if there is a live query whose results depend on the presence 444 or value of the specified attribute. 445 Don't use it if you already have all the data together to evaluate 446 the queries - it wouldn't safe you anything in this case. 447 */ 448 bool 449 Volume::CheckForLiveQuery(const char* attribute) 450 { 451 // TODO: check for a live query that depends on the specified attribute 452 return true; 453 } 454 455 456 void 457 Volume::AddQuery(Query* query) 458 { 459 MutexLocker _(fQueryLock); 460 fQueries.Add(query); 461 } 462 463 464 void 465 Volume::RemoveQuery(Query* query) 466 { 467 MutexLocker _(fQueryLock); 468 fQueries.Remove(query); 469 } 470 471 472 status_t 473 Volume::CreateCheckVisitor() 474 { 475 if (fCheckVisitor != NULL) 476 return B_BUSY; 477 478 fCheckVisitor = new(std::nothrow) ::CheckVisitor(this); 479 if (fCheckVisitor == NULL) 480 return B_NO_MEMORY; 481 482 return B_OK; 483 } 484 485 486 void 487 Volume::DeleteCheckVisitor() 488 { 489 delete fCheckVisitor; 490 fCheckVisitor = NULL; 491 } 492 493 494 // #pragma mark - Disk scanning and initialization 495 496 497 /*static*/ status_t 498 Volume::CheckSuperBlock(const uint8* data, uint32* _offset) 499 { 500 disk_super_block* superBlock = (disk_super_block*)(data + 512); 501 if (superBlock->IsValid()) { 502 if (_offset != NULL) 503 *_offset = 512; 504 return B_OK; 505 } 506 507 #ifndef BFS_LITTLE_ENDIAN_ONLY 508 // For PPC, the superblock might be located at offset 0 509 superBlock = (disk_super_block*)data; 510 if (superBlock->IsValid()) { 511 if (_offset != NULL) 512 *_offset = 0; 513 return B_OK; 514 } 515 #endif 516 517 return B_BAD_VALUE; 518 } 519 520 521 /*static*/ status_t 522 Volume::Identify(int fd, disk_super_block* superBlock) 523 { 524 uint8 buffer[1024]; 525 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 526 return B_IO_ERROR; 527 528 uint32 offset; 529 if (CheckSuperBlock(buffer, &offset) != B_OK) 530 return B_BAD_VALUE; 531 532 memcpy(superBlock, buffer + offset, sizeof(disk_super_block)); 533 return B_OK; 534 } 535 536 537 status_t 538 Volume::Initialize(int fd, const char* name, uint32 blockSize, 539 uint32 flags) 540 { 541 // although there is no really good reason for it, we won't 542 // accept '/' in disk names (mkbfs does this, too - and since 543 // Tracker names mounted volumes like their name) 544 if (strchr(name, '/') != NULL) 545 return B_BAD_VALUE; 546 547 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 548 && blockSize != 8192) 549 return B_BAD_VALUE; 550 551 DeviceOpener opener(fd, O_RDWR); 552 if (opener.Device() < B_OK) 553 return B_BAD_VALUE; 554 555 if (opener.IsReadOnly()) 556 return B_READ_ONLY_DEVICE; 557 558 fDevice = opener.Device(); 559 560 uint32 deviceBlockSize; 561 off_t deviceSize; 562 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 563 return B_ERROR; 564 565 off_t numBlocks = deviceSize / blockSize; 566 567 // create valid superblock 568 569 fSuperBlock.Initialize(name, numBlocks, blockSize); 570 571 // initialize short hands to the superblock (to save byte swapping) 572 fBlockSize = fSuperBlock.BlockSize(); 573 fBlockShift = fSuperBlock.BlockShift(); 574 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 575 576 // determine log size depending on the size of the volume 577 off_t logSize = 2048; 578 if (numBlocks <= 20480) 579 logSize = 512; 580 if (deviceSize > 1LL * 1024 * 1024 * 1024) 581 logSize = 4096; 582 583 // since the allocator has not been initialized yet, we 584 // cannot use BlockAllocator::BitmapSize() here 585 off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8); 586 587 fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1); 588 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize); 589 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64( 590 ToBlock(Log())); 591 592 // set the current log pointers, so that journaling will work correctly 593 fLogStart = fSuperBlock.LogStart(); 594 fLogEnd = fSuperBlock.LogEnd(); 595 596 if (!IsValidSuperBlock()) 597 RETURN_ERROR(B_ERROR); 598 599 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 600 return B_ERROR; 601 602 fJournal = new(std::nothrow) Journal(this); 603 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 604 RETURN_ERROR(B_ERROR); 605 606 // ready to write data to disk 607 608 Transaction transaction(this, 0); 609 610 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 611 RETURN_ERROR(B_ERROR); 612 613 off_t id; 614 status_t status = Inode::Create(transaction, NULL, NULL, 615 S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode); 616 if (status < B_OK) 617 RETURN_ERROR(status); 618 619 fSuperBlock.root_dir = ToBlockRun(id); 620 621 if ((flags & VOLUME_NO_INDICES) == 0) { 622 // The indices root directory will be created automatically 623 // when the standard indices are created (or any other). 624 Index index(this); 625 status = index.Create(transaction, "name", B_STRING_TYPE); 626 if (status < B_OK) 627 return status; 628 629 status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE); 630 if (status < B_OK) 631 return status; 632 633 status = index.Create(transaction, "last_modified", B_INT64_TYPE); 634 if (status < B_OK) 635 return status; 636 637 status = index.Create(transaction, "size", B_INT64_TYPE); 638 if (status < B_OK) 639 return status; 640 } 641 642 status = CreateVolumeID(transaction); 643 if (status < B_OK) 644 return status; 645 646 status = _EraseUnusedBootBlock(); 647 if (status < B_OK) 648 return status; 649 650 status = WriteSuperBlock(); 651 if (status < B_OK) 652 return status; 653 654 status = transaction.Done(); 655 if (status < B_OK) 656 return status; 657 658 Sync(); 659 opener.RemoveCache(true); 660 return B_OK; 661 } 662 663 664 /*! Erase the first boot block, as we don't use it and there 665 * might be leftovers from other file systems. This can cause 666 * confusion for identifying the partition if not erased. 667 */ 668 status_t 669 Volume::_EraseUnusedBootBlock() 670 { 671 const int32 blockSize = 512; 672 const char emptySector[blockSize] = { 0 }; 673 // Erase boot block if any 674 if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize) 675 return B_IO_ERROR; 676 // Erase ext2 superblock if any 677 if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize) 678 return B_IO_ERROR; 679 680 return B_OK; 681 } 682