1 /* 2 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! super block, mounting, etc. 7 8 9 #include "Debug.h" 10 #include "Volume.h" 11 #include "Journal.h" 12 #include "Inode.h" 13 #include "Query.h" 14 15 16 static const int32 kDesiredAllocationGroups = 56; 17 // This is the number of allocation groups that will be tried 18 // to be given for newly initialized disks. 19 // That's only relevant for smaller disks, though, since any 20 // of today's disk sizes already reach the maximum length 21 // of an allocation group (65536 blocks). 22 // It seems to create appropriate numbers for smaller disks 23 // with this setting, though (i.e. you can create a 400 MB 24 // file on a 1 GB disk without the need for double indirect 25 // blocks). 26 27 28 class DeviceOpener { 29 public: 30 DeviceOpener(const char *device, int mode); 31 ~DeviceOpener(); 32 33 int Open(const char *device, int mode); 34 void *InitCache(off_t numBlocks, uint32 blockSize); 35 void RemoveCache(bool allowWrites); 36 37 void Keep(); 38 39 int Device() const { return fDevice; } 40 int Mode() const { return fMode; } 41 42 status_t GetSize(off_t *_size, uint32 *_blockSize = NULL); 43 44 private: 45 int fDevice; 46 int fMode; 47 void *fBlockCache; 48 }; 49 50 51 DeviceOpener::DeviceOpener(const char *device, int mode) 52 : 53 fBlockCache(NULL) 54 { 55 Open(device, mode); 56 } 57 58 59 DeviceOpener::~DeviceOpener() 60 { 61 if (fDevice >= B_OK) { 62 RemoveCache(false); 63 close(fDevice); 64 } 65 } 66 67 68 int 69 DeviceOpener::Open(const char *device, int mode) 70 { 71 fDevice = open(device, mode); 72 if (fDevice < 0) 73 fDevice = errno; 74 75 if (fDevice < 0 && mode == O_RDWR) { 76 // try again to open read-only (don't rely on a specific error code) 77 return Open(device, O_RDONLY); 78 } 79 80 if (fDevice >= 0) { 81 // opening succeeded 82 fMode = mode; 83 if (mode == O_RDWR) { 84 // check out if the device really allows for read/write access 85 device_geometry geometry; 86 if (!ioctl(fDevice, B_GET_GEOMETRY, &geometry)) { 87 if (geometry.read_only) { 88 // reopen device read-only 89 close(fDevice); 90 return Open(device, O_RDONLY); 91 } 92 } 93 } 94 } 95 96 return fDevice; 97 } 98 99 100 void * 101 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize) 102 { 103 return block_cache_create(fDevice, numBlocks, blockSize, fMode == O_RDONLY); 104 } 105 106 107 void 108 DeviceOpener::RemoveCache(bool allowWrites) 109 { 110 if (fBlockCache == NULL) 111 return; 112 113 block_cache_delete(fBlockCache, allowWrites); 114 fBlockCache = NULL; 115 } 116 117 118 void 119 DeviceOpener::Keep() 120 { 121 fDevice = -1; 122 } 123 124 125 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY 126 * to compute the size, or fstat() if that failed. 127 */ 128 129 status_t 130 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize) 131 { 132 device_geometry geometry; 133 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) { 134 // maybe it's just a file 135 struct stat stat; 136 if (fstat(fDevice, &stat) < 0) 137 return B_ERROR; 138 139 if (_size) 140 *_size = stat.st_size; 141 if (_blockSize) // that shouldn't cause us any problems 142 *_blockSize = 512; 143 144 return B_OK; 145 } 146 147 if (_size) { 148 *_size = 1LL * geometry.head_count * geometry.cylinder_count 149 * geometry.sectors_per_track * geometry.bytes_per_sector; 150 } 151 if (_blockSize) 152 *_blockSize = geometry.bytes_per_sector; 153 154 return B_OK; 155 } 156 157 158 // #pragma mark - 159 160 161 bool 162 disk_super_block::IsValid() 163 { 164 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 165 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 166 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 167 || (int32)block_size != inode_size 168 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 169 || (1UL << BlockShift()) != BlockSize() 170 || AllocationGroups() < 1 171 || AllocationGroupShift() < 1 172 || BlocksPerAllocationGroup() < 1 173 || NumBlocks() < 10 174 || AllocationGroups() != divide_roundup(NumBlocks(), 175 1L << AllocationGroupShift())) 176 return false; 177 178 return true; 179 } 180 181 182 void 183 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize) 184 { 185 memset(this, 0, sizeof(disk_super_block)); 186 187 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 188 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 189 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 190 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN); 191 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN); 192 193 strlcpy(name, diskName, sizeof(name)); 194 195 int32 blockShift = 9; 196 while ((1UL << blockShift) < blockSize) { 197 blockShift++; 198 } 199 200 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 201 block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift); 202 203 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks); 204 used_blocks = 0; 205 206 // Get the minimum ag_shift (that's determined by the block size) 207 208 int32 bitsPerBlock = blockSize << 3; 209 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 210 int32 blocksPerGroup = 1; 211 int32 groupShift = 13; 212 213 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 214 groupShift++; 215 } 216 217 // Many allocation groups help applying allocation policies, but if 218 // they are too small, we will need to many block_runs to cover large 219 // files (see above to get an explanation of the kDesiredAllocationGroups 220 // constant). 221 222 int32 numGroups; 223 224 while (true) { 225 numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup; 226 if (numGroups > kDesiredAllocationGroups) { 227 if (groupShift == 16) 228 break; 229 230 groupShift++; 231 blocksPerGroup *= 2; 232 } else 233 break; 234 } 235 236 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups); 237 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup); 238 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift); 239 } 240 241 242 // #pragma mark - 243 244 245 Volume::Volume(mount_id id) 246 : 247 fID(id), 248 fBlockAllocator(this), 249 fLock("bfs volume"), 250 fRootNode(NULL), 251 fIndicesNode(NULL), 252 fDirtyCachedBlocks(0), 253 fUniqueID(0), 254 fFlags(0) 255 { 256 } 257 258 259 Volume::~Volume() 260 { 261 } 262 263 264 bool 265 Volume::IsValidSuperBlock() 266 { 267 return fSuperBlock.IsValid(); 268 } 269 270 271 void 272 Volume::Panic() 273 { 274 FATAL(("we have to panic... switch to read-only mode!\n")); 275 fFlags |= VOLUME_READ_ONLY; 276 #ifdef DEBUG 277 kernel_debugger("BFS panics!"); 278 #endif 279 } 280 281 282 status_t 283 Volume::Mount(const char *deviceName, uint32 flags) 284 { 285 // ToDo: validate the FS in write mode as well! 286 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 287 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 288 // in big endian mode, we only mount read-only for now 289 flags |= B_MOUNT_READ_ONLY; 290 #endif 291 292 DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR); 293 fDevice = opener.Device(); 294 if (fDevice < B_OK) 295 RETURN_ERROR(fDevice); 296 297 if (opener.Mode() == O_RDONLY) 298 fFlags |= VOLUME_READ_ONLY; 299 300 // check if it's a regular file, and if so, disable the cache for the 301 // underlaying file system 302 struct stat stat; 303 if (fstat(fDevice, &stat) < 0) 304 RETURN_ERROR(B_ERROR); 305 306 // TODO: allow turning off caching of the underlying file (once O_NOCACHE works) 307 #if 0 308 #ifndef NO_FILE_UNCACHED_IO 309 if ((stat.st_mode & S_FILE) != 0 && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) { 310 // mount read-only if the cache couldn't be disabled 311 # ifdef DEBUG 312 FATAL(("couldn't disable cache for image file - system may dead-lock!\n")); 313 # else 314 FATAL(("couldn't disable cache for image file!\n")); 315 Panic(); 316 # endif 317 } 318 #endif 319 #endif 320 321 // read the super block 322 if (Identify(fDevice, &fSuperBlock) != B_OK) { 323 FATAL(("invalid super block!\n")); 324 return B_BAD_VALUE; 325 } 326 327 // initialize short hands to the super block (to save byte swapping) 328 fBlockSize = fSuperBlock.BlockSize(); 329 fBlockShift = fSuperBlock.BlockShift(); 330 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 331 332 // check if the device size is large enough to hold the file system 333 off_t diskSize; 334 if (opener.GetSize(&diskSize) < B_OK) 335 RETURN_ERROR(B_ERROR); 336 if (diskSize < (NumBlocks() << BlockShift())) 337 RETURN_ERROR(B_BAD_VALUE); 338 339 // set the current log pointers, so that journaling will work correctly 340 fLogStart = fSuperBlock.LogStart(); 341 fLogEnd = fSuperBlock.LogEnd(); 342 343 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 344 return B_ERROR; 345 346 fJournal = new Journal(this); 347 // replaying the log is the first thing we will do on this disk 348 if (fJournal && fJournal->InitCheck() < B_OK 349 || fBlockAllocator.Initialize() < B_OK) { 350 // ToDo: improve error reporting for a bad journal 351 FATAL(("could not initialize journal/block bitmap allocator!\n")); 352 return B_NO_MEMORY; 353 } 354 355 status_t status = B_OK; 356 357 fRootNode = new Inode(this, ToVnode(Root())); 358 if (fRootNode && fRootNode->InitCheck() == B_OK) { 359 status = publish_vnode(fID, ToVnode(Root()), (void *)fRootNode); 360 if (status == B_OK) { 361 // try to get indices root dir 362 363 // question: why doesn't get_vnode() work here?? 364 // answer: we have not yet backpropagated the pointer to the 365 // volume in bfs_mount(), so bfs_read_vnode() can't get it. 366 // But it's not needed to do that anyway. 367 368 if (!Indices().IsZero()) 369 fIndicesNode = new Inode(this, ToVnode(Indices())); 370 371 if (fIndicesNode == NULL 372 || fIndicesNode->InitCheck() < B_OK 373 || !fIndicesNode->IsContainer()) { 374 INFORM(("bfs: volume doesn't have indices!\n")); 375 376 if (fIndicesNode) { 377 // if this is the case, the index root node is gone bad, and 378 // BFS switch to read-only mode 379 fFlags |= VOLUME_READ_ONLY; 380 delete fIndicesNode; 381 fIndicesNode = NULL; 382 } 383 } 384 385 // all went fine 386 opener.Keep(); 387 return B_OK; 388 } else 389 FATAL(("could not create root node: publish_vnode() failed!\n")); 390 391 delete fRootNode; 392 } else { 393 status = B_BAD_VALUE; 394 FATAL(("could not create root node!\n")); 395 } 396 397 return status; 398 } 399 400 401 status_t 402 Volume::Unmount() 403 { 404 // Unlike in BeOS, we need to put the reference to our root node ourselves 405 put_vnode(fID, ToVnode(Root())); 406 407 // This will also flush the log & all blocks to disk 408 delete fJournal; 409 fJournal = NULL; 410 411 delete fIndicesNode; 412 413 block_cache_delete(fBlockCache, !IsReadOnly()); 414 close(fDevice); 415 416 return B_OK; 417 } 418 419 420 status_t 421 Volume::Sync() 422 { 423 return fJournal->FlushLogAndBlocks(); 424 } 425 426 427 status_t 428 Volume::ValidateBlockRun(block_run run) 429 { 430 if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups() 431 || run.Start() > (1UL << AllocationGroupShift()) 432 || run.length == 0 433 || uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) { 434 Panic(); 435 FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(), run.Start(), run.Length())); 436 return B_BAD_DATA; 437 } 438 return B_OK; 439 } 440 441 442 block_run 443 Volume::ToBlockRun(off_t block) const 444 { 445 block_run run; 446 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift()); 447 run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1)); 448 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 449 return run; 450 } 451 452 453 status_t 454 Volume::CreateIndicesRoot(Transaction &transaction) 455 { 456 off_t id; 457 status_t status = Inode::Create(transaction, NULL, NULL, 458 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id, 459 &fIndicesNode); 460 if (status < B_OK) 461 RETURN_ERROR(status); 462 463 fSuperBlock.indices = ToBlockRun(id); 464 return WriteSuperBlock(); 465 } 466 467 468 status_t 469 Volume::AllocateForInode(Transaction &transaction, const Inode *parent, mode_t type, block_run &run) 470 { 471 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run); 472 } 473 474 475 status_t 476 Volume::WriteSuperBlock() 477 { 478 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block)) 479 return B_IO_ERROR; 480 481 return B_OK; 482 } 483 484 485 void 486 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 487 size_t oldLength, const uint8 *newKey, size_t newLength) 488 { 489 if (fQueryLock.Lock() < B_OK) 490 return; 491 492 Query *query = NULL; 493 while ((query = fQueries.Next(query)) != NULL) 494 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength); 495 496 fQueryLock.Unlock(); 497 } 498 499 500 /*! 501 Checks if there is a live query whose results depend on the presence 502 or value of the specified attribute. 503 Don't use it if you already have all the data together to evaluate 504 the queries - it wouldn't safe you anything in this case. 505 */ 506 bool 507 Volume::CheckForLiveQuery(const char *attribute) 508 { 509 // ToDo: check for a live query that depends on the specified attribute 510 return true; 511 } 512 513 514 void 515 Volume::AddQuery(Query *query) 516 { 517 if (fQueryLock.Lock() < B_OK) 518 return; 519 520 fQueries.Add(query); 521 522 fQueryLock.Unlock(); 523 } 524 525 526 void 527 Volume::RemoveQuery(Query *query) 528 { 529 if (fQueryLock.Lock() < B_OK) 530 return; 531 532 fQueries.Remove(query); 533 534 fQueryLock.Unlock(); 535 } 536 537 538 // #pragma mark - Disk scanning and initialization 539 540 541 status_t 542 Volume::Identify(int fd, disk_super_block *superBlock) 543 { 544 char buffer[1024]; 545 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 546 return B_IO_ERROR; 547 548 // Note: that does work only for x86, for PowerPC, the super block 549 // may be located at offset 0! 550 memcpy(superBlock, buffer + 512, sizeof(disk_super_block)); 551 if (!superBlock->IsValid()) { 552 #ifndef BFS_LITTLE_ENDIAN_ONLY 553 memcpy(superBlock, buffer, sizeof(disk_super_block)); 554 if (!superBlock->IsValid()) 555 return B_BAD_VALUE; 556 #else 557 return B_BAD_VALUE; 558 #endif 559 } 560 561 return B_OK; 562 } 563 564 565 status_t 566 Volume::Initialize(const char *device, const char *name, uint32 blockSize, 567 uint32 flags) 568 { 569 // although there is no really good reason for it, we won't 570 // accept '/' in disk names (mkbfs does this, too - and since 571 // Tracker names mounted volumes like their name) 572 if (strchr(name, '/') != NULL) 573 return B_BAD_VALUE; 574 575 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 576 && blockSize != 8192) 577 return B_BAD_VALUE; 578 579 DeviceOpener opener(device, O_RDWR); 580 if (opener.Device() < B_OK) 581 return B_BAD_VALUE; 582 583 fDevice = opener.Device(); 584 585 uint32 deviceBlockSize; 586 off_t deviceSize; 587 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 588 return B_ERROR; 589 590 off_t numBlocks = deviceSize / blockSize; 591 592 // create valid super block 593 594 fSuperBlock.Initialize(name, numBlocks, blockSize); 595 596 // initialize short hands to the super block (to save byte swapping) 597 fBlockSize = fSuperBlock.BlockSize(); 598 fBlockShift = fSuperBlock.BlockShift(); 599 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 600 601 // since the allocator has not been initialized yet, we 602 // cannot use BlockAllocator::BitmapSize() here 603 fSuperBlock.log_blocks = ToBlockRun(AllocationGroups() 604 * fSuperBlock.BlocksPerAllocationGroup() + 1); 605 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(2048); 606 // ToDo: set the log size depending on the disk size 607 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64( 608 ToBlock(Log())); 609 610 // set the current log pointers, so that journaling will work correctly 611 fLogStart = fSuperBlock.LogStart(); 612 fLogEnd = fSuperBlock.LogEnd(); 613 614 if (!IsValidSuperBlock()) 615 RETURN_ERROR(B_ERROR); 616 617 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 618 return B_ERROR; 619 620 fJournal = new Journal(this); 621 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 622 RETURN_ERROR(B_ERROR); 623 624 // ready to write data to disk 625 626 Transaction transaction(this, 0); 627 628 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 629 RETURN_ERROR(B_ERROR); 630 631 off_t id; 632 status_t status = Inode::Create(transaction, NULL, NULL, 633 S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode); 634 if (status < B_OK) 635 RETURN_ERROR(status); 636 637 fSuperBlock.root_dir = ToBlockRun(id); 638 639 if ((flags & VOLUME_NO_INDICES) == 0) { 640 // The indices root directory will be created automatically 641 // when the standard indices are created (or any other). 642 Index index(this); 643 status = index.Create(transaction, "name", B_STRING_TYPE); 644 if (status < B_OK) 645 return status; 646 647 status = index.Create(transaction, "last_modified", B_INT64_TYPE); 648 if (status < B_OK) 649 return status; 650 651 status = index.Create(transaction, "size", B_INT64_TYPE); 652 if (status < B_OK) 653 return status; 654 } 655 656 WriteSuperBlock(); 657 transaction.Done(); 658 659 // put_vnode(ID(), fRootNode->ID()); 660 // if (fIndicesNode != NULL) 661 // put_vnode(ID(), fIndicesNode->ID()); 662 663 Sync(); 664 opener.RemoveCache(true); 665 return B_OK; 666 } 667