1 /* Volume - BFS super block, mounting, etc. 2 * 3 * Copyright 2001-2006, Axel Dörfler, axeld@pinc-software.de. 4 * This file may be used under the terms of the MIT License. 5 */ 6 7 8 #include "Debug.h" 9 #include "Volume.h" 10 #include "Journal.h" 11 #include "Inode.h" 12 #include "Query.h" 13 14 15 static const int32 kDesiredAllocationGroups = 56; 16 // This is the number of allocation groups that will be tried 17 // to be given for newly initialized disks. 18 // That's only relevant for smaller disks, though, since any 19 // of today's disk sizes already reach the maximum length 20 // of an allocation group (65536 blocks). 21 // It seems to create appropriate numbers for smaller disks 22 // with this setting, though (i.e. you can create a 400 MB 23 // file on a 1 GB disk without the need for double indirect 24 // blocks). 25 26 27 class DeviceOpener { 28 public: 29 DeviceOpener(const char *device, int mode); 30 ~DeviceOpener(); 31 32 int Open(const char *device, int mode); 33 void *InitCache(off_t numBlocks, uint32 blockSize); 34 void RemoveCache(bool allowWrites); 35 36 void Keep(); 37 38 int Device() const { return fDevice; } 39 int Mode() const { return fMode; } 40 41 status_t GetSize(off_t *_size, uint32 *_blockSize = NULL); 42 43 private: 44 int fDevice; 45 int fMode; 46 void *fBlockCache; 47 }; 48 49 50 DeviceOpener::DeviceOpener(const char *device, int mode) 51 : 52 fBlockCache(NULL) 53 { 54 Open(device, mode); 55 } 56 57 58 DeviceOpener::~DeviceOpener() 59 { 60 if (fDevice >= B_OK) { 61 RemoveCache(false); 62 close(fDevice); 63 } 64 } 65 66 67 int 68 DeviceOpener::Open(const char *device, int mode) 69 { 70 fDevice = open(device, mode); 71 if (fDevice < 0) 72 fDevice = errno; 73 74 if (fDevice < 0 && mode == O_RDWR) { 75 // try again to open read-only (don't rely on a specific error code) 76 return Open(device, O_RDONLY); 77 } 78 79 if (fDevice >= 0) { 80 // opening succeeded 81 fMode = mode; 82 if (mode == O_RDWR) { 83 // check out if the device really allows for read/write access 84 device_geometry geometry; 85 if (!ioctl(fDevice, B_GET_GEOMETRY, &geometry)) { 86 if (geometry.read_only) { 87 // reopen device read-only 88 close(fDevice); 89 return Open(device, O_RDONLY); 90 } 91 } 92 } 93 } 94 95 return fDevice; 96 } 97 98 99 void * 100 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize) 101 { 102 return block_cache_create(fDevice, numBlocks, blockSize, fMode == O_RDONLY); 103 } 104 105 106 void 107 DeviceOpener::RemoveCache(bool allowWrites) 108 { 109 if (fBlockCache == NULL) 110 return; 111 112 block_cache_delete(fBlockCache, allowWrites); 113 fBlockCache = NULL; 114 } 115 116 117 void 118 DeviceOpener::Keep() 119 { 120 fDevice = -1; 121 } 122 123 124 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY 125 * to compute the size, or fstat() if that failed. 126 */ 127 128 status_t 129 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize) 130 { 131 device_geometry geometry; 132 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) { 133 // maybe it's just a file 134 struct stat stat; 135 if (fstat(fDevice, &stat) < 0) 136 return B_ERROR; 137 138 if (_size) 139 *_size = stat.st_size; 140 if (_blockSize) // that shouldn't cause us any problems 141 *_blockSize = 512; 142 143 return B_OK; 144 } 145 146 if (_size) { 147 *_size = 1LL * geometry.head_count * geometry.cylinder_count 148 * geometry.sectors_per_track * geometry.bytes_per_sector; 149 } 150 if (_blockSize) 151 *_blockSize = geometry.bytes_per_sector; 152 153 return B_OK; 154 } 155 156 157 // #pragma mark - 158 159 160 bool 161 disk_super_block::IsValid() 162 { 163 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 164 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 165 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 166 || (int32)block_size != inode_size 167 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 168 || (1UL << BlockShift()) != BlockSize() 169 || AllocationGroups() < 1 170 || AllocationGroupShift() < 1 171 || BlocksPerAllocationGroup() < 1 172 || NumBlocks() < 10 173 || AllocationGroups() != divide_roundup(NumBlocks(), 174 1L << AllocationGroupShift())) 175 return false; 176 177 return true; 178 } 179 180 181 void 182 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize) 183 { 184 memset(this, 0, sizeof(disk_super_block)); 185 186 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 187 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 188 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 189 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN); 190 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN); 191 192 strlcpy(name, diskName, sizeof(name)); 193 194 int32 blockShift = 9; 195 while ((1UL << blockShift) < blockSize) { 196 blockShift++; 197 } 198 199 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 200 block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift); 201 202 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks); 203 used_blocks = 0; 204 205 // Get the minimum ag_shift (that's determined by the block size) 206 207 int32 bitsPerBlock = blockSize << 3; 208 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 209 int32 blocksPerGroup = 1; 210 int32 groupShift = 13; 211 212 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 213 groupShift++; 214 } 215 216 // Many allocation groups help applying allocation policies, but if 217 // they are too small, we will need to many block_runs to cover large 218 // files (see above to get an explanation of the kDesiredAllocationGroups 219 // constant). 220 221 int32 numGroups; 222 223 while (true) { 224 numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup; 225 if (numGroups > kDesiredAllocationGroups) { 226 if (groupShift == 16) 227 break; 228 229 groupShift++; 230 blocksPerGroup *= 2; 231 } else 232 break; 233 } 234 235 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups); 236 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(1); 237 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift); 238 } 239 240 241 // #pragma mark - 242 243 244 Volume::Volume(mount_id id) 245 : 246 fID(id), 247 fBlockAllocator(this), 248 fLock("bfs volume"), 249 fRootNode(NULL), 250 fIndicesNode(NULL), 251 fDirtyCachedBlocks(0), 252 fUniqueID(0), 253 fFlags(0) 254 { 255 } 256 257 258 Volume::~Volume() 259 { 260 } 261 262 263 bool 264 Volume::IsValidSuperBlock() 265 { 266 return fSuperBlock.IsValid(); 267 } 268 269 270 void 271 Volume::Panic() 272 { 273 FATAL(("we have to panic... switch to read-only mode!\n")); 274 fFlags |= VOLUME_READ_ONLY; 275 #ifdef USER 276 debugger("BFS panics!"); 277 #elif defined(DEBUG) 278 kernel_debugger("BFS panics!"); 279 #endif 280 } 281 282 283 status_t 284 Volume::Mount(const char *deviceName, uint32 flags) 285 { 286 // ToDo: validate the FS in write mode as well! 287 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 288 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 289 // in big endian mode, we only mount read-only for now 290 flags |= B_MOUNT_READ_ONLY; 291 #endif 292 293 DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR); 294 fDevice = opener.Device(); 295 if (fDevice < B_OK) 296 RETURN_ERROR(fDevice); 297 298 if (opener.Mode() == O_RDONLY) 299 fFlags |= VOLUME_READ_ONLY; 300 301 // check if it's a regular file, and if so, disable the cache for the 302 // underlaying file system 303 struct stat stat; 304 if (fstat(fDevice, &stat) < 0) 305 RETURN_ERROR(B_ERROR); 306 307 // TODO: allow turning off caching of the underlying file (once O_NOCACHE works) 308 #if 0 309 #ifndef NO_FILE_UNCACHED_IO 310 if ((stat.st_mode & S_FILE) != 0 && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) { 311 // mount read-only if the cache couldn't be disabled 312 # ifdef DEBUG 313 FATAL(("couldn't disable cache for image file - system may dead-lock!\n")); 314 # else 315 FATAL(("couldn't disable cache for image file!\n")); 316 Panic(); 317 # endif 318 } 319 #endif 320 #endif 321 322 // read the super block 323 if (Identify(fDevice, &fSuperBlock) != B_OK) { 324 FATAL(("invalid super block!\n")); 325 return B_BAD_VALUE; 326 } 327 328 // initialize short hands to the super block (to save byte swapping) 329 fBlockSize = fSuperBlock.BlockSize(); 330 fBlockShift = fSuperBlock.BlockShift(); 331 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 332 333 // check if the device size is large enough to hold the file system 334 off_t diskSize; 335 if (opener.GetSize(&diskSize) < B_OK) 336 RETURN_ERROR(B_ERROR); 337 if (diskSize < (NumBlocks() << BlockShift())) 338 RETURN_ERROR(B_BAD_VALUE); 339 340 // set the current log pointers, so that journaling will work correctly 341 fLogStart = fSuperBlock.LogStart(); 342 fLogEnd = fSuperBlock.LogEnd(); 343 344 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 345 return B_ERROR; 346 347 fJournal = new Journal(this); 348 // replaying the log is the first thing we will do on this disk 349 if (fJournal && fJournal->InitCheck() < B_OK 350 || fBlockAllocator.Initialize() < B_OK) { 351 // ToDo: improve error reporting for a bad journal 352 FATAL(("could not initialize journal/block bitmap allocator!\n")); 353 return B_NO_MEMORY; 354 } 355 356 status_t status = B_OK; 357 358 fRootNode = new Inode(this, ToVnode(Root())); 359 if (fRootNode && fRootNode->InitCheck() == B_OK) { 360 status = publish_vnode(fID, ToVnode(Root()), (void *)fRootNode); 361 if (status == B_OK) { 362 // try to get indices root dir 363 364 // question: why doesn't get_vnode() work here?? 365 // answer: we have not yet backpropagated the pointer to the 366 // volume in bfs_mount(), so bfs_read_vnode() can't get it. 367 // But it's not needed to do that anyway. 368 369 if (!Indices().IsZero()) 370 fIndicesNode = new Inode(this, ToVnode(Indices())); 371 372 if (fIndicesNode == NULL 373 || fIndicesNode->InitCheck() < B_OK 374 || !fIndicesNode->IsContainer()) { 375 INFORM(("bfs: volume doesn't have indices!\n")); 376 377 if (fIndicesNode) { 378 // if this is the case, the index root node is gone bad, and 379 // BFS switch to read-only mode 380 fFlags |= VOLUME_READ_ONLY; 381 delete fIndicesNode; 382 fIndicesNode = NULL; 383 } 384 } 385 386 // all went fine 387 opener.Keep(); 388 return B_OK; 389 } else 390 FATAL(("could not create root node: publish_vnode() failed!\n")); 391 392 delete fRootNode; 393 } else { 394 status = B_BAD_VALUE; 395 FATAL(("could not create root node!\n")); 396 } 397 398 return status; 399 } 400 401 402 status_t 403 Volume::Unmount() 404 { 405 // Unlike in BeOS, we need to put the reference to our root node ourselves 406 put_vnode(fID, ToVnode(Root())); 407 408 // This will also flush the log & all blocks to disk 409 delete fJournal; 410 fJournal = NULL; 411 412 delete fIndicesNode; 413 414 block_cache_delete(fBlockCache, !IsReadOnly()); 415 close(fDevice); 416 417 return B_OK; 418 } 419 420 421 status_t 422 Volume::Sync() 423 { 424 return fJournal->FlushLogAndBlocks(); 425 } 426 427 428 status_t 429 Volume::ValidateBlockRun(block_run run) 430 { 431 if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups() 432 || run.Start() > (1UL << AllocationGroupShift()) 433 || run.length == 0 434 || uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) { 435 Panic(); 436 FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(), run.Start(), run.Length())); 437 return B_BAD_DATA; 438 } 439 return B_OK; 440 } 441 442 443 block_run 444 Volume::ToBlockRun(off_t block) const 445 { 446 block_run run; 447 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift()); 448 run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1)); 449 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 450 return run; 451 } 452 453 454 status_t 455 Volume::CreateIndicesRoot(Transaction &transaction) 456 { 457 off_t id; 458 status_t status = Inode::Create(transaction, NULL, NULL, 459 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode); 460 if (status < B_OK) 461 RETURN_ERROR(status); 462 463 fSuperBlock.indices = ToBlockRun(id); 464 return WriteSuperBlock(); 465 } 466 467 468 status_t 469 Volume::AllocateForInode(Transaction &transaction, const Inode *parent, mode_t type, block_run &run) 470 { 471 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run); 472 } 473 474 475 status_t 476 Volume::WriteSuperBlock() 477 { 478 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block)) 479 return B_IO_ERROR; 480 481 return B_OK; 482 } 483 484 485 void 486 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 487 size_t oldLength, const uint8 *newKey, size_t newLength) 488 { 489 if (fQueryLock.Lock() < B_OK) 490 return; 491 492 Query *query = NULL; 493 while ((query = fQueries.Next(query)) != NULL) 494 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength); 495 496 fQueryLock.Unlock(); 497 } 498 499 500 /** Checks if there is a live query whose results depend on the presence 501 * or value of the specified attribute. 502 * Don't use it if you already have all the data together to evaluate 503 * the queries - it wouldn't safe you anything in this case. 504 */ 505 506 bool 507 Volume::CheckForLiveQuery(const char *attribute) 508 { 509 // ToDo: check for a live query that depends on the specified attribute 510 return true; 511 } 512 513 514 void 515 Volume::AddQuery(Query *query) 516 { 517 if (fQueryLock.Lock() < B_OK) 518 return; 519 520 fQueries.Add(query); 521 522 fQueryLock.Unlock(); 523 } 524 525 526 void 527 Volume::RemoveQuery(Query *query) 528 { 529 if (fQueryLock.Lock() < B_OK) 530 return; 531 532 fQueries.Remove(query); 533 534 fQueryLock.Unlock(); 535 } 536 537 538 // #pragma mark - 539 // Disk scanning and initialization 540 541 542 status_t 543 Volume::Identify(int fd, disk_super_block *superBlock) 544 { 545 char buffer[1024]; 546 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 547 return B_IO_ERROR; 548 549 // Note: that does work only for x86, for PowerPC, the super block 550 // may be located at offset 0! 551 memcpy(superBlock, buffer + 512, sizeof(disk_super_block)); 552 if (!superBlock->IsValid()) { 553 #ifndef BFS_LITTLE_ENDIAN_ONLY 554 memcpy(superBlock, buffer, sizeof(disk_super_block)); 555 if (!superBlock->IsValid()) 556 return B_BAD_VALUE; 557 #else 558 return B_BAD_VALUE; 559 #endif 560 } 561 562 return B_OK; 563 } 564 565 566 status_t 567 Volume::Initialize(const char *device, const char *name, uint32 blockSize, 568 uint32 flags) 569 { 570 // although there is no really good reason for it, we won't 571 // accept '/' in disk names (mkbfs does this, too - and since 572 // Tracker names mounted volumes like their name) 573 if (strchr(name, '/') != NULL) 574 return B_BAD_VALUE; 575 576 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192) 577 return B_BAD_VALUE; 578 579 DeviceOpener opener(device, O_RDWR); 580 if (opener.Device() < B_OK) 581 return B_BAD_VALUE; 582 583 fDevice = opener.Device(); 584 585 uint32 deviceBlockSize; 586 off_t deviceSize; 587 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 588 return B_ERROR; 589 590 off_t numBlocks = deviceSize / blockSize; 591 592 // create valid super block 593 594 fSuperBlock.Initialize(name, numBlocks, blockSize); 595 596 // initialize short hands to the super block (to save byte swapping) 597 fBlockSize = fSuperBlock.BlockSize(); 598 fBlockShift = fSuperBlock.BlockShift(); 599 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 600 601 // since the allocator has not been initialized yet, we 602 // cannot use BlockAllocator::BitmapSize() here 603 fSuperBlock.log_blocks = ToBlockRun(AllocationGroups() 604 * fSuperBlock.BlocksPerAllocationGroup() + 1); 605 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(2048); 606 // ToDo: set the log size depending on the disk size 607 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log())); 608 609 // set the current log pointers, so that journaling will work correctly 610 fLogStart = fSuperBlock.LogStart(); 611 fLogEnd = fSuperBlock.LogEnd(); 612 613 if (!IsValidSuperBlock()) 614 RETURN_ERROR(B_ERROR); 615 616 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 617 return B_ERROR; 618 619 fJournal = new Journal(this); 620 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 621 RETURN_ERROR(B_ERROR); 622 623 // ready to write data to disk 624 625 Transaction transaction(this, 0); 626 627 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 628 RETURN_ERROR(B_ERROR); 629 630 off_t id; 631 status_t status = Inode::Create(transaction, NULL, NULL, 632 S_DIRECTORY | 0755, 0, 0, &id, &fRootNode); 633 if (status < B_OK) 634 RETURN_ERROR(status); 635 636 fSuperBlock.root_dir = ToBlockRun(id); 637 638 if ((flags & VOLUME_NO_INDICES) == 0) { 639 // The indices root directory will be created automatically 640 // when the standard indices are created (or any other). 641 Index index(this); 642 status = index.Create(transaction, "name", B_STRING_TYPE); 643 if (status < B_OK) 644 return status; 645 646 status = index.Create(transaction, "last_modified", B_INT64_TYPE); 647 if (status < B_OK) 648 return status; 649 650 status = index.Create(transaction, "size", B_INT64_TYPE); 651 if (status < B_OK) 652 return status; 653 } 654 655 WriteSuperBlock(); 656 transaction.Done(); 657 658 // put_vnode(ID(), fRootNode->ID()); 659 // if (fIndicesNode != NULL) 660 // put_vnode(ID(), fIndicesNode->ID()); 661 662 Sync(); 663 opener.RemoveCache(true); 664 return B_OK; 665 } 666