1 /* Volume - BFS super block, mounting, etc. 2 * 3 * Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de. 4 * This file may be used under the terms of the MIT License. 5 */ 6 7 8 #include "Debug.h" 9 #include "Volume.h" 10 #include "Journal.h" 11 #include "Inode.h" 12 #include "Query.h" 13 14 #include <util/kernel_cpp.h> 15 #include <KernelExport.h> 16 #include <Drivers.h> 17 #include <fs_volume.h> 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <ctype.h> 23 24 25 static const int32 kDesiredAllocationGroups = 56; 26 // This is the number of allocation groups that will be tried 27 // to be given for newly initialized disks. 28 // That's only relevant for smaller disks, though, since any 29 // of today's disk sizes already reach the maximum length 30 // of an allocation group (65536 blocks). 31 // It seems to create appropriate numbers for smaller disks 32 // with this setting, though (i.e. you can create a 400 MB 33 // file on a 1 GB disk without the need for double indirect 34 // blocks). 35 36 37 class DeviceOpener { 38 public: 39 DeviceOpener(const char *device, int mode); 40 ~DeviceOpener(); 41 42 int Open(const char *device, int mode); 43 void *InitCache(off_t numBlocks, uint32 blockSize); 44 void RemoveCache(bool allowWrites); 45 46 void Keep(); 47 48 int Device() const { return fDevice; } 49 50 status_t GetSize(off_t *_size, uint32 *_blockSize = NULL); 51 52 private: 53 int fDevice; 54 void *fBlockCache; 55 }; 56 57 58 DeviceOpener::DeviceOpener(const char *device, int mode) 59 : 60 fBlockCache(NULL) 61 { 62 Open(device, mode); 63 } 64 65 66 DeviceOpener::~DeviceOpener() 67 { 68 if (fDevice >= B_OK) { 69 RemoveCache(false); 70 close(fDevice); 71 } 72 } 73 74 75 int 76 DeviceOpener::Open(const char *device, int mode) 77 { 78 fDevice = open(device, mode); 79 return fDevice; 80 } 81 82 83 void * 84 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize) 85 { 86 return block_cache_create(fDevice, numBlocks, blockSize); 87 } 88 89 90 void 91 DeviceOpener::RemoveCache(bool allowWrites) 92 { 93 if (fBlockCache == NULL) 94 return; 95 96 block_cache_delete(fBlockCache, allowWrites); 97 fBlockCache = NULL; 98 } 99 100 101 void 102 DeviceOpener::Keep() 103 { 104 fDevice = -1; 105 } 106 107 108 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY 109 * to compute the size, or fstat() if that failed. 110 */ 111 112 status_t 113 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize) 114 { 115 device_geometry geometry; 116 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) { 117 // maybe it's just a file 118 struct stat stat; 119 if (fstat(fDevice, &stat) < 0) 120 return B_ERROR; 121 122 if (_size) 123 *_size = stat.st_size; 124 if (_blockSize) // that shouldn't cause us any problems 125 *_blockSize = 512; 126 127 return B_OK; 128 } 129 130 if (_size) { 131 *_size = 1LL * geometry.head_count * geometry.cylinder_count 132 * geometry.sectors_per_track * geometry.bytes_per_sector; 133 } 134 if (_blockSize) 135 *_blockSize = geometry.bytes_per_sector; 136 137 return B_OK; 138 } 139 140 141 // #pragma mark - 142 143 144 bool 145 disk_super_block::IsValid() 146 { 147 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 148 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 149 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 150 || (int32)block_size != inode_size 151 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 152 || (1UL << BlockShift()) != BlockSize() 153 || AllocationGroups() < 1 154 || AllocationGroupShift() < 1 155 || BlocksPerAllocationGroup() < 1 156 || NumBlocks() < 10 157 || AllocationGroups() != divide_roundup(NumBlocks(), 158 1L << AllocationGroupShift())) 159 return false; 160 161 return true; 162 } 163 164 165 void 166 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize) 167 { 168 memset(this, 0, sizeof(disk_super_block)); 169 170 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 171 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 172 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 173 fs_byte_order = SUPER_BLOCK_FS_LENDIAN; 174 flags = SUPER_BLOCK_DISK_CLEAN; 175 176 strlcpy(name, diskName, sizeof(name)); 177 178 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 179 for (block_shift = 9; (1UL << block_shift) < blockSize; block_shift++); 180 181 num_blocks = numBlocks; 182 used_blocks = 0; 183 184 // Get the minimum ag_shift (that's determined by the block size) 185 186 blocks_per_ag = 1; 187 ag_shift = 13; 188 189 int32 bitsPerBlock = blockSize << 3; 190 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 191 192 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 193 ag_shift++; 194 } 195 196 // Many allocation groups help applying allocation policies, but if 197 // they are too small, we will need to many block_runs to cover large 198 // files (see above to get an explanation of the kDesiredAllocationGroups 199 // constant). 200 201 while (true) { 202 num_ags = (bitmapBlocks + blocks_per_ag - 1) / blocks_per_ag; 203 if (num_ags > kDesiredAllocationGroups) { 204 if (ag_shift == 16) 205 break; 206 207 ag_shift++; 208 blocks_per_ag *= 2; 209 } else 210 break; 211 } 212 } 213 214 215 // #pragma mark - 216 217 218 Volume::Volume(mount_id id) 219 : 220 fID(id), 221 fBlockAllocator(this), 222 fLock("bfs volume"), 223 fRootNode(NULL), 224 fIndicesNode(NULL), 225 fDirtyCachedBlocks(0), 226 fUniqueID(0), 227 fFlags(0) 228 { 229 } 230 231 232 Volume::~Volume() 233 { 234 } 235 236 237 bool 238 Volume::IsValidSuperBlock() 239 { 240 return fSuperBlock.IsValid(); 241 } 242 243 244 void 245 Volume::Panic() 246 { 247 FATAL(("we have to panic... switch to read-only mode!\n")); 248 fFlags |= VOLUME_READ_ONLY; 249 #ifdef USER 250 debugger("BFS panics!"); 251 #elif defined(DEBUG) 252 kernel_debugger("BFS panics!"); 253 #endif 254 } 255 256 257 status_t 258 Volume::Mount(const char *deviceName, uint32 flags) 259 { 260 if (flags & B_MOUNT_READ_ONLY) 261 fFlags |= VOLUME_READ_ONLY; 262 263 // ToDo: validate the FS in write mode as well! 264 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 265 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 266 // in big endian mode, we only mount read-only for now 267 flags |= B_MOUNT_READ_ONLY; 268 #endif 269 270 DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR); 271 272 // if we couldn't open the device, try read-only (don't rely on a specific error code) 273 if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) { 274 opener.Open(deviceName, O_RDONLY); 275 fFlags |= VOLUME_READ_ONLY; 276 } 277 278 fDevice = opener.Device(); 279 if (fDevice < B_OK) 280 RETURN_ERROR(fDevice); 281 282 // check if it's a regular file, and if so, disable the cache for the 283 // underlaying file system 284 struct stat stat; 285 if (fstat(fDevice, &stat) < 0) 286 RETURN_ERROR(B_ERROR); 287 288 #ifndef NO_FILE_UNCACHED_IO 289 if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) { 290 // mount read-only if the cache couldn't be disabled 291 # ifdef DEBUG 292 FATAL(("couldn't disable cache for image file - system may dead-lock!\n")); 293 # else 294 FATAL(("couldn't disable cache for image file!\n")); 295 Panic(); 296 # endif 297 } 298 #endif 299 300 // read the super block 301 if (Identify(fDevice, &fSuperBlock) != B_OK) { 302 FATAL(("invalid super block!\n")); 303 return B_BAD_VALUE; 304 } 305 306 // initialize short hands to the super block (to save byte swapping) 307 fBlockSize = fSuperBlock.BlockSize(); 308 fBlockShift = fSuperBlock.BlockShift(); 309 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 310 311 // check if the device size is large enough to hold the file system 312 off_t diskSize; 313 if (opener.GetSize(&diskSize) < B_OK) 314 RETURN_ERROR(B_ERROR); 315 if (diskSize < (NumBlocks() << BlockShift())) 316 RETURN_ERROR(B_BAD_VALUE); 317 318 // set the current log pointers, so that journaling will work correctly 319 fLogStart = fSuperBlock.LogStart(); 320 fLogEnd = fSuperBlock.LogEnd(); 321 322 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL) 323 return B_ERROR; 324 325 fJournal = new Journal(this); 326 // replaying the log is the first thing we will do on this disk 327 if (fJournal && fJournal->InitCheck() < B_OK 328 || fBlockAllocator.Initialize() < B_OK) { 329 // ToDo: improve error reporting for a bad journal 330 FATAL(("could not initialize journal/block bitmap allocator!\n")); 331 return B_NO_MEMORY; 332 } 333 334 status_t status = B_OK; 335 336 fRootNode = new Inode(this, ToVnode(Root())); 337 if (fRootNode && fRootNode->InitCheck() == B_OK) { 338 status = publish_vnode(fID, ToVnode(Root()), (void *)fRootNode); 339 if (status == B_OK) { 340 // try to get indices root dir 341 342 // question: why doesn't get_vnode() work here?? 343 // answer: we have not yet backpropagated the pointer to the 344 // volume in bfs_mount(), so bfs_read_vnode() can't get it. 345 // But it's not needed to do that anyway. 346 347 if (!Indices().IsZero()) 348 fIndicesNode = new Inode(this, ToVnode(Indices())); 349 350 if (fIndicesNode == NULL 351 || fIndicesNode->InitCheck() < B_OK 352 || !fIndicesNode->IsContainer()) { 353 INFORM(("bfs: volume doesn't have indices!\n")); 354 355 if (fIndicesNode) { 356 // if this is the case, the index root node is gone bad, and 357 // BFS switch to read-only mode 358 fFlags |= VOLUME_READ_ONLY; 359 delete fIndicesNode; 360 fIndicesNode = NULL; 361 } 362 } 363 364 // all went fine 365 opener.Keep(); 366 return B_OK; 367 } else 368 FATAL(("could not create root node: new_vnode() failed!\n")); 369 370 delete fRootNode; 371 } else { 372 status = B_BAD_VALUE; 373 FATAL(("could not create root node!\n")); 374 } 375 376 return status; 377 } 378 379 380 status_t 381 Volume::Unmount() 382 { 383 // Unlike in BeOS, we need to put the reference to our root node ourselves 384 put_vnode(fID, ToVnode(Root())); 385 386 // This will also flush the log & all blocks to disk 387 delete fJournal; 388 fJournal = NULL; 389 390 delete fIndicesNode; 391 392 block_cache_delete(fBlockCache, !IsReadOnly()); 393 close(fDevice); 394 395 return B_OK; 396 } 397 398 399 status_t 400 Volume::Sync() 401 { 402 return fJournal->FlushLogAndBlocks(); 403 } 404 405 406 status_t 407 Volume::ValidateBlockRun(block_run run) 408 { 409 if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups() 410 || run.Start() > (1UL << AllocationGroupShift()) 411 || run.length == 0 412 || uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) { 413 Panic(); 414 FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length())); 415 return B_BAD_DATA; 416 } 417 return B_OK; 418 } 419 420 421 block_run 422 Volume::ToBlockRun(off_t block) const 423 { 424 block_run run; 425 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift()); 426 run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1)); 427 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 428 return run; 429 } 430 431 432 status_t 433 Volume::CreateIndicesRoot(Transaction &transaction) 434 { 435 off_t id; 436 status_t status = Inode::Create(transaction, NULL, NULL, 437 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode); 438 if (status < B_OK) 439 RETURN_ERROR(status); 440 441 fSuperBlock.indices = ToBlockRun(id); 442 return WriteSuperBlock(); 443 } 444 445 446 status_t 447 Volume::AllocateForInode(Transaction &transaction, const Inode *parent, mode_t type, block_run &run) 448 { 449 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run); 450 } 451 452 453 status_t 454 Volume::WriteSuperBlock() 455 { 456 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block)) 457 return B_IO_ERROR; 458 459 return B_OK; 460 } 461 462 463 void 464 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 465 size_t oldLength, const uint8 *newKey, size_t newLength) 466 { 467 if (fQueryLock.Lock() < B_OK) 468 return; 469 470 Query *query = NULL; 471 while ((query = fQueries.Next(query)) != NULL) 472 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength); 473 474 fQueryLock.Unlock(); 475 } 476 477 478 /** Checks if there is a live query whose results depend on the presence 479 * or value of the specified attribute. 480 * Don't use it if you already have all the data together to evaluate 481 * the queries - it wouldn't safe you anything in this case. 482 */ 483 484 bool 485 Volume::CheckForLiveQuery(const char *attribute) 486 { 487 // ToDo: check for a live query that depends on the specified attribute 488 return true; 489 } 490 491 492 void 493 Volume::AddQuery(Query *query) 494 { 495 if (fQueryLock.Lock() < B_OK) 496 return; 497 498 fQueries.Add(query); 499 500 fQueryLock.Unlock(); 501 } 502 503 504 void 505 Volume::RemoveQuery(Query *query) 506 { 507 if (fQueryLock.Lock() < B_OK) 508 return; 509 510 fQueries.Remove(query); 511 512 fQueryLock.Unlock(); 513 } 514 515 516 // #pragma mark - 517 // Disk scanning and initialization 518 519 520 status_t 521 Volume::Identify(int fd, disk_super_block *superBlock) 522 { 523 char buffer[1024]; 524 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 525 return B_IO_ERROR; 526 527 // Note: that does work only for x86, for PowerPC, the super block 528 // may be located at offset 0! 529 memcpy(superBlock, buffer + 512, sizeof(disk_super_block)); 530 if (!superBlock->IsValid()) { 531 #ifndef BFS_LITTLE_ENDIAN_ONLY 532 memcpy(superBlock, buffer, sizeof(disk_super_block)); 533 if (!superBlock->IsValid()) 534 return B_BAD_VALUE; 535 #else 536 return B_BAD_VALUE; 537 #endif 538 } 539 540 return B_OK; 541 } 542 543 544 #ifdef USER 545 extern "C" void kill_device_vnodes(dev_t id); 546 // This call is only available in the userland fs_shell 547 548 status_t 549 Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags) 550 { 551 // although there is no really good reason for it, we won't 552 // accept '/' in disk names (mkbfs does this, too - and since 553 // Tracker names mounted volumes like their name) 554 if (strchr(name, '/') != NULL) 555 return B_BAD_VALUE; 556 557 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192) 558 return B_BAD_VALUE; 559 560 DeviceOpener opener(device, O_RDWR); 561 if (opener.Device() < B_OK) 562 return B_BAD_VALUE; 563 564 fDevice = opener.Device(); 565 566 uint32 deviceBlockSize; 567 off_t deviceSize; 568 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 569 return B_ERROR; 570 571 off_t numBlocks = deviceSize / blockSize; 572 573 // create valid super block 574 575 fSuperBlock.Initialize(name, numBlocks, blockSize); 576 577 // initialize short hands to the super block (to save byte swapping) 578 fBlockSize = fSuperBlock.BlockSize(); 579 fBlockShift = fSuperBlock.BlockShift(); 580 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 581 582 // since the allocator has not been initialized yet, we 583 // cannot use BlockAllocator::BitmapSize() here 584 fSuperBlock.log_blocks = ToBlockRun(AllocationGroups() 585 * fSuperBlock.BlocksPerAllocationGroup() + 1); 586 fSuperBlock.log_blocks.length = 2048; 587 // ToDo: set the log size depending on the disk size 588 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log())); 589 590 // set the current log pointers, so that journaling will work correctly 591 fLogStart = fSuperBlock.LogStart(); 592 fLogEnd = fSuperBlock.LogEnd(); 593 594 if (!IsValidSuperBlock()) 595 RETURN_ERROR(B_ERROR); 596 597 if (opener.InitCache(numBlocks) != B_OK) 598 return B_ERROR; 599 600 fJournal = new Journal(this); 601 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 602 RETURN_ERROR(B_ERROR); 603 604 // ready to write data to disk 605 606 Transaction transaction(this, 0); 607 608 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 609 RETURN_ERROR(B_ERROR); 610 611 off_t id; 612 status_t status = Inode::Create(&transaction, NULL, NULL, 613 S_DIRECTORY | 0755, 0, 0, &id, &fRootNode); 614 if (status < B_OK) 615 RETURN_ERROR(status); 616 617 fSuperBlock.root_dir = ToBlockRun(id); 618 619 if ((flags & VOLUME_NO_INDICES) == 0) { 620 // The indices root directory will be created automatically 621 // when the standard indices are created (or any other). 622 Index index(this); 623 status = index.Create(&transaction, "name", B_STRING_TYPE); 624 if (status < B_OK) 625 return status; 626 627 status = index.Create(&transaction, "last_modified", B_INT64_TYPE); 628 if (status < B_OK) 629 return status; 630 631 status = index.Create(&transaction, "size", B_INT64_TYPE); 632 if (status < B_OK) 633 return status; 634 } 635 636 WriteSuperBlock(); 637 transaction.Done(); 638 639 put_vnode(ID(), fRootNode->ID()); 640 if (fIndicesNode != NULL) 641 put_vnode(ID(), fIndicesNode->ID()); 642 643 kill_device_vnodes(ID()); 644 // This call is only available in the userland fs_shell 645 646 Sync(); 647 opener.RemoveCache(ALLOW_WRITES); 648 return B_OK; 649 } 650 #endif 651