/* Volume - BFS super block, mounting, etc.
**
** Initial version by Axel Dörfler, axeld@pinc-software.de
** This file may be used under the terms of the OpenBeOS License.
*/


#include "Debug.h"
#include "Volume.h"
#include "Journal.h"
#include "Inode.h"
#include "Query.h"

#include <kernel_cpp.h>
#include <KernelExport.h>
#include <fs_volume.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>


static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to give to newly
	// initialized disks: disk_super_block::Initialize() keeps doubling the
	// size of an allocation group while the disk would need more groups
	// than this.
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// It seems to create appropriate numbers for smaller disks
	// with this setting, though (i.e. you can create a 400 MB
	// file on a 1 GB disk without the need for double indirect
	// blocks).
34 35 36 class DeviceOpener { 37 public: 38 DeviceOpener(const char *device, int mode); 39 ~DeviceOpener(); 40 41 int Open(const char *device, int mode); 42 status_t InitCache(off_t numBlocks); 43 void RemoveCache(int mode); 44 45 void Keep(); 46 47 int Device() const { return fDevice; } 48 49 status_t GetSize(off_t *_size, uint32 *_blockSize = NULL); 50 51 private: 52 int fDevice; 53 bool fCached; 54 }; 55 56 57 DeviceOpener::DeviceOpener(const char *device, int mode) 58 : 59 fCached(false) 60 { 61 Open(device, mode); 62 } 63 64 65 DeviceOpener::~DeviceOpener() 66 { 67 if (fDevice >= B_OK) { 68 close(fDevice); 69 if (fCached) 70 remove_cached_device_blocks(fDevice, NO_WRITES); 71 } 72 } 73 74 75 int 76 DeviceOpener::Open(const char *device, int mode) 77 { 78 fDevice = open(device, mode); 79 return fDevice; 80 } 81 82 83 status_t 84 DeviceOpener::InitCache(off_t numBlocks) 85 { 86 if (init_cache_for_device(fDevice, numBlocks) == B_OK) { 87 fCached = true; 88 return B_OK; 89 } 90 91 return B_ERROR; 92 } 93 94 95 void 96 DeviceOpener::RemoveCache(int mode) 97 { 98 if (!fCached) 99 return; 100 101 remove_cached_device_blocks(fDevice, mode); 102 fCached = false; 103 } 104 105 106 void 107 DeviceOpener::Keep() 108 { 109 fDevice = -1; 110 } 111 112 113 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY 114 * to compute the size, or fstat() if that failed. 
115 */ 116 117 status_t 118 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize) 119 { 120 device_geometry geometry; 121 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) { 122 // maybe it's just a file 123 struct stat stat; 124 if (fstat(fDevice, &stat) < 0) 125 return B_ERROR; 126 127 if (_size) 128 *_size = stat.st_size; 129 if (_blockSize) // that shouldn't cause us any problems 130 *_blockSize = 512; 131 132 return B_OK; 133 } 134 135 if (_size) 136 *_size = geometry.head_count * geometry.cylinder_count * geometry.sectors_per_track; 137 if (_blockSize) 138 *_blockSize = geometry.bytes_per_sector; 139 140 return B_OK; 141 } 142 143 144 // #pragma mark - 145 146 147 bool 148 disk_super_block::IsValid() 149 { 150 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1 151 || Magic2() != (int32)SUPER_BLOCK_MAGIC2 152 || Magic3() != (int32)SUPER_BLOCK_MAGIC3 153 || (int32)block_size != inode_size 154 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN 155 || (1UL << BlockShift()) != BlockSize() 156 || AllocationGroups() < 1 157 || AllocationGroupShift() < 1 158 || BlocksPerAllocationGroup() < 1 159 || NumBlocks() < 10 160 || AllocationGroups() != divide_roundup(NumBlocks(), 161 1L << AllocationGroupShift())) 162 return false; 163 164 return true; 165 } 166 167 168 void 169 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize) 170 { 171 memset(this, 0, sizeof(disk_super_block)); 172 173 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1); 174 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2); 175 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3); 176 fs_byte_order = SUPER_BLOCK_FS_LENDIAN; 177 flags = SUPER_BLOCK_DISK_CLEAN; 178 179 strlcpy(name, diskName, sizeof(name)); 180 181 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize); 182 for (block_shift = 9; (1UL << block_shift) < blockSize; block_shift++); 183 184 num_blocks = numBlocks; 185 used_blocks = 0; 186 187 // Get the minimum ag_shift (that's determined by the block size) 188 
189 blocks_per_ag = 1; 190 ag_shift = 13; 191 192 int32 bitsPerBlock = blockSize << 3; 193 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock; 194 195 for (int32 i = 8192; i < bitsPerBlock; i *= 2) { 196 ag_shift++; 197 } 198 199 // Many allocation groups help applying allocation policies, but if 200 // they are too small, we will need to many block_runs to cover large 201 // files (see above to get an explanation of the kDesiredAllocationGroups 202 // constant). 203 204 while (true) { 205 num_ags = (bitmapBlocks + blocks_per_ag - 1) / blocks_per_ag; 206 if (num_ags > kDesiredAllocationGroups) { 207 if (ag_shift == 16) 208 break; 209 210 ag_shift++; 211 blocks_per_ag *= 2; 212 } else 213 break; 214 } 215 } 216 217 218 // #pragma mark - 219 220 221 Volume::Volume(nspace_id id) 222 : 223 fID(id), 224 fBlockAllocator(this), 225 fLock("bfs volume"), 226 fRootNode(NULL), 227 fIndicesNode(NULL), 228 fDirtyCachedBlocks(0), 229 fUniqueID(0), 230 fFlags(0) 231 { 232 } 233 234 235 Volume::~Volume() 236 { 237 } 238 239 240 bool 241 Volume::IsValidSuperBlock() 242 { 243 return fSuperBlock.IsValid(); 244 } 245 246 247 void 248 Volume::Panic() 249 { 250 FATAL(("we have to panic... switch to read-only mode!\n")); 251 fFlags |= VOLUME_READ_ONLY; 252 #ifdef USER 253 debugger("BFS panics!"); 254 #elif defined(DEBUG) 255 kernel_debugger("BFS panics!"); 256 #endif 257 } 258 259 260 status_t 261 Volume::Mount(const char *deviceName, uint32 flags) 262 { 263 if (flags & B_MOUNT_READ_ONLY) 264 fFlags |= VOLUME_READ_ONLY; 265 266 // ToDo: validate the FS in write mode as well! 267 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \ 268 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY)) 269 // in big endian mode, we only mount read-only for now 270 flags |= B_MOUNT_READ_ONLY; 271 #endif 272 273 DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? 
O_RDONLY : O_RDWR); 274 275 // if we couldn't open the device, try read-only (don't rely on a specific error code) 276 if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) { 277 opener.Open(deviceName, O_RDONLY); 278 fFlags |= VOLUME_READ_ONLY; 279 } 280 281 fDevice = opener.Device(); 282 if (fDevice < B_OK) 283 RETURN_ERROR(fDevice); 284 285 // check if it's a regular file, and if so, disable the cache for the 286 // underlaying file system 287 struct stat stat; 288 if (fstat(fDevice, &stat) < 0) 289 RETURN_ERROR(B_ERROR); 290 291 //#ifndef USER 292 if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) { 293 // mount read-only if the cache couldn't be disabled 294 # ifdef DEBUG 295 FATAL(("couldn't disable cache for image file - system may dead-lock!\n")); 296 # else 297 FATAL(("couldn't disable cache for image file!\n")); 298 Panic(); 299 # endif 300 } 301 //#endif 302 303 // read the super block 304 char buffer[1024]; 305 if (read_pos(fDevice, 0, buffer, sizeof(buffer)) != sizeof(buffer)) 306 return B_IO_ERROR; 307 308 status_t status = B_OK; 309 310 // Note: that does work only for x86, for PowerPC, the super block 311 // is located at offset 0! 
312 memcpy(&fSuperBlock, buffer + 512, sizeof(disk_super_block)); 313 if (!IsValidSuperBlock()) { 314 #ifndef BFS_LITTLE_ENDIAN_ONLY 315 memcpy(&fSuperBlock, buffer, sizeof(disk_super_block)); 316 if (!IsValidSuperBlock()) 317 return B_BAD_VALUE; 318 #else 319 return B_BAD_VALUE; 320 #endif 321 } 322 323 if (!IsValidSuperBlock()) { 324 FATAL(("invalid super block!\n")); 325 return B_BAD_VALUE; 326 } 327 328 // check if the device size is large enough to hold the file system 329 off_t diskSize; 330 if (opener.GetSize(&diskSize) < B_OK) 331 RETURN_ERROR(B_ERROR); 332 if (diskSize < (NumBlocks() << BlockShift())) 333 RETURN_ERROR(B_BAD_VALUE); 334 335 // set the current log pointers, so that journaling will work correctly 336 fLogStart = fSuperBlock.LogStart(); 337 fLogEnd = fSuperBlock.LogEnd(); 338 339 // initialize short hands to the super block (to save byte swapping) 340 fBlockSize = fSuperBlock.BlockSize(); 341 fBlockShift = fSuperBlock.BlockShift(); 342 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 343 344 if (opener.InitCache(NumBlocks()) != B_OK) 345 return B_ERROR; 346 347 fJournal = new Journal(this); 348 // replaying the log is the first thing we will do on this disk 349 if (fJournal && fJournal->InitCheck() < B_OK 350 || fBlockAllocator.Initialize() < B_OK) { 351 // ToDo: improve error reporting for a bad journal 352 FATAL(("could not initialize journal/block bitmap allocator!\n")); 353 return B_NO_MEMORY; 354 } 355 356 fRootNode = new Inode(this, ToVnode(Root())); 357 if (fRootNode && fRootNode->InitCheck() == B_OK) { 358 if (new_vnode(fID, ToVnode(Root()), (void *)fRootNode) == B_OK) { 359 // try to get indices root dir 360 361 // question: why doesn't get_vnode() work here?? 362 // answer: we have not yet backpropagated the pointer to the 363 // volume in bfs_mount(), so bfs_read_vnode() can't get it. 364 // But it's not needed to do that anyway. 
365 366 if (!Indices().IsZero()) 367 fIndicesNode = new Inode(this, ToVnode(Indices())); 368 369 if (fIndicesNode == NULL 370 || fIndicesNode->InitCheck() < B_OK 371 || !fIndicesNode->IsContainer()) { 372 INFORM(("bfs: volume doesn't have indices!\n")); 373 374 if (fIndicesNode) { 375 // if this is the case, the index root node is gone bad, and 376 // BFS switch to read-only mode 377 fFlags |= VOLUME_READ_ONLY; 378 delete fIndicesNode; 379 fIndicesNode = NULL; 380 } 381 } 382 383 // all went fine 384 opener.Keep(); 385 return B_OK; 386 } else 387 status = B_NO_MEMORY; 388 } else 389 status = B_BAD_VALUE; 390 391 FATAL(("could not create root node: new_vnode() failed!\n")); 392 393 return status; 394 } 395 396 397 status_t 398 Volume::Unmount() 399 { 400 // This will also flush the log & all blocks to disk 401 delete fJournal; 402 fJournal = NULL; 403 404 delete fIndicesNode; 405 406 remove_cached_device_blocks(fDevice, IsReadOnly() ? NO_WRITES : ALLOW_WRITES); 407 close(fDevice); 408 409 return B_OK; 410 } 411 412 413 status_t 414 Volume::Sync() 415 { 416 return fJournal->FlushLogAndBlocks(); 417 } 418 419 420 status_t 421 Volume::ValidateBlockRun(block_run run) 422 { 423 if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups() 424 || run.Start() > (1UL << AllocationGroupShift()) 425 || run.length == 0 426 || uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) { 427 Panic(); 428 FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length())); 429 return B_BAD_DATA; 430 } 431 return B_OK; 432 } 433 434 435 block_run 436 Volume::ToBlockRun(off_t block) const 437 { 438 block_run run; 439 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift()); 440 run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1)); 441 run.length = HOST_ENDIAN_TO_BFS_INT16(1); 442 return run; 443 } 444 445 446 status_t 447 Volume::CreateIndicesRoot(Transaction *transaction) 
448 { 449 off_t id; 450 status_t status = Inode::Create(transaction, NULL, NULL, 451 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode); 452 if (status < B_OK) 453 RETURN_ERROR(status); 454 455 fSuperBlock.indices = ToBlockRun(id); 456 return WriteSuperBlock(); 457 } 458 459 460 status_t 461 Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run) 462 { 463 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run); 464 } 465 466 467 status_t 468 Volume::WriteSuperBlock() 469 { 470 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block)) 471 return B_IO_ERROR; 472 473 return B_OK; 474 } 475 476 477 void 478 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 479 size_t oldLength, const uint8 *newKey, size_t newLength) 480 { 481 if (fQueryLock.Lock() < B_OK) 482 return; 483 484 Query *query = NULL; 485 while ((query = fQueries.Next(query)) != NULL) 486 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength); 487 488 fQueryLock.Unlock(); 489 } 490 491 492 /** Checks if there is a live query whose results depend on the presence 493 * or value of the specified attribute. 494 * Don't use it if you already have all the data together to evaluate 495 * the queries - it wouldn't safe you anything in this case. 
496 */ 497 498 bool 499 Volume::CheckForLiveQuery(const char *attribute) 500 { 501 // ToDo: check for a live query that depends on the specified attribute 502 return true; 503 } 504 505 506 void 507 Volume::AddQuery(Query *query) 508 { 509 if (fQueryLock.Lock() < B_OK) 510 return; 511 512 fQueries.Add(query); 513 514 fQueryLock.Unlock(); 515 } 516 517 518 void 519 Volume::RemoveQuery(Query *query) 520 { 521 if (fQueryLock.Lock() < B_OK) 522 return; 523 524 fQueries.Remove(query); 525 526 fQueryLock.Unlock(); 527 } 528 529 530 // #pragma mark - 531 // Disk initialization 532 533 534 #ifdef USER 535 extern "C" void kill_device_vnodes(dev_t id); 536 // This call is only available in the userland fs_shell 537 538 status_t 539 Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags) 540 { 541 // although there is no really good reason for it, we won't 542 // accept '/' in disk names (mkbfs does this, too - and since 543 // Tracker names mounted volumes like their name) 544 if (strchr(name, '/') != NULL) 545 return B_BAD_VALUE; 546 547 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192) 548 return B_BAD_VALUE; 549 550 DeviceOpener opener(device, O_RDWR); 551 if (opener.Device() < B_OK) 552 return B_BAD_VALUE; 553 554 fDevice = opener.Device(); 555 556 uint32 deviceBlockSize; 557 off_t deviceSize; 558 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 559 return B_ERROR; 560 561 off_t numBlocks = deviceSize / blockSize; 562 563 // create valid super block 564 565 fSuperBlock.Initialize(name, numBlocks, blockSize); 566 567 // initialize short hands to the super block (to save byte swapping) 568 fBlockSize = fSuperBlock.BlockSize(); 569 fBlockShift = fSuperBlock.BlockShift(); 570 fAllocationGroupShift = fSuperBlock.AllocationGroupShift(); 571 572 // since the allocator has not been initialized yet, we 573 // cannot use BlockAllocator::BitmapSize() here 574 fSuperBlock.log_blocks = 
ToBlockRun(AllocationGroups() 575 * fSuperBlock.BlocksPerAllocationGroup() + 1); 576 fSuperBlock.log_blocks.length = 2048; 577 // ToDo: set the log size depending on the disk size 578 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log())); 579 580 // set the current log pointers, so that journaling will work correctly 581 fLogStart = fSuperBlock.LogStart(); 582 fLogEnd = fSuperBlock.LogEnd(); 583 584 if (!IsValidSuperBlock()) 585 RETURN_ERROR(B_ERROR); 586 587 if (opener.InitCache(numBlocks) != B_OK) 588 return B_ERROR; 589 590 fJournal = new Journal(this); 591 if (fJournal == NULL || fJournal->InitCheck() < B_OK) 592 RETURN_ERROR(B_ERROR); 593 594 // ready to write data to disk 595 596 Transaction transaction(this, 0); 597 598 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK) 599 RETURN_ERROR(B_ERROR); 600 601 off_t id; 602 status_t status = Inode::Create(&transaction, NULL, NULL, 603 S_DIRECTORY | 0755, 0, 0, &id, &fRootNode); 604 if (status < B_OK) 605 RETURN_ERROR(status); 606 607 fSuperBlock.root_dir = ToBlockRun(id); 608 609 if ((flags & VOLUME_NO_INDICES) == 0) { 610 // The indices root directory will be created automatically 611 // when the standard indices are created (or any other). 612 Index index(this); 613 status = index.Create(&transaction, "name", B_STRING_TYPE); 614 if (status < B_OK) 615 return status; 616 617 status = index.Create(&transaction, "last_modified", B_INT64_TYPE); 618 if (status < B_OK) 619 return status; 620 621 status = index.Create(&transaction, "size", B_INT64_TYPE); 622 if (status < B_OK) 623 return status; 624 } 625 626 WriteSuperBlock(); 627 transaction.Done(); 628 629 put_vnode(ID(), fRootNode->ID()); 630 if (fIndicesNode != NULL) 631 put_vnode(ID(), fIndicesNode->ID()); 632 633 kill_device_vnodes(ID()); 634 // This call is only available in the userland fs_shell 635 636 Sync(); 637 opener.RemoveCache(ALLOW_WRITES); 638 return B_OK; 639 } 640 #endif 641