1 /* 2 * Copyright 2019, Les De Ridder, les@lesderid.net 3 * Copyright 2017, Chế Vũ Gia Hy, cvghy116@gmail.com. 4 * Copyright 2011, Jérôme Duval, korli@users.berlios.de. 5 * Copyright 2008-2010, Axel Dörfler, axeld@pinc-software.de. 6 * 7 * This file may be used under the terms of the MIT License. 8 */ 9 10 11 //! Superblock, mounting, etc. 12 13 14 #include "Volume.h" 15 #include "BTree.h" 16 #include "CachedBlock.h" 17 #include "Chunk.h" 18 #include "DebugSupport.h" 19 #include "ExtentAllocator.h" 20 #include "Inode.h" 21 #include "Journal.h" 22 #include "Utility.h" 23 24 25 //#define TRACE_BTRFS 26 #ifdef TRACE_BTRFS 27 # define TRACE(x...) dprintf("\33[34mbtrfs:\33[0m " x) 28 #else 29 # define TRACE(x...) ; 30 #endif 31 32 33 class DeviceOpener { 34 public: 35 DeviceOpener(int fd, int mode); 36 DeviceOpener(const char* device, int mode); 37 ~DeviceOpener(); 38 39 int Open(const char* device, int mode); 40 int Open(int fd, int mode); 41 void* InitCache(off_t numBlocks, uint32 blockSize); 42 void RemoveCache(bool allowWrites); 43 44 void Keep(); 45 46 int Device() const { return fDevice; } 47 int Mode() const { return fMode; } 48 bool IsReadOnly() const 49 { return _IsReadOnly(fMode); } 50 51 status_t GetSize(off_t* _size, uint32* _blockSize = NULL); 52 53 private: 54 static bool _IsReadOnly(int mode) 55 { return (mode & O_RWMASK) == O_RDONLY;} 56 static bool _IsReadWrite(int mode) 57 { return (mode & O_RWMASK) == O_RDWR;} 58 59 int fDevice; 60 int fMode; 61 void* fBlockCache; 62 }; 63 64 65 DeviceOpener::DeviceOpener(const char* device, int mode) 66 : 67 fBlockCache(NULL) 68 { 69 Open(device, mode); 70 } 71 72 73 DeviceOpener::DeviceOpener(int fd, int mode) 74 : 75 fBlockCache(NULL) 76 { 77 Open(fd, mode); 78 } 79 80 81 DeviceOpener::~DeviceOpener() 82 { 83 if (fDevice >= 0) { 84 RemoveCache(false); 85 close(fDevice); 86 } 87 } 88 89 90 int 91 DeviceOpener::Open(const char* device, int mode) 92 { 93 fDevice = open(device, mode | O_NOCACHE); 94 if (fDevice < 0) 95 fDevice = errno; 96 97 if (fDevice < 0 && _IsReadWrite(mode)) { 98 // try again to open read-only (don't rely on a specific error code) 99 return Open(device, O_RDONLY | O_NOCACHE); 100 } 101 102 if (fDevice >= 0) { 103 // opening succeeded 104 fMode = mode; 105 if (_IsReadWrite(mode)) { 106 // check out if the device really allows for read/write access 107 device_geometry geometry; 108 if (!ioctl(fDevice, B_GET_GEOMETRY, &geometry, sizeof(device_geometry))) { 109 if (geometry.read_only) { 110 // reopen device read-only 111 close(fDevice); 112 return Open(device, O_RDONLY | O_NOCACHE); 113 } 114 } 115 } 116 } 117 118 return fDevice; 119 } 120 121 122 int 123 DeviceOpener::Open(int fd, int mode) 124 { 125 fDevice = dup(fd); 126 if (fDevice < 0) 127 return errno; 128 129 fMode = mode; 130 131 return fDevice; 132 } 133 134 135 void* 136 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize) 137 { 138 return fBlockCache = block_cache_create(fDevice, numBlocks, blockSize, 139 IsReadOnly()); 140 } 141 142 143 void 144 DeviceOpener::RemoveCache(bool allowWrites) 145 { 146 if (fBlockCache == NULL) 147 return; 148 149 block_cache_delete(fBlockCache, allowWrites); 150 fBlockCache = NULL; 151 } 152 153 154 void 155 DeviceOpener::Keep() 156 { 157 fDevice = -1; 158 } 159 160 161 /*! Returns the size of the device in bytes. It uses B_GET_GEOMETRY 162 to compute the size, or fstat() if that failed. 163 */ 164 status_t 165 DeviceOpener::GetSize(off_t* _size, uint32* _blockSize) 166 { 167 device_geometry geometry; 168 if (ioctl(fDevice, B_GET_GEOMETRY, &geometry, sizeof(device_geometry)) < 0) { 169 // maybe it's just a file 170 struct stat stat; 171 if (fstat(fDevice, &stat) < 0) 172 return B_ERROR; 173 174 if (_size) 175 *_size = stat.st_size; 176 if (_blockSize) // that shouldn't cause us any problems 177 *_blockSize = 512; 178 179 return B_OK; 180 } 181 182 if (_size) { 183 *_size = 1ULL * geometry.head_count * geometry.cylinder_count 184 * geometry.sectors_per_track * geometry.bytes_per_sector; 185 } 186 if (_blockSize) 187 *_blockSize = geometry.bytes_per_sector; 188 189 return B_OK; 190 } 191 192 193 // #pragma mark - 194 195 196 bool 197 btrfs_super_block::IsValid() const 198 { 199 // TODO: check some more values! 200 if (strncmp(magic, BTRFS_SUPER_BLOCK_MAGIC, sizeof(magic)) != 0) 201 return false; 202 203 return true; 204 } 205 206 207 void 208 btrfs_super_block::Initialize(const char* name, off_t numBlocks, 209 uint32 blockSize, uint32 sectorSize) 210 { 211 memset(this, 0, sizeof(btrfs_super_block)); 212 213 uuid_generate(fsid); 214 blocknum = B_HOST_TO_LENDIAN_INT64(BTRFS_SUPER_BLOCK_OFFSET); 215 num_devices = B_HOST_TO_LENDIAN_INT64(1); 216 strncpy(magic, BTRFS_SUPER_BLOCK_MAGIC_TEMPORARY, sizeof(magic)); 217 generation = B_HOST_TO_LENDIAN_INT64(1); 218 root = B_HOST_TO_LENDIAN_INT64(BTRFS_RESERVED_SPACE_OFFSET + blockSize); 219 chunk_root = B_HOST_TO_LENDIAN_INT64(Root() + blockSize); 220 total_size = B_HOST_TO_LENDIAN_INT64(numBlocks * blockSize); 221 used_size = B_HOST_TO_LENDIAN_INT64(6 * blockSize); 222 sector_size = B_HOST_TO_LENDIAN_INT32(sectorSize); 223 leaf_size = B_HOST_TO_LENDIAN_INT32(blockSize); 224 node_size = B_HOST_TO_LENDIAN_INT32(blockSize); 225 stripe_size = B_HOST_TO_LENDIAN_INT32(blockSize); 226 checksum_type = B_HOST_TO_LENDIAN_INT32(BTRFS_CSUM_TYPE_CRC32); 227 chunk_root_generation = B_HOST_TO_LENDIAN_INT64(1); 228 // TODO(lesderid): Support configurable filesystem features 229 incompat_flags = B_HOST_TO_LENDIAN_INT64(0); 230 strlcpy(label, name, BTRFS_LABEL_SIZE); 231 } 232 233 234 // #pragma mark - 235 236 237 Volume::Volume(fs_volume* volume) 238 : 239 fFSVolume(volume), 240 fFlags(0), 241 fChunk(NULL), 242 fChunkTree(NULL) 243 { 244 mutex_init(&fLock, "btrfs volume"); 245 } 246 247 248 Volume::~Volume() 249 { 250 TRACE("Volume destructor.\n"); 251 } 252 253 254 bool 255 Volume::IsValidSuperBlock() 256 { 257 return fSuperBlock.IsValid(); 258 } 259 260 261 const char* 262 Volume::Name() const 263 { 264 if (fSuperBlock.label[0]) 265 return fSuperBlock.label; 266 267 return fName; 268 } 269 270 271 status_t 272 Volume::Mount(const char* deviceName, uint32 flags) 273 { 274 flags |= B_MOUNT_READ_ONLY; 275 // we only support read-only for now 276 277 if ((flags & B_MOUNT_READ_ONLY) != 0) { 278 TRACE("Volume::Mount(): Read only\n"); 279 } else { 280 TRACE("Volume::Mount(): Read write\n"); 281 } 282 283 DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0 284 ? O_RDONLY : O_RDWR); 285 fDevice = opener.Device(); 286 if (fDevice < B_OK) { 287 ERROR("Volume::Mount(): couldn't open device\n"); 288 return fDevice; 289 } 290 291 if (opener.IsReadOnly()) 292 fFlags |= VOLUME_READ_ONLY; 293 294 // read the superblock 295 status_t status = Identify(fDevice, &fSuperBlock); 296 if (status != B_OK) { 297 ERROR("Volume::Mount(): Identify() failed\n"); 298 return status; 299 } 300 301 fBlockSize = fSuperBlock.BlockSize(); 302 fSectorSize = fSuperBlock.SectorSize(); 303 TRACE("block size %" B_PRIu32 "\n", fBlockSize); 304 TRACE("sector size %" B_PRIu32 "\n", fSectorSize); 305 306 uint8* start = (uint8*)&fSuperBlock.system_chunk_array[0]; 307 uint8* end = (uint8*)&fSuperBlock.system_chunk_array[2048]; 308 while (start < end) { 309 btrfs_key* key = (btrfs_key*)start; 310 TRACE("system_chunk_array object_id 0x%" B_PRIx64 " offset 0x%" 311 B_PRIx64 " type 0x%x\n", key->ObjectID(), key->Offset(), 312 key->Type()); 313 if (key->Type() != BTRFS_KEY_TYPE_CHUNK_ITEM) { 314 break; 315 } 316 317 btrfs_chunk* chunk = (btrfs_chunk*)(key + 1); 318 fChunk = new(std::nothrow) Chunk(chunk, key->Offset()); 319 if (fChunk == NULL) 320 return B_ERROR; 321 start += sizeof(btrfs_key) + fChunk->Size(); 322 } 323 324 // check if the device size is large enough to hold the file system 325 off_t diskSize; 326 status = opener.GetSize(&diskSize); 327 if (status != B_OK) 328 return status; 329 if (diskSize < (off_t)fSuperBlock.TotalSize()) 330 return B_BAD_VALUE; 331 332 fBlockCache = opener.InitCache(fSuperBlock.TotalSize() / fBlockSize, 333 fBlockSize); 334 if (fBlockCache == NULL) 335 return B_ERROR; 336 337 TRACE("Volume::Mount(): Initialized block cache: %p\n", fBlockCache); 338 339 fChunkTree = new(std::nothrow) BTree(this); 340 if (fChunkTree == NULL) 341 return B_NO_MEMORY; 342 fChunkTree->SetRoot(fSuperBlock.ChunkRoot(), NULL); 343 TRACE("Volume::Mount() chunk_root: %" B_PRIu64 " (physical block %" B_PRIu64 344 ")\n", fSuperBlock.ChunkRoot(), fChunkTree->RootBlock()); 345 346 fRootTree = new(std::nothrow) BTree(this); 347 if (fRootTree == NULL) 348 return B_NO_MEMORY; 349 fRootTree->SetRoot(fSuperBlock.Root(), NULL); 350 TRACE("Volume::Mount() root: %" B_PRIu64 " (physical block %" B_PRIu64 ")\n", 351 fSuperBlock.Root(), fRootTree->RootBlock()); 352 353 BTree::Path path(fRootTree); 354 355 TRACE("Volume::Mount(): Searching extent root\n"); 356 btrfs_key search_key; 357 search_key.SetOffset(0); 358 search_key.SetType(BTRFS_KEY_TYPE_ROOT_ITEM); 359 search_key.SetObjectID(BTRFS_OBJECT_ID_EXTENT_TREE); 360 btrfs_root* root; 361 status = fRootTree->FindExact(&path, search_key, (void**)&root); 362 if (status != B_OK) { 363 ERROR("Volume::Mount(): Couldn't find extent root\n"); 364 return status; 365 } 366 TRACE("Volume::Mount(): Found extent root: %" B_PRIu64 "\n", 367 root->LogicalAddress()); 368 fExtentTree = new(std::nothrow) BTree(this); 369 if (fExtentTree == NULL) 370 return B_NO_MEMORY; 371 fExtentTree->SetRoot(root->LogicalAddress(), NULL); 372 free(root); 373 374 TRACE("Volume::Mount(): Searching fs root\n"); 375 search_key.SetOffset(0); 376 search_key.SetObjectID(BTRFS_OBJECT_ID_FS_TREE); 377 status = fRootTree->FindExact(&path, search_key, (void**)&root); 378 if (status != B_OK) { 379 ERROR("Volume::Mount(): Couldn't find fs root\n"); 380 return status; 381 } 382 TRACE("Volume::Mount(): Found fs root: %" B_PRIu64 "\n", 383 root->LogicalAddress()); 384 fFSTree = new(std::nothrow) BTree(this); 385 if (fFSTree == NULL) 386 return B_NO_MEMORY; 387 fFSTree->SetRoot(root->LogicalAddress(), NULL); 388 free(root); 389 390 TRACE("Volume::Mount(): Searching dev root\n"); 391 search_key.SetOffset(0); 392 search_key.SetObjectID(BTRFS_OBJECT_ID_DEV_TREE); 393 status = fRootTree->FindExact(&path, search_key, (void**)&root); 394 if (status != B_OK) { 395 ERROR("Volume::Mount(): Couldn't find dev root\n"); 396 return status; 397 } 398 TRACE("Volume::Mount(): Found dev root: %" B_PRIu64 "\n", 399 root->LogicalAddress()); 400 fDevTree = new(std::nothrow) BTree(this); 401 if (fDevTree == NULL) 402 return B_NO_MEMORY; 403 fDevTree->SetRoot(root->LogicalAddress(), NULL); 404 free(root); 405 406 TRACE("Volume::Mount(): Searching checksum root\n"); 407 search_key.SetOffset(0); 408 search_key.SetObjectID(BTRFS_OBJECT_ID_CHECKSUM_TREE); 409 status = fRootTree->FindExact(&path, search_key, (void**)&root); 410 if (status != B_OK) { 411 ERROR("Volume::Mount(): Couldn't find checksum root\n"); 412 return status; 413 } 414 TRACE("Volume::Mount(): Found checksum root: %" B_PRIu64 "\n", 415 root->LogicalAddress()); 416 fChecksumTree = new(std::nothrow) BTree(this); 417 if (fChecksumTree == NULL) 418 return B_NO_MEMORY; 419 fChecksumTree->SetRoot(root->LogicalAddress(), NULL); 420 free(root); 421 422 search_key.SetObjectID(-1); 423 search_key.SetType(0); 424 status = fFSTree->FindPrevious(&path, search_key, NULL); 425 if (status != B_OK) { 426 ERROR("Volume::Mount() Couldn't find any inode!!\n"); 427 return status; 428 } 429 fLargestInodeID = search_key.ObjectID(); 430 TRACE("Volume::Mount() Find larget inode id % " B_PRIu64 "\n", 431 fLargestInodeID); 432 433 if ((flags & B_MOUNT_READ_ONLY) != 0) { 434 fJournal = NULL; 435 fExtentAllocator = NULL; 436 } else { 437 // Initialize Journal 438 fJournal = new(std::nothrow) Journal(this); 439 if (fJournal == NULL) 440 return B_NO_MEMORY; 441 442 // Initialize ExtentAllocator; 443 fExtentAllocator = new(std::nothrow) ExtentAllocator(this); 444 if (fExtentAllocator == NULL) 445 return B_NO_MEMORY; 446 status = fExtentAllocator->Initialize(); 447 if (status != B_OK) { 448 ERROR("could not initalize extent allocator!\n"); 449 return status; 450 } 451 } 452 453 // ready 454 status = get_vnode(fFSVolume, BTRFS_FIRST_SUBVOLUME, 455 (void**)&fRootNode); 456 if (status != B_OK) { 457 ERROR("could not create root node: get_vnode() failed!\n"); 458 return status; 459 } 460 461 TRACE("Volume::Mount(): Found root node: %" B_PRIu64 " (%s)\n", 462 fRootNode->ID(), strerror(fRootNode->InitCheck())); 463 464 // all went fine 465 opener.Keep(); 466 467 if (!fSuperBlock.label[0]) { 468 // generate a more or less descriptive volume name 469 off_t divisor = 1ULL << 40; 470 char unit = 'T'; 471 if (diskSize < divisor) { 472 divisor = 1UL << 30; 473 unit = 'G'; 474 if (diskSize < divisor) { 475 divisor = 1UL << 20; 476 unit = 'M'; 477 } 478 } 479 480 double size = double((10 * diskSize + divisor - 1) / divisor); 481 // %g in the kernel does not support precision... 482 483 snprintf(fName, sizeof(fName), "%g %cB Btrfs Volume", 484 size / 10, unit); 485 } 486 487 return B_OK; 488 } 489 490 491 status_t 492 Volume::Initialize(int fd, const char* label, uint32 blockSize, 493 uint32 sectorSize) 494 { 495 TRACE("Volume::Initialize()\n"); 496 497 // label must != NULL and may not contain '/' or '\\' 498 if (label == NULL 499 || strchr(label, '/') != NULL || strchr(label, '\\') != NULL) { 500 return B_BAD_VALUE; 501 } 502 503 if ((blockSize != 1024 && blockSize != 2048 && blockSize != 4096 504 && blockSize != 8192 && blockSize != 16384) 505 || blockSize % sectorSize != 0) { 506 return B_BAD_VALUE; 507 } 508 509 DeviceOpener opener(fd, O_RDWR); 510 if (opener.Device() < B_OK) 511 return B_BAD_VALUE; 512 513 if (opener.IsReadOnly()) 514 return B_READ_ONLY_DEVICE; 515 516 fDevice = opener.Device(); 517 518 uint32 deviceBlockSize; 519 off_t deviceSize; 520 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK) 521 return B_ERROR; 522 off_t numBlocks = deviceSize / sectorSize; 523 524 // create valid superblock 525 526 fSuperBlock.Initialize(label, numBlocks, blockSize, sectorSize); 527 528 fBlockSize = fSuperBlock.BlockSize(); 529 fSectorSize = fSuperBlock.SectorSize(); 530 531 // TODO(lesderid): Initialize remaining core structures 532 // (extent tree, chunk tree, fs tree, etc.) 533 534 status_t status = WriteSuperBlock(); 535 if (status < B_OK) 536 return status; 537 538 fBlockCache = opener.InitCache(fSuperBlock.TotalSize() / fBlockSize, 539 fBlockSize); 540 if (fBlockCache == NULL) 541 return B_ERROR; 542 543 fJournal = new(std::nothrow) Journal(this); 544 if (fJournal == NULL) 545 RETURN_ERROR(B_ERROR); 546 547 // TODO(lesderid): Perform secondary initialization (in transactions) 548 // (add block groups to extent tree, create root dir, etc.) 549 Transaction transaction(this); 550 551 // TODO(lesderid): Write all superblocks when transactions are committed 552 status = transaction.Done(); 553 if (status < B_OK) 554 return status; 555 556 opener.RemoveCache(true); 557 558 TRACE("Volume::Initialize(): Done\n"); 559 return B_OK; 560 } 561 562 563 status_t 564 Volume::Unmount() 565 { 566 TRACE("Volume::Unmount()\n"); 567 delete fRootTree; 568 delete fExtentTree; 569 delete fChunkTree; 570 delete fChecksumTree; 571 delete fFSTree; 572 delete fDevTree; 573 delete fJournal; 574 delete fExtentAllocator; 575 fRootTree = NULL; 576 fExtentTree = NULL; 577 fChunkTree = NULL; 578 fChecksumTree = NULL; 579 fFSTree = NULL; 580 fDevTree = NULL; 581 fJournal = NULL; 582 fExtentAllocator = NULL; 583 584 TRACE("Volume::Unmount(): Putting root node\n"); 585 put_vnode(fFSVolume, RootNode()->ID()); 586 TRACE("Volume::Unmount(): Deleting the block cache\n"); 587 block_cache_delete(fBlockCache, !IsReadOnly()); 588 TRACE("Volume::Unmount(): Closing device\n"); 589 close(fDevice); 590 591 TRACE("Volume::Unmount(): Done\n"); 592 return B_OK; 593 } 594 595 596 status_t 597 Volume::LoadSuperBlock() 598 { 599 CachedBlock cached(this); 600 const uint8* block = cached.SetTo(BTRFS_SUPER_BLOCK_OFFSET / fBlockSize); 601 602 if (block == NULL) 603 return B_IO_ERROR; 604 605 memcpy(&fSuperBlock, block + BTRFS_SUPER_BLOCK_OFFSET % fBlockSize, 606 sizeof(fSuperBlock)); 607 608 return B_OK; 609 } 610 611 612 status_t 613 Volume::FindBlock(off_t logical, fsblock_t& physicalBlock) 614 { 615 off_t physical; 616 status_t status = FindBlock(logical, physical); 617 if (status != B_OK) 618 return status; 619 physicalBlock = physical / fBlockSize; 620 return B_OK; 621 } 622 623 624 status_t 625 Volume::FindBlock(off_t logical, off_t& physical) 626 { 627 if (fChunkTree == NULL 628 || (logical >= (off_t)fChunk->Offset() 629 && logical < (off_t)fChunk->End())) { 630 // try with fChunk 631 return fChunk->FindBlock(logical, physical); 632 } 633 634 btrfs_key search_key; 635 search_key.SetOffset(logical); 636 search_key.SetType(BTRFS_KEY_TYPE_CHUNK_ITEM); 637 search_key.SetObjectID(BTRFS_OBJECT_ID_FIRST_CHUNK_TREE); 638 btrfs_chunk* chunk; 639 BTree::Path path(fChunkTree); 640 status_t status = fChunkTree->FindPrevious(&path, search_key, 641 (void**)&chunk); 642 if (status != B_OK) 643 return status; 644 645 Chunk _chunk(chunk, search_key.Offset()); 646 free(chunk); 647 status = _chunk.FindBlock(logical, physical); 648 if (status != B_OK) 649 return status; 650 TRACE("Volume::FindBlock(): logical: %" B_PRIdOFF ", physical: %" B_PRIdOFF 651 "\n", logical, physical); 652 return B_OK; 653 } 654 655 656 status_t 657 Volume::WriteSuperBlock() 658 { 659 // TODO(lesderid): Calculate checksum 660 661 if (write_pos(fDevice, BTRFS_SUPER_BLOCK_OFFSET, &fSuperBlock, 662 sizeof(btrfs_super_block)) 663 != sizeof(btrfs_super_block)) 664 return B_IO_ERROR; 665 666 return B_OK; 667 } 668 669 670 /* Wrapper function for allocating new block 671 */ 672 status_t 673 Volume::GetNewBlock(uint64& logical, fsblock_t& physical, uint64 start, 674 uint64 flags) 675 { 676 status_t status = fExtentAllocator->AllocateTreeBlock(logical, start, flags); 677 if (status != B_OK) 678 return status; 679 680 return FindBlock(logical, physical); 681 } 682 683 684 // #pragma mark - Disk scanning and initialization 685 686 687 /*static*/ status_t 688 Volume::Identify(int fd, btrfs_super_block* superBlock) 689 { 690 if (read_pos(fd, BTRFS_SUPER_BLOCK_OFFSET, superBlock, 691 sizeof(btrfs_super_block)) != sizeof(btrfs_super_block)) 692 return B_IO_ERROR; 693 694 if (!superBlock->IsValid()) { 695 ERROR("invalid superblock!\n"); 696 return B_BAD_VALUE; 697 } 698 699 return B_OK; 700 } 701 702