1 /*
2 * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5
6
7 //! superblock, mounting, etc.
8
9
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "file_systems/DeviceOpener.h"
14 #include "Inode.h"
15 #include "Journal.h"
16 #include "Query.h"
17 #include "Volume.h"
18
19
static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to give newly
	// initialized disks (see disk_super_block::Initialize(), which grows
	// the groups until their count no longer exceeds this value).
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// It seems to create appropriate numbers for smaller disks
	// with this setting, though (i.e. you can create a 400 MB
	// file on a 1 GB disk without the need for double indirect
	// blocks).
30
31
32 // #pragma mark -
33
34
35 bool
IsValid() const36 disk_super_block::IsValid() const
37 {
38 if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
39 || Magic2() != (int32)SUPER_BLOCK_MAGIC2
40 || Magic3() != (int32)SUPER_BLOCK_MAGIC3
41 || (int32)block_size != inode_size
42 || ByteOrder() != SUPER_BLOCK_FS_LENDIAN
43 || (1UL << BlockShift()) != BlockSize()
44 || AllocationGroups() < 1
45 || AllocationGroupShift() < 1
46 || BlocksPerAllocationGroup() < 1
47 || NumBlocks() < 10
48 || AllocationGroups() != divide_roundup(NumBlocks(),
49 1L << AllocationGroupShift()))
50 return false;
51
52 return true;
53 }
54
55
56 void
Initialize(const char * diskName,off_t numBlocks,uint32 blockSize)57 disk_super_block::Initialize(const char* diskName, off_t numBlocks,
58 uint32 blockSize)
59 {
60 memset(this, 0, sizeof(disk_super_block));
61
62 magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
63 magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
64 magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
65 fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
66 flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
67
68 strlcpy(name, diskName, sizeof(name));
69
70 int32 blockShift = 9;
71 while ((1UL << blockShift) < blockSize) {
72 blockShift++;
73 }
74
75 block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
76 block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
77
78 num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
79 used_blocks = 0;
80
81 // Get the minimum ag_shift (that's determined by the block size)
82
83 int32 bitsPerBlock = blockSize << 3;
84 off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
85 int32 bitmapBlocksPerGroup = 1;
86 int32 groupShift = 13;
87
88 for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
89 groupShift++;
90 }
91
92 // Many allocation groups help applying allocation policies, but if
93 // they are too small, we will need to many block_runs to cover large
94 // files (see above to get an explanation of the kDesiredAllocationGroups
95 // constant).
96
97 int32 numGroups;
98
99 while (true) {
100 numGroups = (bitmapBlocks + bitmapBlocksPerGroup - 1) / bitmapBlocksPerGroup;
101 if (numGroups > kDesiredAllocationGroups) {
102 if (groupShift == 16)
103 break;
104
105 groupShift++;
106 bitmapBlocksPerGroup *= 2;
107 } else
108 break;
109 }
110
111 num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
112 // blocks_per_ag holds the number of bitmap blocks that are in each allocation group
113 blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(bitmapBlocksPerGroup);
114 ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
115 }
116
117
118 // #pragma mark -
119
120
Volume(fs_volume * volume)121 Volume::Volume(fs_volume* volume)
122 :
123 fVolume(volume),
124 fBlockAllocator(this),
125 fRootNode(NULL),
126 fIndicesNode(NULL),
127 fDirtyCachedBlocks(0),
128 fFlags(0),
129 fCheckingThread(-1),
130 fCheckVisitor(NULL)
131 {
132 mutex_init(&fLock, "bfs volume");
133 mutex_init(&fQueryLock, "bfs queries");
134 }
135
136
~Volume()137 Volume::~Volume()
138 {
139 mutex_destroy(&fQueryLock);
140 mutex_destroy(&fLock);
141 }
142
143
144 bool
IsValidSuperBlock() const145 Volume::IsValidSuperBlock() const
146 {
147 return fSuperBlock.IsValid();
148 }
149
150
151 /*! Checks whether the given block number may be the location of an inode block.
152 */
153 bool
IsValidInodeBlock(off_t block) const154 Volume::IsValidInodeBlock(off_t block) const
155 {
156 return block > fSuperBlock.LogEnd() && block < NumBlocks();
157 }
158
159
160 void
Panic()161 Volume::Panic()
162 {
163 FATAL(("Disk corrupted... switch to read-only mode!\n"));
164 fFlags |= VOLUME_READ_ONLY;
165 #if KDEBUG
166 kernel_debugger("BFS panics!");
167 #endif
168 }
169
170
171 status_t
Mount(const char * deviceName,uint32 flags)172 Volume::Mount(const char* deviceName, uint32 flags)
173 {
174 // TODO: validate the FS in write mode as well!
175 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
176 || (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
177 // in big endian mode, we only mount read-only for now
178 flags |= B_MOUNT_READ_ONLY;
179 #endif
180
181 DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0
182 ? O_RDONLY : O_RDWR);
183 fDevice = opener.Device();
184 if (fDevice < B_OK)
185 RETURN_ERROR(fDevice);
186
187 if (opener.IsReadOnly())
188 fFlags |= VOLUME_READ_ONLY;
189
190 // read the superblock
191 if (Identify(fDevice, &fSuperBlock) != B_OK) {
192 FATAL(("invalid superblock!\n"));
193 return B_BAD_VALUE;
194 }
195
196 // initialize short hands to the superblock (to save byte swapping)
197 fBlockSize = fSuperBlock.BlockSize();
198 fBlockShift = fSuperBlock.BlockShift();
199 fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
200
201 // check if the device size is large enough to hold the file system
202 off_t diskSize;
203 if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK)
204 RETURN_ERROR(B_ERROR);
205 if (diskSize < (NumBlocks() << BlockShift())) {
206 FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%"
207 B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift()));
208 RETURN_ERROR(B_BAD_VALUE);
209 }
210
211 // set the current log pointers, so that journaling will work correctly
212 fLogStart = fSuperBlock.LogStart();
213 fLogEnd = fSuperBlock.LogEnd();
214
215 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
216 return B_ERROR;
217
218 fJournal = new(std::nothrow) Journal(this);
219 if (fJournal == NULL)
220 return B_NO_MEMORY;
221
222 status_t status = fJournal->InitCheck();
223 if (status < B_OK) {
224 FATAL(("could not initialize journal: %s!\n", strerror(status)));
225 return status;
226 }
227
228 // replaying the log is the first thing we will do on this disk
229 status = fJournal->ReplayLog();
230 if (status != B_OK) {
231 FATAL(("Replaying log failed, data may be corrupted, volume "
232 "read-only.\n"));
233 fFlags |= VOLUME_READ_ONLY;
234 // TODO: if this is the boot volume, Bootscript will assume this
235 // is a CD...
236 // TODO: it would be nice to have a user visible alert instead
237 // of letting him just find this in the syslog.
238 }
239
240 status = fBlockAllocator.Initialize();
241 if (status != B_OK) {
242 FATAL(("could not initialize block bitmap allocator!\n"));
243 return status;
244 }
245
246 fRootNode = new(std::nothrow) Inode(this, ToVnode(Root()));
247 if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) {
248 status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode,
249 &gBFSVnodeOps, fRootNode->Mode(), 0);
250 if (status == B_OK) {
251 // try to get indices root dir
252
253 if (!Indices().IsZero()) {
254 fIndicesNode = new(std::nothrow) Inode(this,
255 ToVnode(Indices()));
256 }
257
258 if (fIndicesNode == NULL
259 || fIndicesNode->InitCheck() < B_OK
260 || !fIndicesNode->IsContainer()) {
261 INFORM(("bfs: volume doesn't have indices!\n"));
262
263 if (fIndicesNode) {
264 // if this is the case, the index root node is gone bad,
265 // and BFS switch to read-only mode
266 fFlags |= VOLUME_READ_ONLY;
267 delete fIndicesNode;
268 fIndicesNode = NULL;
269 }
270 } else {
271 // we don't use the vnode layer to access the indices node
272 }
273 } else {
274 FATAL(("could not create root node: publish_vnode() failed!\n"));
275 delete fRootNode;
276 return status;
277 }
278 } else {
279 status = B_BAD_VALUE;
280 FATAL(("could not create root node!\n"));
281
282 // We need to wait for the block allocator to finish
283 fBlockAllocator.Uninitialize();
284 return status;
285 }
286
287 // all went fine
288 opener.Keep();
289 return B_OK;
290 }
291
292
293 status_t
Unmount()294 Volume::Unmount()
295 {
296 put_vnode(fVolume, ToVnode(Root()));
297
298 fBlockAllocator.Uninitialize();
299
300 // This will also flush the log & all blocks to disk
301 delete fJournal;
302 fJournal = NULL;
303
304 delete fIndicesNode;
305
306 block_cache_delete(fBlockCache, !IsReadOnly());
307 close(fDevice);
308
309 return B_OK;
310 }
311
312
313 status_t
Sync()314 Volume::Sync()
315 {
316 return fJournal->FlushLogAndBlocks();
317 }
318
319
320 status_t
ValidateBlockRun(block_run run)321 Volume::ValidateBlockRun(block_run run)
322 {
323 if (run.AllocationGroup() < 0
324 || run.AllocationGroup() > (int32)AllocationGroups()
325 || run.Start() > (1UL << AllocationGroupShift())
326 || run.length == 0
327 || uint32(run.Length() + run.Start())
328 > (1UL << AllocationGroupShift())) {
329 Panic();
330 FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(),
331 run.Start(), run.Length()));
332 return B_BAD_DATA;
333 }
334 return B_OK;
335 }
336
337
338 block_run
ToBlockRun(off_t block) const339 Volume::ToBlockRun(off_t block) const
340 {
341 block_run run;
342 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(
343 block >> AllocationGroupShift());
344 run.start = HOST_ENDIAN_TO_BFS_INT16(
345 block & ((1LL << AllocationGroupShift()) - 1));
346 run.length = HOST_ENDIAN_TO_BFS_INT16(1);
347 return run;
348 }
349
350
351 status_t
CreateIndicesRoot(Transaction & transaction)352 Volume::CreateIndicesRoot(Transaction& transaction)
353 {
354 off_t id;
355 status_t status = Inode::Create(transaction, NULL, NULL,
356 S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id,
357 &fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE);
358 if (status < B_OK)
359 RETURN_ERROR(status);
360
361 fSuperBlock.indices = ToBlockRun(id);
362 return WriteSuperBlock();
363 }
364
365
366 status_t
CreateVolumeID(Transaction & transaction)367 Volume::CreateVolumeID(Transaction& transaction)
368 {
369 Attribute attr(fRootNode);
370 status_t status;
371 attr_cookie* cookie;
372 status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie);
373 if (status == B_OK) {
374 static bool seeded = false;
375 if (!seeded) {
376 // seed the random number generator for the be:volume_id attribute.
377 srand(time(NULL));
378 seeded = true;
379 }
380 uint64_t id;
381 size_t length = sizeof(id);
382 id = ((uint64_t)rand() << 32) | rand();
383 attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL);
384 }
385 return status;
386 }
387
388
389
390 status_t
AllocateForInode(Transaction & transaction,const Inode * parent,mode_t type,block_run & run)391 Volume::AllocateForInode(Transaction& transaction, const Inode* parent,
392 mode_t type, block_run& run)
393 {
394 return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(),
395 type, run);
396 }
397
398
399 status_t
WriteSuperBlock()400 Volume::WriteSuperBlock()
401 {
402 if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block))
403 != sizeof(disk_super_block))
404 return B_IO_ERROR;
405
406 return B_OK;
407 }
408
409
410 void
UpdateLiveQueries(Inode * inode,const char * attribute,int32 type,const uint8 * oldKey,size_t oldLength,const uint8 * newKey,size_t newLength)411 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type,
412 const uint8* oldKey, size_t oldLength, const uint8* newKey,
413 size_t newLength)
414 {
415 MutexLocker _(fQueryLock);
416
417 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
418 while (iterator.HasNext()) {
419 Query* query = iterator.Next();
420 query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey,
421 newLength);
422 }
423 }
424
425
426 void
UpdateLiveQueriesRenameMove(Inode * inode,ino_t oldDirectoryID,const char * oldName,ino_t newDirectoryID,const char * newName)427 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID,
428 const char* oldName, ino_t newDirectoryID, const char* newName)
429 {
430 MutexLocker _(fQueryLock);
431
432 size_t oldLength = strlen(oldName);
433 size_t newLength = strlen(newName);
434
435 DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
436 while (iterator.HasNext()) {
437 Query* query = iterator.Next();
438 query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength,
439 newDirectoryID, newName, newLength);
440 }
441 }
442
443
444 /*! Checks if there is a live query whose results depend on the presence
445 or value of the specified attribute.
446 Don't use it if you already have all the data together to evaluate
447 the queries - it wouldn't safe you anything in this case.
448 */
449 bool
CheckForLiveQuery(const char * attribute)450 Volume::CheckForLiveQuery(const char* attribute)
451 {
452 // TODO: check for a live query that depends on the specified attribute
453 return true;
454 }
455
456
457 void
AddQuery(Query * query)458 Volume::AddQuery(Query* query)
459 {
460 MutexLocker _(fQueryLock);
461 fQueries.Add(query);
462 }
463
464
465 void
RemoveQuery(Query * query)466 Volume::RemoveQuery(Query* query)
467 {
468 MutexLocker _(fQueryLock);
469 fQueries.Remove(query);
470 }
471
472
473 status_t
CreateCheckVisitor()474 Volume::CreateCheckVisitor()
475 {
476 if (fCheckVisitor != NULL)
477 return B_BUSY;
478
479 fCheckVisitor = new(std::nothrow) ::CheckVisitor(this);
480 if (fCheckVisitor == NULL)
481 return B_NO_MEMORY;
482
483 return B_OK;
484 }
485
486
487 void
DeleteCheckVisitor()488 Volume::DeleteCheckVisitor()
489 {
490 delete fCheckVisitor;
491 fCheckVisitor = NULL;
492 }
493
494
495 // #pragma mark - Disk scanning and initialization
496
497
498 /*static*/ status_t
CheckSuperBlock(const uint8 * data,uint32 * _offset)499 Volume::CheckSuperBlock(const uint8* data, uint32* _offset)
500 {
501 disk_super_block* superBlock = (disk_super_block*)(data + 512);
502 if (superBlock->IsValid()) {
503 if (_offset != NULL)
504 *_offset = 512;
505 return B_OK;
506 }
507
508 #ifndef BFS_LITTLE_ENDIAN_ONLY
509 // For PPC, the superblock might be located at offset 0
510 superBlock = (disk_super_block*)data;
511 if (superBlock->IsValid()) {
512 if (_offset != NULL)
513 *_offset = 0;
514 return B_OK;
515 }
516 #endif
517
518 return B_BAD_VALUE;
519 }
520
521
522 /*static*/ status_t
Identify(int fd,disk_super_block * superBlock)523 Volume::Identify(int fd, disk_super_block* superBlock)
524 {
525 uint8 buffer[1024];
526 if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
527 return B_IO_ERROR;
528
529 uint32 offset;
530 if (CheckSuperBlock(buffer, &offset) != B_OK)
531 return B_BAD_VALUE;
532
533 memcpy(superBlock, buffer + offset, sizeof(disk_super_block));
534 return B_OK;
535 }
536
537
538 status_t
Initialize(int fd,const char * name,uint32 blockSize,uint32 flags)539 Volume::Initialize(int fd, const char* name, uint32 blockSize,
540 uint32 flags)
541 {
542 // although there is no really good reason for it, we won't
543 // accept '/' in disk names (mkbfs does this, too - and since
544 // Tracker names mounted volumes like their name)
545 if (strchr(name, '/') != NULL)
546 return B_BAD_VALUE;
547
548 if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096
549 && blockSize != 8192)
550 return B_BAD_VALUE;
551
552 DeviceOpener opener(fd, O_RDWR);
553 if (opener.Device() < B_OK)
554 return B_BAD_VALUE;
555
556 if (opener.IsReadOnly())
557 return B_READ_ONLY_DEVICE;
558
559 fDevice = opener.Device();
560
561 uint32 deviceBlockSize;
562 off_t deviceSize;
563 if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
564 return B_ERROR;
565
566 off_t numBlocks = deviceSize / blockSize;
567
568 // create valid superblock
569
570 fSuperBlock.Initialize(name, numBlocks, blockSize);
571
572 // initialize short hands to the superblock (to save byte swapping)
573 fBlockSize = fSuperBlock.BlockSize();
574 fBlockShift = fSuperBlock.BlockShift();
575 fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
576
577 // determine log size depending on the size of the volume
578 off_t logSize = 2048;
579 if (numBlocks <= 20480)
580 logSize = 512;
581 if (deviceSize > 1LL * 1024 * 1024 * 1024)
582 logSize = 4096;
583
584 // since the allocator has not been initialized yet, we
585 // cannot use BlockAllocator::BitmapSize() here
586 off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8);
587
588 fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1);
589 fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize);
590 fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(
591 ToBlock(Log()));
592
593 // set the current log pointers, so that journaling will work correctly
594 fLogStart = fSuperBlock.LogStart();
595 fLogEnd = fSuperBlock.LogEnd();
596
597 if (!IsValidSuperBlock())
598 RETURN_ERROR(B_ERROR);
599
600 if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
601 return B_ERROR;
602
603 fJournal = new(std::nothrow) Journal(this);
604 if (fJournal == NULL || fJournal->InitCheck() < B_OK)
605 RETURN_ERROR(B_ERROR);
606
607 // ready to write data to disk
608
609 Transaction transaction(this, 0);
610
611 if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
612 RETURN_ERROR(B_ERROR);
613
614 off_t id;
615 status_t status = Inode::Create(transaction, NULL, NULL,
616 S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode);
617 if (status < B_OK)
618 RETURN_ERROR(status);
619
620 fSuperBlock.root_dir = ToBlockRun(id);
621
622 if ((flags & VOLUME_NO_INDICES) == 0) {
623 // The indices root directory will be created automatically
624 // when the standard indices are created (or any other).
625 Index index(this);
626 status = index.Create(transaction, "name", B_STRING_TYPE);
627 if (status < B_OK)
628 return status;
629
630 status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE);
631 if (status < B_OK)
632 return status;
633
634 status = index.Create(transaction, "last_modified", B_INT64_TYPE);
635 if (status < B_OK)
636 return status;
637
638 status = index.Create(transaction, "size", B_INT64_TYPE);
639 if (status < B_OK)
640 return status;
641 }
642
643 status = CreateVolumeID(transaction);
644 if (status < B_OK)
645 return status;
646
647 status = _EraseUnusedBootBlock();
648 if (status < B_OK)
649 return status;
650
651 status = WriteSuperBlock();
652 if (status < B_OK)
653 return status;
654
655 status = transaction.Done();
656 if (status < B_OK)
657 return status;
658
659 Sync();
660 opener.RemoveCache(true);
661 return B_OK;
662 }
663
664
665 /*! Erase the first boot block, as we don't use it and there
666 * might be leftovers from other file systems. This can cause
667 * confusion for identifying the partition if not erased.
668 */
669 status_t
_EraseUnusedBootBlock()670 Volume::_EraseUnusedBootBlock()
671 {
672 const int32 blockSize = 512;
673 const char emptySector[blockSize] = { 0 };
674 // Erase boot block if any
675 if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize)
676 return B_IO_ERROR;
677 // Erase ext2 superblock if any
678 if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize)
679 return B_IO_ERROR;
680
681 return B_OK;
682 }
683