xref: /haiku/src/add-ons/kernel/file_systems/bfs/Volume.cpp (revision b8a45b3a2df2379b4301bf3bd5949b9a105be4ba)
1 /*
2  * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //! superblock, mounting, etc.
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "file_systems/DeviceOpener.h"
14 #include "Inode.h"
15 #include "Journal.h"
16 #include "Query.h"
17 #include "Volume.h"
18 
19 
static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups that newly initialized
	// disks should end up with, if possible (see
	// disk_super_block::Initialize()).
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// It seems to create appropriate numbers for smaller disks
	// with this setting, though (i.e. you can create a 400 MB
	// file on a 1 GB disk without the need for double indirect
	// blocks).
30 
31 
32 //	#pragma mark -
33 
34 
35 bool
36 disk_super_block::IsValid() const
37 {
38 	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
39 		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
40 		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
41 		|| (int32)block_size != inode_size
42 		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
43 		|| (1UL << BlockShift()) != BlockSize()
44 		|| AllocationGroups() < 1
45 		|| AllocationGroupShift() < 1
46 		|| BlocksPerAllocationGroup() < 1
47 		|| NumBlocks() < 10
48 		|| AllocationGroups() != divide_roundup(NumBlocks(),
49 			1L << AllocationGroupShift()))
50 		return false;
51 
52 	return true;
53 }
54 
55 
56 void
57 disk_super_block::Initialize(const char* diskName, off_t numBlocks,
58 	uint32 blockSize)
59 {
60 	memset(this, 0, sizeof(disk_super_block));
61 
62 	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
63 	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
64 	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
65 	fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
66 	flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
67 
68 	strlcpy(name, diskName, sizeof(name));
69 
70 	int32 blockShift = 9;
71 	while ((1UL << blockShift) < blockSize) {
72 		blockShift++;
73 	}
74 
75 	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
76 	block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
77 
78 	num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
79 	used_blocks = 0;
80 
81 	// Get the minimum ag_shift (that's determined by the block size)
82 
83 	int32 bitsPerBlock = blockSize << 3;
84 	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
85 	int32 blocksPerGroup = 1;
86 	int32 groupShift = 13;
87 
88 	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
89 		groupShift++;
90 	}
91 
92 	// Many allocation groups help applying allocation policies, but if
93 	// they are too small, we will need to many block_runs to cover large
94 	// files (see above to get an explanation of the kDesiredAllocationGroups
95 	// constant).
96 
97 	int32 numGroups;
98 
99 	while (true) {
100 		numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup;
101 		if (numGroups > kDesiredAllocationGroups) {
102 			if (groupShift == 16)
103 				break;
104 
105 			groupShift++;
106 			blocksPerGroup *= 2;
107 		} else
108 			break;
109 	}
110 
111 	num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
112 	blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup);
113 	ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
114 }
115 
116 
117 //	#pragma mark -
118 
119 
/*!	Creates an unmounted volume object for the VFS volume \a volume.
	The root and indices nodes as well as the check visitor start out as
	\c NULL, no flags are set, and there is no checking thread (-1). The
	volume lock and the query lock are initialized here.
*/
Volume::Volume(fs_volume* volume)
	:
	fVolume(volume),
	fBlockAllocator(this),
	fRootNode(NULL),
	fIndicesNode(NULL),
	fDirtyCachedBlocks(0),
	fFlags(0),
	fCheckingThread(-1),
	fCheckVisitor(NULL)
{
	mutex_init(&fLock, "bfs volume");
	mutex_init(&fQueryLock, "bfs queries");
}
134 
135 
/*!	Destroys the two mutexes that have been initialized in the constructor.
	Nothing else is released here; see Unmount() for the tear down of a
	mounted volume.
*/
Volume::~Volume()
{
	mutex_destroy(&fQueryLock);
	mutex_destroy(&fLock);
}
141 
142 
/*!	Returns whether the volume's in-memory copy of the superblock passes
	disk_super_block::IsValid().
*/
bool
Volume::IsValidSuperBlock() const
{
	return fSuperBlock.IsValid();
}
148 
149 
150 /*!	Checks whether the given block number may be the location of an inode block.
151 */
152 bool
153 Volume::IsValidInodeBlock(off_t block) const
154 {
155 	return block > fSuperBlock.LogEnd() && block < NumBlocks();
156 }
157 
158 
/*!	Reports disk corruption: logs a fatal message, and switches the volume
	to read-only mode by setting \c VOLUME_READ_ONLY.
	If \c KDEBUG is enabled, the kernel debugger is entered as well.
*/
void
Volume::Panic()
{
	FATAL(("Disk corrupted... switch to read-only mode!\n"));
	fFlags |= VOLUME_READ_ONLY;
#if KDEBUG
	kernel_debugger("BFS panics!");
#endif
}
168 
169 
170 status_t
171 Volume::Mount(const char* deviceName, uint32 flags)
172 {
173 	// TODO: validate the FS in write mode as well!
174 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
175 	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
176 	// in big endian mode, we only mount read-only for now
177 	flags |= B_MOUNT_READ_ONLY;
178 #endif
179 
180 	DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0
181 		? O_RDONLY : O_RDWR);
182 	fDevice = opener.Device();
183 	if (fDevice < B_OK)
184 		RETURN_ERROR(fDevice);
185 
186 	if (opener.IsReadOnly())
187 		fFlags |= VOLUME_READ_ONLY;
188 
189 	// read the superblock
190 	if (Identify(fDevice, &fSuperBlock) != B_OK) {
191 		FATAL(("invalid superblock!\n"));
192 		return B_BAD_VALUE;
193 	}
194 
195 	// initialize short hands to the superblock (to save byte swapping)
196 	fBlockSize = fSuperBlock.BlockSize();
197 	fBlockShift = fSuperBlock.BlockShift();
198 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
199 
200 	// check if the device size is large enough to hold the file system
201 	off_t diskSize;
202 	if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK)
203 		RETURN_ERROR(B_ERROR);
204 	if (diskSize < (NumBlocks() << BlockShift())) {
205 		FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%"
206 			B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift()));
207 		RETURN_ERROR(B_BAD_VALUE);
208 	}
209 
210 	// set the current log pointers, so that journaling will work correctly
211 	fLogStart = fSuperBlock.LogStart();
212 	fLogEnd = fSuperBlock.LogEnd();
213 
214 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
215 		return B_ERROR;
216 
217 	fJournal = new(std::nothrow) Journal(this);
218 	if (fJournal == NULL)
219 		return B_NO_MEMORY;
220 
221 	status_t status = fJournal->InitCheck();
222 	if (status < B_OK) {
223 		FATAL(("could not initialize journal: %s!\n", strerror(status)));
224 		return status;
225 	}
226 
227 	// replaying the log is the first thing we will do on this disk
228 	status = fJournal->ReplayLog();
229 	if (status != B_OK) {
230 		FATAL(("Replaying log failed, data may be corrupted, volume "
231 			"read-only.\n"));
232 		fFlags |= VOLUME_READ_ONLY;
233 			// TODO: if this is the boot volume, Bootscript will assume this
234 			// is a CD...
235 			// TODO: it would be nice to have a user visible alert instead
236 			// of letting him just find this in the syslog.
237 	}
238 
239 	status = fBlockAllocator.Initialize();
240 	if (status != B_OK) {
241 		FATAL(("could not initialize block bitmap allocator!\n"));
242 		return status;
243 	}
244 
245 	fRootNode = new(std::nothrow) Inode(this, ToVnode(Root()));
246 	if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) {
247 		status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode,
248 			&gBFSVnodeOps, fRootNode->Mode(), 0);
249 		if (status == B_OK) {
250 			// try to get indices root dir
251 
252 			if (!Indices().IsZero()) {
253 				fIndicesNode = new(std::nothrow) Inode(this,
254 					ToVnode(Indices()));
255 			}
256 
257 			if (fIndicesNode == NULL
258 				|| fIndicesNode->InitCheck() < B_OK
259 				|| !fIndicesNode->IsContainer()) {
260 				INFORM(("bfs: volume doesn't have indices!\n"));
261 
262 				if (fIndicesNode) {
263 					// if this is the case, the index root node is gone bad,
264 					// and BFS switch to read-only mode
265 					fFlags |= VOLUME_READ_ONLY;
266 					delete fIndicesNode;
267 					fIndicesNode = NULL;
268 				}
269 			} else {
270 				// we don't use the vnode layer to access the indices node
271 			}
272 		} else {
273 			FATAL(("could not create root node: publish_vnode() failed!\n"));
274 			delete fRootNode;
275 			return status;
276 		}
277 	} else {
278 		status = B_BAD_VALUE;
279 		FATAL(("could not create root node!\n"));
280 
281 		// We need to wait for the block allocator to finish
282 		fBlockAllocator.Uninitialize();
283 		return status;
284 	}
285 
286 	// all went fine
287 	opener.Keep();
288 	return B_OK;
289 }
290 
291 
/*!	Tears down a mounted volume: releases the reference to the root vnode,
	shuts down the block allocator, deletes the journal and the indices
	node, deletes the block cache, and closes the device.
	The order of these steps matters, as deleting the journal still writes
	to the block cache.
	\return Always \c B_OK.
*/
status_t
Volume::Unmount()
{
	put_vnode(fVolume, ToVnode(Root()));

	fBlockAllocator.Uninitialize();

	// This will also flush the log & all blocks to disk
	delete fJournal;
	fJournal = NULL;

	// NOTE(review): unlike fJournal, the pointer is not reset to NULL here
	delete fIndicesNode;

	// the second argument requests to write back changes, which is only
	// done if the volume is not read-only
	block_cache_delete(fBlockCache, !IsReadOnly());
	close(fDevice);

	return B_OK;
}
310 
311 
/*!	Asks the journal to flush the log and all blocks, and returns its
	result.
*/
status_t
Volume::Sync()
{
	return fJournal->FlushLogAndBlocks();
}
317 
318 
319 status_t
320 Volume::ValidateBlockRun(block_run run)
321 {
322 	if (run.AllocationGroup() < 0
323 		|| run.AllocationGroup() > (int32)AllocationGroups()
324 		|| run.Start() > (1UL << AllocationGroupShift())
325 		|| run.length == 0
326 		|| uint32(run.Length() + run.Start())
327 				> (1UL << AllocationGroupShift())) {
328 		Panic();
329 		FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(),
330 			run.Start(), run.Length()));
331 		return B_BAD_DATA;
332 	}
333 	return B_OK;
334 }
335 
336 
337 block_run
338 Volume::ToBlockRun(off_t block) const
339 {
340 	block_run run;
341 	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(
342 		block >> AllocationGroupShift());
343 	run.start = HOST_ENDIAN_TO_BFS_INT16(
344 		block & ((1LL << AllocationGroupShift()) - 1));
345 	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
346 	return run;
347 }
348 
349 
350 status_t
351 Volume::CreateIndicesRoot(Transaction& transaction)
352 {
353 	off_t id;
354 	status_t status = Inode::Create(transaction, NULL, NULL,
355 		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id,
356 		&fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE);
357 	if (status < B_OK)
358 		RETURN_ERROR(status);
359 
360 	fSuperBlock.indices = ToBlockRun(id);
361 	return WriteSuperBlock();
362 }
363 
364 
365 status_t
366 Volume::CreateVolumeID(Transaction& transaction)
367 {
368 	Attribute attr(fRootNode);
369 	status_t status;
370 	attr_cookie* cookie;
371 	status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie);
372 	if (status == B_OK) {
373 		static bool seeded = false;
374 		if (!seeded) {
375 			// seed the random number generator for the be:volume_id attribute.
376 			srand(time(NULL));
377 			seeded = true;
378 		}
379 		uint64_t id;
380 		size_t length = sizeof(id);
381 		id = ((uint64_t)rand() << 32) | rand();
382 		attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL);
383 	}
384 	return status;
385 }
386 
387 
388 
/*!	Forwards to BlockAllocator::AllocateForInode(), passing the block run
	of \a parent as the parent run.
	NOTE(review): \a parent is dereferenced unconditionally - presumably
	callers never pass \c NULL here; verify against the callers.
*/
status_t
Volume::AllocateForInode(Transaction& transaction, const Inode* parent,
	mode_t type, block_run& run)
{
	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(),
		type, run);
}
396 
397 
398 status_t
399 Volume::WriteSuperBlock()
400 {
401 	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block))
402 			!= sizeof(disk_super_block))
403 		return B_IO_ERROR;
404 
405 	return B_OK;
406 }
407 
408 
409 void
410 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type,
411 	const uint8* oldKey, size_t oldLength, const uint8* newKey,
412 	size_t newLength)
413 {
414 	MutexLocker _(fQueryLock);
415 
416 	DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
417 	while (iterator.HasNext()) {
418 		Query* query = iterator.Next();
419 		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey,
420 			newLength);
421 	}
422 }
423 
424 
425 void
426 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID,
427 	const char* oldName, ino_t newDirectoryID, const char* newName)
428 {
429 	MutexLocker _(fQueryLock);
430 
431 	size_t oldLength = strlen(oldName);
432 	size_t newLength = strlen(newName);
433 
434 	DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
435 	while (iterator.HasNext()) {
436 		Query* query = iterator.Next();
437 		query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength,
438 			newDirectoryID, newName, newLength);
439 	}
440 }
441 
442 
/*!	Checks if there is a live query whose results depend on the presence
	or value of the specified attribute.
	Don't use it if you already have all the data together to evaluate
	the queries - it wouldn't save you anything in this case.

	The check is not implemented yet; this method currently always returns
	\c true, no matter which \a attribute is passed in.
*/
bool
Volume::CheckForLiveQuery(const char* attribute)
{
	// TODO: check for a live query that depends on the specified attribute
	return true;
}
454 
455 
456 void
457 Volume::AddQuery(Query* query)
458 {
459 	MutexLocker _(fQueryLock);
460 	fQueries.Add(query);
461 }
462 
463 
464 void
465 Volume::RemoveQuery(Query* query)
466 {
467 	MutexLocker _(fQueryLock);
468 	fQueries.Remove(query);
469 }
470 
471 
472 status_t
473 Volume::CreateCheckVisitor()
474 {
475 	if (fCheckVisitor != NULL)
476 		return B_BUSY;
477 
478 	fCheckVisitor = new(std::nothrow) ::CheckVisitor(this);
479 	if (fCheckVisitor == NULL)
480 		return B_NO_MEMORY;
481 
482 	return B_OK;
483 }
484 
485 
/*!	Deletes the check visitor, if any, and resets the pointer, so that
	CreateCheckVisitor() can be called again.
*/
void
Volume::DeleteCheckVisitor()
{
	delete fCheckVisitor;
	fCheckVisitor = NULL;
}
492 
493 
494 //	#pragma mark - Disk scanning and initialization
495 
496 
497 /*static*/ status_t
498 Volume::CheckSuperBlock(const uint8* data, uint32* _offset)
499 {
500 	disk_super_block* superBlock = (disk_super_block*)(data + 512);
501 	if (superBlock->IsValid()) {
502 		if (_offset != NULL)
503 			*_offset = 512;
504 		return B_OK;
505 	}
506 
507 #ifndef BFS_LITTLE_ENDIAN_ONLY
508 	// For PPC, the superblock might be located at offset 0
509 	superBlock = (disk_super_block*)data;
510 	if (superBlock->IsValid()) {
511 		if (_offset != NULL)
512 			*_offset = 0;
513 		return B_OK;
514 	}
515 #endif
516 
517 	return B_BAD_VALUE;
518 }
519 
520 
521 /*static*/ status_t
522 Volume::Identify(int fd, disk_super_block* superBlock)
523 {
524 	uint8 buffer[1024];
525 	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
526 		return B_IO_ERROR;
527 
528 	uint32 offset;
529 	if (CheckSuperBlock(buffer, &offset) != B_OK)
530 		return B_BAD_VALUE;
531 
532 	memcpy(superBlock, buffer + offset, sizeof(disk_super_block));
533 	return B_OK;
534 }
535 
536 
/*!	Initializes the device \a fd with a new, empty BFS volume.

	This builds a new superblock, sets up the block cache and the journal,
	and then - within a single transaction - clears the block bitmap,
	creates the root directory, the standard indices (unless
	\c VOLUME_NO_INDICES is set), and the volume ID attribute. Finally,
	the unused boot block is erased, and the superblock is written.

	\param fd The device to initialize; it has to be writable.
	\param name The name of the volume; it must not contain a '/'.
	\param blockSize The block size to use; one of 1024, 2048, 4096, or 8192.
	\param flags Only \c VOLUME_NO_INDICES is evaluated here.
	\return \c B_OK on success, \c B_BAD_VALUE for an invalid name, block
		size, or device, \c B_READ_ONLY_DEVICE if the device cannot be
		written to, or another error code if one of the steps failed.
*/
status_t
Volume::Initialize(int fd, const char* name, uint32 blockSize,
	uint32 flags)
{
	// although there is no really good reason for it, we won't
	// accept '/' in disk names (mkbfs does this, too - and since
	// Tracker names mounted volumes like their name)
	if (strchr(name, '/') != NULL)
		return B_BAD_VALUE;

	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096
		&& blockSize != 8192)
		return B_BAD_VALUE;

	DeviceOpener opener(fd, O_RDWR);
	if (opener.Device() < B_OK)
		return B_BAD_VALUE;

	if (opener.IsReadOnly())
		return B_READ_ONLY_DEVICE;

	fDevice = opener.Device();

	uint32 deviceBlockSize;
	off_t deviceSize;
	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
		return B_ERROR;

	// a trailing partial block of the device is left unused
	off_t numBlocks = deviceSize / blockSize;

	// create valid superblock

	fSuperBlock.Initialize(name, numBlocks, blockSize);

	// initialize short hands to the superblock (to save byte swapping)
	fBlockSize = fSuperBlock.BlockSize();
	fBlockShift = fSuperBlock.BlockShift();
	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();

	// determine log size depending on the size of the volume:
	// 512 blocks for up to 20480 blocks, 4096 blocks for devices larger
	// than 1 GB, and 2048 blocks for everything else
	off_t logSize = 2048;
	if (numBlocks <= 20480)
		logSize = 512;
	if (deviceSize > 1LL * 1024 * 1024 * 1024)
		logSize = 4096;

	// since the allocator has not been initialized yet, we
	// cannot use BlockAllocator::BitmapSize() here
	off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8);

	// the log directly follows the block bitmap (block 0 comes before the
	// bitmap, hence the "+ 1"); the log is empty, so its start and end
	// both refer to the first log block
	fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1);
	fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize);
	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(
		ToBlock(Log()));

	// set the current log pointers, so that journaling will work correctly
	fLogStart = fSuperBlock.LogStart();
	fLogEnd = fSuperBlock.LogEnd();

	// this also rejects devices that are too small (see
	// disk_super_block::IsValid())
	if (!IsValidSuperBlock())
		RETURN_ERROR(B_ERROR);

	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
		return B_ERROR;

	fJournal = new(std::nothrow) Journal(this);
	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
		RETURN_ERROR(B_ERROR);

	// ready to write data to disk

	Transaction transaction(this, 0);

	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
		RETURN_ERROR(B_ERROR);

	// create the root directory, and remember its location in the superblock
	off_t id;
	status_t status = Inode::Create(transaction, NULL, NULL,
		S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode);
	if (status < B_OK)
		RETURN_ERROR(status);

	fSuperBlock.root_dir = ToBlockRun(id);

	if ((flags & VOLUME_NO_INDICES) == 0) {
		// The indices root directory will be created automatically
		// when the standard indices are created (or any other).
		Index index(this);
		status = index.Create(transaction, "name", B_STRING_TYPE);
		if (status < B_OK)
			return status;

		status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE);
		if (status < B_OK)
			return status;

		status = index.Create(transaction, "last_modified", B_INT64_TYPE);
		if (status < B_OK)
			return status;

		status = index.Create(transaction, "size", B_INT64_TYPE);
		if (status < B_OK)
			return status;
	}

	status = CreateVolumeID(transaction);
	if (status < B_OK)
		return status;

	status = _EraseUnusedBootBlock();
	if (status < B_OK)
		return status;

	status = WriteSuperBlock();
	if (status < B_OK)
		return status;

	status = transaction.Done();
	if (status < B_OK)
		return status;

	// NOTE(review): the result of Sync() is ignored here
	Sync();
	opener.RemoveCache(true);
	return B_OK;
}
662 
663 
664 /*!	Erase the first boot block, as we don't use it and there
665  *	might be leftovers from other file systems. This can cause
666  *	confusion for identifying the partition if not erased.
667  */
668 status_t
669 Volume::_EraseUnusedBootBlock()
670 {
671 	const int32 blockSize = 512;
672 	const char emptySector[blockSize] = { 0 };
673 	// Erase boot block if any
674 	if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize)
675 		return B_IO_ERROR;
676 	// Erase ext2 superblock if any
677 	if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize)
678 		return B_IO_ERROR;
679 
680 	return B_OK;
681 }
682