xref: /haiku/src/add-ons/kernel/file_systems/bfs/Volume.cpp (revision 344ded80d400028c8f561b4b876257b94c12db4a)
1 /*
2  * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //! superblock, mounting, etc.
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "file_systems/DeviceOpener.h"
14 #include "Inode.h"
15 #include "Journal.h"
16 #include "Query.h"
17 #include "Volume.h"
18 
19 
20 static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to end up with
	// on newly initialized disks.
23 	// That's only relevant for smaller disks, though, since any
24 	// of today's disk sizes already reach the maximum length
25 	// of an allocation group (65536 blocks).
26 	// It seems to create appropriate numbers for smaller disks
27 	// with this setting, though (i.e. you can create a 400 MB
28 	// file on a 1 GB disk without the need for double indirect
29 	// blocks).
30 
31 
32 //	#pragma mark -
33 
34 
35 bool
36 disk_super_block::IsValid() const
37 {
38 	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
39 		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
40 		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
41 		|| (int32)block_size != inode_size
42 		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
43 		|| (1UL << BlockShift()) != BlockSize()
44 		|| AllocationGroups() < 1
45 		|| AllocationGroupShift() < 1
46 		|| BlocksPerAllocationGroup() < 1
47 		|| NumBlocks() < 10
48 		|| AllocationGroups() != divide_roundup(NumBlocks(),
49 			1L << AllocationGroupShift()))
50 		return false;
51 
52 	return true;
53 }
54 
55 
56 void
57 disk_super_block::Initialize(const char* diskName, off_t numBlocks,
58 	uint32 blockSize)
59 {
60 	memset(this, 0, sizeof(disk_super_block));
61 
62 	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
63 	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
64 	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
65 	fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
66 	flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
67 
68 	strlcpy(name, diskName, sizeof(name));
69 
70 	int32 blockShift = 9;
71 	while ((1UL << blockShift) < blockSize) {
72 		blockShift++;
73 	}
74 
75 	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
76 	block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
77 
78 	num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
79 	used_blocks = 0;
80 
81 	// Get the minimum ag_shift (that's determined by the block size)
82 
83 	int32 bitsPerBlock = blockSize << 3;
84 	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
85 	int32 bitmapBlocksPerGroup = 1;
86 	int32 groupShift = 13;
87 
88 	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
89 		groupShift++;
90 	}
91 
92 	// Many allocation groups help applying allocation policies, but if
93 	// they are too small, we will need to many block_runs to cover large
94 	// files (see above to get an explanation of the kDesiredAllocationGroups
95 	// constant).
96 
97 	int32 numGroups;
98 
99 	while (true) {
100 		numGroups = (bitmapBlocks + bitmapBlocksPerGroup - 1) / bitmapBlocksPerGroup;
101 		if (numGroups > kDesiredAllocationGroups) {
102 			if (groupShift == 16)
103 				break;
104 
105 			groupShift++;
106 			bitmapBlocksPerGroup *= 2;
107 		} else
108 			break;
109 	}
110 
111 	num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
112 	// blocks_per_ag holds the number of bitmap blocks that are in each allocation group
113 	blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(bitmapBlocksPerGroup);
114 	ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
115 }
116 
117 
118 //	#pragma mark -
119 
120 
121 Volume::Volume(fs_volume* volume)
122 	:
123 	fVolume(volume),
124 	fBlockAllocator(this),
125 	fRootNode(NULL),
126 	fIndicesNode(NULL),
127 	fDirtyCachedBlocks(0),
128 	fFlags(0),
129 	fCheckingThread(-1),
130 	fCheckVisitor(NULL)
131 {
132 	mutex_init(&fLock, "bfs volume");
133 	mutex_init(&fQueryLock, "bfs queries");
134 }
135 
136 
137 Volume::~Volume()
138 {
139 	mutex_destroy(&fQueryLock);
140 	mutex_destroy(&fLock);
141 }
142 
143 
144 bool
145 Volume::IsValidSuperBlock() const
146 {
147 	return fSuperBlock.IsValid();
148 }
149 
150 
151 /*!	Checks whether the given block number may be the location of an inode block.
152 */
153 bool
154 Volume::IsValidInodeBlock(off_t block) const
155 {
156 	return block > fSuperBlock.LogEnd() && block < NumBlocks();
157 }
158 
159 
160 void
161 Volume::Panic()
162 {
163 	FATAL(("Disk corrupted... switch to read-only mode!\n"));
164 	fFlags |= VOLUME_READ_ONLY;
165 #if KDEBUG
166 	kernel_debugger("BFS panics!");
167 #endif
168 }
169 
170 
171 status_t
172 Volume::Mount(const char* deviceName, uint32 flags)
173 {
174 	// TODO: validate the FS in write mode as well!
175 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
176 	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
177 	// in big endian mode, we only mount read-only for now
178 	flags |= B_MOUNT_READ_ONLY;
179 #endif
180 
181 	DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0
182 		? O_RDONLY : O_RDWR);
183 	fDevice = opener.Device();
184 	if (fDevice < B_OK)
185 		RETURN_ERROR(fDevice);
186 
187 	if (opener.IsReadOnly())
188 		fFlags |= VOLUME_READ_ONLY;
189 
190 	// read the superblock
191 	if (Identify(fDevice, &fSuperBlock) != B_OK) {
192 		FATAL(("invalid superblock!\n"));
193 		return B_BAD_VALUE;
194 	}
195 
196 	// initialize short hands to the superblock (to save byte swapping)
197 	fBlockSize = fSuperBlock.BlockSize();
198 	fBlockShift = fSuperBlock.BlockShift();
199 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
200 
201 	// check if the device size is large enough to hold the file system
202 	off_t diskSize;
203 	if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK)
204 		RETURN_ERROR(B_ERROR);
205 	if (diskSize < (NumBlocks() << BlockShift())) {
206 		FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%"
207 			B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift()));
208 		RETURN_ERROR(B_BAD_VALUE);
209 	}
210 
211 	// set the current log pointers, so that journaling will work correctly
212 	fLogStart = fSuperBlock.LogStart();
213 	fLogEnd = fSuperBlock.LogEnd();
214 
215 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
216 		return B_ERROR;
217 
218 	fJournal = new(std::nothrow) Journal(this);
219 	if (fJournal == NULL)
220 		return B_NO_MEMORY;
221 
222 	status_t status = fJournal->InitCheck();
223 	if (status < B_OK) {
224 		FATAL(("could not initialize journal: %s!\n", strerror(status)));
225 		return status;
226 	}
227 
228 	// replaying the log is the first thing we will do on this disk
229 	status = fJournal->ReplayLog();
230 	if (status != B_OK) {
231 		FATAL(("Replaying log failed, data may be corrupted, volume "
232 			"read-only.\n"));
233 		fFlags |= VOLUME_READ_ONLY;
234 			// TODO: if this is the boot volume, Bootscript will assume this
235 			// is a CD...
236 			// TODO: it would be nice to have a user visible alert instead
237 			// of letting him just find this in the syslog.
238 	}
239 
240 	status = fBlockAllocator.Initialize();
241 	if (status != B_OK) {
242 		FATAL(("could not initialize block bitmap allocator!\n"));
243 		return status;
244 	}
245 
246 	fRootNode = new(std::nothrow) Inode(this, ToVnode(Root()));
247 	if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) {
248 		status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode,
249 			&gBFSVnodeOps, fRootNode->Mode(), 0);
250 		if (status == B_OK) {
251 			// try to get indices root dir
252 
253 			if (!Indices().IsZero()) {
254 				fIndicesNode = new(std::nothrow) Inode(this,
255 					ToVnode(Indices()));
256 			}
257 
258 			if (fIndicesNode == NULL
259 				|| fIndicesNode->InitCheck() < B_OK
260 				|| !fIndicesNode->IsContainer()) {
261 				INFORM(("bfs: volume doesn't have indices!\n"));
262 
263 				if (fIndicesNode) {
264 					// if this is the case, the index root node is gone bad,
265 					// and BFS switch to read-only mode
266 					fFlags |= VOLUME_READ_ONLY;
267 					delete fIndicesNode;
268 					fIndicesNode = NULL;
269 				}
270 			} else {
271 				// we don't use the vnode layer to access the indices node
272 			}
273 		} else {
274 			FATAL(("could not create root node: publish_vnode() failed!\n"));
275 			delete fRootNode;
276 			return status;
277 		}
278 	} else {
279 		status = B_BAD_VALUE;
280 		FATAL(("could not create root node!\n"));
281 
282 		// We need to wait for the block allocator to finish
283 		fBlockAllocator.Uninitialize();
284 		return status;
285 	}
286 
287 	// all went fine
288 	opener.Keep();
289 	return B_OK;
290 }
291 
292 
293 status_t
294 Volume::Unmount()
295 {
296 	put_vnode(fVolume, ToVnode(Root()));
297 
298 	fBlockAllocator.Uninitialize();
299 
300 	// This will also flush the log & all blocks to disk
301 	delete fJournal;
302 	fJournal = NULL;
303 
304 	delete fIndicesNode;
305 
306 	block_cache_delete(fBlockCache, !IsReadOnly());
307 	close(fDevice);
308 
309 	return B_OK;
310 }
311 
312 
313 status_t
314 Volume::Sync()
315 {
316 	return fJournal->FlushLogAndBlocks();
317 }
318 
319 
320 status_t
321 Volume::ValidateBlockRun(block_run run)
322 {
323 	if (run.AllocationGroup() < 0
324 		|| run.AllocationGroup() > (int32)AllocationGroups()
325 		|| run.Start() > (1UL << AllocationGroupShift())
326 		|| run.length == 0
327 		|| uint32(run.Length() + run.Start())
328 				> (1UL << AllocationGroupShift())) {
329 		Panic();
330 		FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(),
331 			run.Start(), run.Length()));
332 		return B_BAD_DATA;
333 	}
334 	return B_OK;
335 }
336 
337 
338 block_run
339 Volume::ToBlockRun(off_t block) const
340 {
341 	block_run run;
342 	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(
343 		block >> AllocationGroupShift());
344 	run.start = HOST_ENDIAN_TO_BFS_INT16(
345 		block & ((1LL << AllocationGroupShift()) - 1));
346 	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
347 	return run;
348 }
349 
350 
351 status_t
352 Volume::CreateIndicesRoot(Transaction& transaction)
353 {
354 	off_t id;
355 	status_t status = Inode::Create(transaction, NULL, NULL,
356 		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id,
357 		&fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE);
358 	if (status < B_OK)
359 		RETURN_ERROR(status);
360 
361 	fSuperBlock.indices = ToBlockRun(id);
362 	return WriteSuperBlock();
363 }
364 
365 
366 status_t
367 Volume::CreateVolumeID(Transaction& transaction)
368 {
369 	Attribute attr(fRootNode);
370 	status_t status;
371 	attr_cookie* cookie;
372 	status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie);
373 	if (status == B_OK) {
374 		static bool seeded = false;
375 		if (!seeded) {
376 			// seed the random number generator for the be:volume_id attribute.
377 			srand(time(NULL));
378 			seeded = true;
379 		}
380 		uint64_t id;
381 		size_t length = sizeof(id);
382 		id = ((uint64_t)rand() << 32) | rand();
383 		attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL);
384 	}
385 	return status;
386 }
387 
388 
389 
390 status_t
391 Volume::AllocateForInode(Transaction& transaction, const Inode* parent,
392 	mode_t type, block_run& run)
393 {
394 	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(),
395 		type, run);
396 }
397 
398 
399 status_t
400 Volume::WriteSuperBlock()
401 {
402 	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block))
403 			!= sizeof(disk_super_block))
404 		return B_IO_ERROR;
405 
406 	return B_OK;
407 }
408 
409 
410 void
411 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type,
412 	const uint8* oldKey, size_t oldLength, const uint8* newKey,
413 	size_t newLength)
414 {
415 	MutexLocker _(fQueryLock);
416 
417 	DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
418 	while (iterator.HasNext()) {
419 		Query* query = iterator.Next();
420 		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey,
421 			newLength);
422 	}
423 }
424 
425 
426 void
427 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID,
428 	const char* oldName, ino_t newDirectoryID, const char* newName)
429 {
430 	MutexLocker _(fQueryLock);
431 
432 	size_t oldLength = strlen(oldName);
433 	size_t newLength = strlen(newName);
434 
435 	DoublyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
436 	while (iterator.HasNext()) {
437 		Query* query = iterator.Next();
438 		query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength,
439 			newDirectoryID, newName, newLength);
440 	}
441 }
442 
443 
444 /*!	Checks if there is a live query whose results depend on the presence
445 	or value of the specified attribute.
446 	Don't use it if you already have all the data together to evaluate
447 	the queries - it wouldn't safe you anything in this case.
448 */
449 bool
450 Volume::CheckForLiveQuery(const char* attribute)
451 {
452 	// TODO: check for a live query that depends on the specified attribute
453 	return true;
454 }
455 
456 
457 void
458 Volume::AddQuery(Query* query)
459 {
460 	MutexLocker _(fQueryLock);
461 	fQueries.Add(query);
462 }
463 
464 
465 void
466 Volume::RemoveQuery(Query* query)
467 {
468 	MutexLocker _(fQueryLock);
469 	fQueries.Remove(query);
470 }
471 
472 
473 status_t
474 Volume::CreateCheckVisitor()
475 {
476 	if (fCheckVisitor != NULL)
477 		return B_BUSY;
478 
479 	fCheckVisitor = new(std::nothrow) ::CheckVisitor(this);
480 	if (fCheckVisitor == NULL)
481 		return B_NO_MEMORY;
482 
483 	return B_OK;
484 }
485 
486 
487 void
488 Volume::DeleteCheckVisitor()
489 {
490 	delete fCheckVisitor;
491 	fCheckVisitor = NULL;
492 }
493 
494 
495 //	#pragma mark - Disk scanning and initialization
496 
497 
498 /*static*/ status_t
499 Volume::CheckSuperBlock(const uint8* data, uint32* _offset)
500 {
501 	disk_super_block* superBlock = (disk_super_block*)(data + 512);
502 	if (superBlock->IsValid()) {
503 		if (_offset != NULL)
504 			*_offset = 512;
505 		return B_OK;
506 	}
507 
508 #ifndef BFS_LITTLE_ENDIAN_ONLY
509 	// For PPC, the superblock might be located at offset 0
510 	superBlock = (disk_super_block*)data;
511 	if (superBlock->IsValid()) {
512 		if (_offset != NULL)
513 			*_offset = 0;
514 		return B_OK;
515 	}
516 #endif
517 
518 	return B_BAD_VALUE;
519 }
520 
521 
522 /*static*/ status_t
523 Volume::Identify(int fd, disk_super_block* superBlock)
524 {
525 	uint8 buffer[1024];
526 	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
527 		return B_IO_ERROR;
528 
529 	uint32 offset;
530 	if (CheckSuperBlock(buffer, &offset) != B_OK)
531 		return B_BAD_VALUE;
532 
533 	memcpy(superBlock, buffer + offset, sizeof(disk_super_block));
534 	return B_OK;
535 }
536 
537 
538 status_t
539 Volume::Initialize(int fd, const char* name, uint32 blockSize,
540 	uint32 flags)
541 {
542 	// although there is no really good reason for it, we won't
543 	// accept '/' in disk names (mkbfs does this, too - and since
544 	// Tracker names mounted volumes like their name)
545 	if (strchr(name, '/') != NULL)
546 		return B_BAD_VALUE;
547 
548 	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096
549 		&& blockSize != 8192)
550 		return B_BAD_VALUE;
551 
552 	DeviceOpener opener(fd, O_RDWR);
553 	if (opener.Device() < B_OK)
554 		return B_BAD_VALUE;
555 
556 	if (opener.IsReadOnly())
557 		return B_READ_ONLY_DEVICE;
558 
559 	fDevice = opener.Device();
560 
561 	uint32 deviceBlockSize;
562 	off_t deviceSize;
563 	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
564 		return B_ERROR;
565 
566 	off_t numBlocks = deviceSize / blockSize;
567 
568 	// create valid superblock
569 
570 	fSuperBlock.Initialize(name, numBlocks, blockSize);
571 
572 	// initialize short hands to the superblock (to save byte swapping)
573 	fBlockSize = fSuperBlock.BlockSize();
574 	fBlockShift = fSuperBlock.BlockShift();
575 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
576 
577 	// determine log size depending on the size of the volume
578 	off_t logSize = 2048;
579 	if (numBlocks <= 20480)
580 		logSize = 512;
581 	if (deviceSize > 1LL * 1024 * 1024 * 1024)
582 		logSize = 4096;
583 
584 	// since the allocator has not been initialized yet, we
585 	// cannot use BlockAllocator::BitmapSize() here
586 	off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8);
587 
588 	fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1);
589 	fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize);
590 	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(
591 		ToBlock(Log()));
592 
593 	// set the current log pointers, so that journaling will work correctly
594 	fLogStart = fSuperBlock.LogStart();
595 	fLogEnd = fSuperBlock.LogEnd();
596 
597 	if (!IsValidSuperBlock())
598 		RETURN_ERROR(B_ERROR);
599 
600 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
601 		return B_ERROR;
602 
603 	fJournal = new(std::nothrow) Journal(this);
604 	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
605 		RETURN_ERROR(B_ERROR);
606 
607 	// ready to write data to disk
608 
609 	Transaction transaction(this, 0);
610 
611 	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
612 		RETURN_ERROR(B_ERROR);
613 
614 	off_t id;
615 	status_t status = Inode::Create(transaction, NULL, NULL,
616 		S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode);
617 	if (status < B_OK)
618 		RETURN_ERROR(status);
619 
620 	fSuperBlock.root_dir = ToBlockRun(id);
621 
622 	if ((flags & VOLUME_NO_INDICES) == 0) {
623 		// The indices root directory will be created automatically
624 		// when the standard indices are created (or any other).
625 		Index index(this);
626 		status = index.Create(transaction, "name", B_STRING_TYPE);
627 		if (status < B_OK)
628 			return status;
629 
630 		status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE);
631 		if (status < B_OK)
632 			return status;
633 
634 		status = index.Create(transaction, "last_modified", B_INT64_TYPE);
635 		if (status < B_OK)
636 			return status;
637 
638 		status = index.Create(transaction, "size", B_INT64_TYPE);
639 		if (status < B_OK)
640 			return status;
641 	}
642 
643 	status = CreateVolumeID(transaction);
644 	if (status < B_OK)
645 		return status;
646 
647 	status = _EraseUnusedBootBlock();
648 	if (status < B_OK)
649 		return status;
650 
651 	status = WriteSuperBlock();
652 	if (status < B_OK)
653 		return status;
654 
655 	status = transaction.Done();
656 	if (status < B_OK)
657 		return status;
658 
659 	Sync();
660 	opener.RemoveCache(true);
661 	return B_OK;
662 }
663 
664 
665 /*!	Erase the first boot block, as we don't use it and there
666  *	might be leftovers from other file systems. This can cause
667  *	confusion for identifying the partition if not erased.
668  */
669 status_t
670 Volume::_EraseUnusedBootBlock()
671 {
672 	const int32 blockSize = 512;
673 	const char emptySector[blockSize] = { 0 };
674 	// Erase boot block if any
675 	if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize)
676 		return B_IO_ERROR;
677 	// Erase ext2 superblock if any
678 	if (write_pos(fDevice, 1024, emptySector, blockSize) != blockSize)
679 		return B_IO_ERROR;
680 
681 	return B_OK;
682 }
683