xref: /haiku/src/add-ons/kernel/file_systems/bfs/Volume.cpp (revision fef6144999c2fa611f59ee6ffe6dd7999501385c)
1 /* Volume - BFS super block, mounting, etc.
2  *
3  * Copyright 2001-2004, Axel Dörfler, axeld@pinc-software.de.
4  * This file may be used under the terms of the MIT License.
5  */
6 
7 
8 #include "Debug.h"
9 #include "Volume.h"
10 #include "Journal.h"
11 #include "Inode.h"
12 #include "Query.h"
13 
14 #include <util/kernel_cpp.h>
15 #include <KernelExport.h>
16 #include <Drivers.h>
17 #include <fs_volume.h>
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23 
24 
25 static const int32 kDesiredAllocationGroups = 56;
26 	// This is the number of allocation groups that will be tried
27 	// to be given for newly initialized disks.
28 	// That's only relevant for smaller disks, though, since any
29 	// of today's disk sizes already reach the maximum length
30 	// of an allocation group (65536 blocks).
31 	// It seems to create appropriate numbers for smaller disks
32 	// with this setting, though (i.e. you can create a 400 MB
33 	// file on a 1 GB disk without the need for double indirect
34 	// blocks).
35 
36 
37 class DeviceOpener {
38 	public:
39 		DeviceOpener(const char *device, int mode);
40 		~DeviceOpener();
41 
42 		int Open(const char *device, int mode);
43 		void *InitCache(off_t numBlocks, uint32 blockSize);
44 		void RemoveCache(bool allowWrites);
45 
46 		void Keep();
47 
48 		int Device() const { return fDevice; }
49 
50 		status_t GetSize(off_t *_size, uint32 *_blockSize = NULL);
51 
52 	private:
53 		int		fDevice;
54 		void	*fBlockCache;
55 };
56 
57 
58 DeviceOpener::DeviceOpener(const char *device, int mode)
59 	:
60 	fBlockCache(NULL)
61 {
62 	Open(device, mode);
63 }
64 
65 
66 DeviceOpener::~DeviceOpener()
67 {
68 	if (fDevice >= B_OK) {
69 		RemoveCache(false);
70 		close(fDevice);
71 	}
72 }
73 
74 
75 int
76 DeviceOpener::Open(const char *device, int mode)
77 {
78 	fDevice = open(device, mode);
79 	return fDevice;
80 }
81 
82 
83 void *
84 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize)
85 {
86 	return block_cache_create(fDevice, numBlocks, blockSize);
87 }
88 
89 
90 void
91 DeviceOpener::RemoveCache(bool allowWrites)
92 {
93 	if (fBlockCache == NULL)
94 		return;
95 
96 	block_cache_delete(fBlockCache, allowWrites);
97 	fBlockCache = NULL;
98 }
99 
100 
101 void
102 DeviceOpener::Keep()
103 {
104 	fDevice = -1;
105 }
106 
107 
108 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY
109  *	to compute the size, or fstat() if that failed.
110  */
111 
112 status_t
113 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize)
114 {
115 	device_geometry geometry;
116 	if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) {
117 		// maybe it's just a file
118 		struct stat stat;
119 		if (fstat(fDevice, &stat) < 0)
120 			return B_ERROR;
121 
122 		if (_size)
123 			*_size = stat.st_size;
124 		if (_blockSize)	// that shouldn't cause us any problems
125 			*_blockSize = 512;
126 
127 		return B_OK;
128 	}
129 
130 	if (_size) {
131 		*_size = 1LL * geometry.head_count * geometry.cylinder_count
132 					* geometry.sectors_per_track * geometry.bytes_per_sector;
133 	}
134 	if (_blockSize)
135 		*_blockSize = geometry.bytes_per_sector;
136 
137 	return B_OK;
138 }
139 
140 
141 //	#pragma mark -
142 
143 
144 bool
145 disk_super_block::IsValid()
146 {
147 	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
148 		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
149 		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
150 		|| (int32)block_size != inode_size
151 		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
152 		|| (1UL << BlockShift()) != BlockSize()
153 		|| AllocationGroups() < 1
154 		|| AllocationGroupShift() < 1
155 		|| BlocksPerAllocationGroup() < 1
156 		|| NumBlocks() < 10
157 		|| AllocationGroups() != divide_roundup(NumBlocks(),
158 			1L << AllocationGroupShift()))
159 		return false;
160 
161 	return true;
162 }
163 
164 
165 void
166 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize)
167 {
168 	memset(this, 0, sizeof(disk_super_block));
169 
170 	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
171 	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
172 	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
173 	fs_byte_order = SUPER_BLOCK_FS_LENDIAN;
174 	flags = SUPER_BLOCK_DISK_CLEAN;
175 
176 	strlcpy(name, diskName, sizeof(name));
177 
178 	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
179 	for (block_shift = 9; (1UL << block_shift) < blockSize; block_shift++);
180 
181 	num_blocks = numBlocks;
182 	used_blocks = 0;
183 
184 	// Get the minimum ag_shift (that's determined by the block size)
185 
186 	blocks_per_ag = 1;
187 	ag_shift = 13;
188 
189 	int32 bitsPerBlock = blockSize << 3;
190 	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
191 
192 	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
193 		ag_shift++;
194 	}
195 
196 	// Many allocation groups help applying allocation policies, but if
197 	// they are too small, we will need to many block_runs to cover large
198 	// files (see above to get an explanation of the kDesiredAllocationGroups
199 	// constant).
200 
201 	while (true) {
202 		num_ags = (bitmapBlocks + blocks_per_ag - 1) / blocks_per_ag;
203 		if (num_ags > kDesiredAllocationGroups) {
204 			if (ag_shift == 16)
205 				break;
206 
207 			ag_shift++;
208 			blocks_per_ag *= 2;
209 		} else
210 			break;
211 	}
212 }
213 
214 
215 //	#pragma mark -
216 
217 
218 Volume::Volume(mount_id id)
219 	:
220 	fID(id),
221 	fBlockAllocator(this),
222 	fLock("bfs volume"),
223 	fRootNode(NULL),
224 	fIndicesNode(NULL),
225 	fDirtyCachedBlocks(0),
226 	fUniqueID(0),
227 	fFlags(0)
228 {
229 }
230 
231 
232 Volume::~Volume()
233 {
234 }
235 
236 
237 bool
238 Volume::IsValidSuperBlock()
239 {
240 	return fSuperBlock.IsValid();
241 }
242 
243 
244 void
245 Volume::Panic()
246 {
247 	FATAL(("we have to panic... switch to read-only mode!\n"));
248 	fFlags |= VOLUME_READ_ONLY;
249 #ifdef USER
250 	debugger("BFS panics!");
251 #elif defined(DEBUG)
252 	kernel_debugger("BFS panics!");
253 #endif
254 }
255 
256 
257 status_t
258 Volume::Mount(const char *deviceName, uint32 flags)
259 {
260 	if (flags & B_MOUNT_READ_ONLY)
261 		fFlags |= VOLUME_READ_ONLY;
262 
263 	// ToDo: validate the FS in write mode as well!
264 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
265 	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
266 	// in big endian mode, we only mount read-only for now
267 	flags |= B_MOUNT_READ_ONLY;
268 #endif
269 
270 	DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR);
271 
272 	// if we couldn't open the device, try read-only (don't rely on a specific error code)
273 	if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) {
274 		opener.Open(deviceName, O_RDONLY);
275 		fFlags |= VOLUME_READ_ONLY;
276 	}
277 
278 	fDevice = opener.Device();
279 	if (fDevice < B_OK)
280 		RETURN_ERROR(fDevice);
281 
282 	// check if it's a regular file, and if so, disable the cache for the
283 	// underlaying file system
284 	struct stat stat;
285 	if (fstat(fDevice, &stat) < 0)
286 		RETURN_ERROR(B_ERROR);
287 
288 #ifndef NO_FILE_UNCACHED_IO
289 	if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) {
290 		// mount read-only if the cache couldn't be disabled
291 #	ifdef DEBUG
292 		FATAL(("couldn't disable cache for image file - system may dead-lock!\n"));
293 #	else
294 		FATAL(("couldn't disable cache for image file!\n"));
295 		Panic();
296 #	endif
297 	}
298 #endif
299 
300 	// read the super block
301 	if (Identify(fDevice, &fSuperBlock) != B_OK) {
302 		FATAL(("invalid super block!\n"));
303 		return B_BAD_VALUE;
304 	}
305 
306 	// initialize short hands to the super block (to save byte swapping)
307 	fBlockSize = fSuperBlock.BlockSize();
308 	fBlockShift = fSuperBlock.BlockShift();
309 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
310 
311 	// check if the device size is large enough to hold the file system
312 	off_t diskSize;
313 	if (opener.GetSize(&diskSize) < B_OK)
314 		RETURN_ERROR(B_ERROR);
315 	if (diskSize < (NumBlocks() << BlockShift()))
316 		RETURN_ERROR(B_BAD_VALUE);
317 
318 	// set the current log pointers, so that journaling will work correctly
319 	fLogStart = fSuperBlock.LogStart();
320 	fLogEnd = fSuperBlock.LogEnd();
321 
322 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
323 		return B_ERROR;
324 
325 	fJournal = new Journal(this);
326 	// replaying the log is the first thing we will do on this disk
327 	if (fJournal && fJournal->InitCheck() < B_OK
328 		|| fBlockAllocator.Initialize() < B_OK) {
329 		// ToDo: improve error reporting for a bad journal
330 		FATAL(("could not initialize journal/block bitmap allocator!\n"));
331 		return B_NO_MEMORY;
332 	}
333 
334 	status_t status = B_OK;
335 
336 	fRootNode = new Inode(this, ToVnode(Root()));
337 	if (fRootNode && fRootNode->InitCheck() == B_OK) {
338 		status = publish_vnode(fID, ToVnode(Root()), (void *)fRootNode);
339 		if (status == B_OK) {
340 			// try to get indices root dir
341 
342 			// question: why doesn't get_vnode() work here??
343 			// answer: we have not yet backpropagated the pointer to the
344 			// volume in bfs_mount(), so bfs_read_vnode() can't get it.
345 			// But it's not needed to do that anyway.
346 
347 			if (!Indices().IsZero())
348 				fIndicesNode = new Inode(this, ToVnode(Indices()));
349 
350 			if (fIndicesNode == NULL
351 				|| fIndicesNode->InitCheck() < B_OK
352 				|| !fIndicesNode->IsContainer()) {
353 				INFORM(("bfs: volume doesn't have indices!\n"));
354 
355 				if (fIndicesNode) {
356 					// if this is the case, the index root node is gone bad, and
357 					// BFS switch to read-only mode
358 					fFlags |= VOLUME_READ_ONLY;
359 					delete fIndicesNode;
360 					fIndicesNode = NULL;
361 				}
362 			}
363 
364 			// all went fine
365 			opener.Keep();
366 			return B_OK;
367 		} else
368 			FATAL(("could not create root node: new_vnode() failed!\n"));
369 
370 		delete fRootNode;
371 	} else {
372 		status = B_BAD_VALUE;
373 		FATAL(("could not create root node!\n"));
374 	}
375 
376 	return status;
377 }
378 
379 
380 status_t
381 Volume::Unmount()
382 {
383 	// Unlike in BeOS, we need to put the reference to our root node ourselves
384 	put_vnode(fID, ToVnode(Root()));
385 
386 	// This will also flush the log & all blocks to disk
387 	delete fJournal;
388 	fJournal = NULL;
389 
390 	delete fIndicesNode;
391 
392 	block_cache_delete(fBlockCache, !IsReadOnly());
393 	close(fDevice);
394 
395 	return B_OK;
396 }
397 
398 
399 status_t
400 Volume::Sync()
401 {
402 	return fJournal->FlushLogAndBlocks();
403 }
404 
405 
406 status_t
407 Volume::ValidateBlockRun(block_run run)
408 {
409 	if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups()
410 		|| run.Start() > (1UL << AllocationGroupShift())
411 		|| run.length == 0
412 		|| uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) {
413 		Panic();
414 		FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length()));
415 		return B_BAD_DATA;
416 	}
417 	return B_OK;
418 }
419 
420 
421 block_run
422 Volume::ToBlockRun(off_t block) const
423 {
424 	block_run run;
425 	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift());
426 	run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1));
427 	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
428 	return run;
429 }
430 
431 
432 status_t
433 Volume::CreateIndicesRoot(Transaction &transaction)
434 {
435 	off_t id;
436 	status_t status = Inode::Create(transaction, NULL, NULL,
437 		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode);
438 	if (status < B_OK)
439 		RETURN_ERROR(status);
440 
441 	fSuperBlock.indices = ToBlockRun(id);
442 	return WriteSuperBlock();
443 }
444 
445 
446 status_t
447 Volume::AllocateForInode(Transaction &transaction, const Inode *parent, mode_t type, block_run &run)
448 {
449 	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run);
450 }
451 
452 
453 status_t
454 Volume::WriteSuperBlock()
455 {
456 	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block))
457 		return B_IO_ERROR;
458 
459 	return B_OK;
460 }
461 
462 
463 void
464 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
465 	size_t oldLength, const uint8 *newKey, size_t newLength)
466 {
467 	if (fQueryLock.Lock() < B_OK)
468 		return;
469 
470 	Query *query = NULL;
471 	while ((query = fQueries.Next(query)) != NULL)
472 		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength);
473 
474 	fQueryLock.Unlock();
475 }
476 
477 
478 /** Checks if there is a live query whose results depend on the presence
479  *	or value of the specified attribute.
480  *	Don't use it if you already have all the data together to evaluate
481  *	the queries - it wouldn't safe you anything in this case.
482  */
483 
484 bool
485 Volume::CheckForLiveQuery(const char *attribute)
486 {
487 	// ToDo: check for a live query that depends on the specified attribute
488 	return true;
489 }
490 
491 
492 void
493 Volume::AddQuery(Query *query)
494 {
495 	if (fQueryLock.Lock() < B_OK)
496 		return;
497 
498 	fQueries.Add(query);
499 
500 	fQueryLock.Unlock();
501 }
502 
503 
504 void
505 Volume::RemoveQuery(Query *query)
506 {
507 	if (fQueryLock.Lock() < B_OK)
508 		return;
509 
510 	fQueries.Remove(query);
511 
512 	fQueryLock.Unlock();
513 }
514 
515 
516 //	#pragma mark -
517 //	Disk scanning and initialization
518 
519 
520 status_t
521 Volume::Identify(int fd, disk_super_block *superBlock)
522 {
523 	char buffer[1024];
524 	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
525 		return B_IO_ERROR;
526 
527 	// Note: that does work only for x86, for PowerPC, the super block
528 	// may be located at offset 0!
529 	memcpy(superBlock, buffer + 512, sizeof(disk_super_block));
530 	if (!superBlock->IsValid()) {
531 #ifndef BFS_LITTLE_ENDIAN_ONLY
532 		memcpy(superBlock, buffer, sizeof(disk_super_block));
533 		if (!superBlock->IsValid())
534 			return B_BAD_VALUE;
535 #else
536 		return B_BAD_VALUE;
537 #endif
538 	}
539 
540 	return B_OK;
541 }
542 
543 
544 #ifdef USER
545 extern "C" void kill_device_vnodes(dev_t id);
546 	// This call is only available in the userland fs_shell
547 
548 status_t
549 Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags)
550 {
551 	// although there is no really good reason for it, we won't
552 	// accept '/' in disk names (mkbfs does this, too - and since
553 	// Tracker names mounted volumes like their name)
554 	if (strchr(name, '/') != NULL)
555 		return B_BAD_VALUE;
556 
557 	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192)
558 		return B_BAD_VALUE;
559 
560 	DeviceOpener opener(device, O_RDWR);
561 	if (opener.Device() < B_OK)
562 		return B_BAD_VALUE;
563 
564 	fDevice = opener.Device();
565 
566 	uint32 deviceBlockSize;
567 	off_t deviceSize;
568 	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
569 		return B_ERROR;
570 
571 	off_t numBlocks = deviceSize / blockSize;
572 
573 	// create valid super block
574 
575 	fSuperBlock.Initialize(name, numBlocks, blockSize);
576 
577 	// initialize short hands to the super block (to save byte swapping)
578 	fBlockSize = fSuperBlock.BlockSize();
579 	fBlockShift = fSuperBlock.BlockShift();
580 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
581 
582 	// since the allocator has not been initialized yet, we
583 	// cannot use BlockAllocator::BitmapSize() here
584 	fSuperBlock.log_blocks = ToBlockRun(AllocationGroups()
585 		* fSuperBlock.BlocksPerAllocationGroup() + 1);
586 	fSuperBlock.log_blocks.length = 2048;
587 		// ToDo: set the log size depending on the disk size
588 	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log()));
589 
590 	// set the current log pointers, so that journaling will work correctly
591 	fLogStart = fSuperBlock.LogStart();
592 	fLogEnd = fSuperBlock.LogEnd();
593 
594 	if (!IsValidSuperBlock())
595 		RETURN_ERROR(B_ERROR);
596 
597 	if (opener.InitCache(numBlocks) != B_OK)
598 		return B_ERROR;
599 
600 	fJournal = new Journal(this);
601 	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
602 		RETURN_ERROR(B_ERROR);
603 
604 	// ready to write data to disk
605 
606 	Transaction transaction(this, 0);
607 
608 	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
609 		RETURN_ERROR(B_ERROR);
610 
611 	off_t id;
612 	status_t status = Inode::Create(&transaction, NULL, NULL,
613 		S_DIRECTORY | 0755, 0, 0, &id, &fRootNode);
614 	if (status < B_OK)
615 		RETURN_ERROR(status);
616 
617 	fSuperBlock.root_dir = ToBlockRun(id);
618 
619 	if ((flags & VOLUME_NO_INDICES) == 0) {
620 		// The indices root directory will be created automatically
621 		// when the standard indices are created (or any other).
622 		Index index(this);
623 		status = index.Create(&transaction, "name", B_STRING_TYPE);
624 		if (status < B_OK)
625 			return status;
626 
627 		status = index.Create(&transaction, "last_modified", B_INT64_TYPE);
628 		if (status < B_OK)
629 			return status;
630 
631 		status = index.Create(&transaction, "size", B_INT64_TYPE);
632 		if (status < B_OK)
633 			return status;
634 	}
635 
636 	WriteSuperBlock();
637 	transaction.Done();
638 
639 	put_vnode(ID(), fRootNode->ID());
640 	if (fIndicesNode != NULL)
641 		put_vnode(ID(), fIndicesNode->ID());
642 
643 	kill_device_vnodes(ID());
644 		// This call is only available in the userland fs_shell
645 
646 	Sync();
647 	opener.RemoveCache(ALLOW_WRITES);
648 	return B_OK;
649 }
650 #endif
651