xref: /haiku/src/add-ons/kernel/file_systems/bfs/Volume.cpp (revision 81f5654c124bf46fba0fd251f208e2d88d81e1ce)
1 /* Volume - BFS super block, mounting, etc.
2 **
3 ** Initial version by Axel Dörfler, axeld@pinc-software.de
4 ** This file may be used under the terms of the OpenBeOS License.
5 */
6 
7 
8 #include "Debug.h"
9 #include "Volume.h"
10 #include "Journal.h"
11 #include "Inode.h"
12 #include "Query.h"
13 
14 #include <util/kernel_cpp.h>
15 #include <KernelExport.h>
16 #include <fs_volume.h>
17 
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 
23 
24 static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to create on
	// newly initialized disks.
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// This setting seems to produce appropriate numbers for smaller
	// disks, though (i.e. you can create a 400 MB file on a 1 GB disk
	// without the need for double indirect blocks).
34 
35 
36 class DeviceOpener {
37 	public:
38 		DeviceOpener(const char *device, int mode);
39 		~DeviceOpener();
40 
41 		int Open(const char *device, int mode);
42 		status_t InitCache(off_t numBlocks);
43 		void RemoveCache(int mode);
44 
45 		void Keep();
46 
47 		int Device() const { return fDevice; }
48 
49 		status_t GetSize(off_t *_size, uint32 *_blockSize = NULL);
50 
51 	private:
52 		int		fDevice;
53 		bool	fCached;
54 };
55 
56 
57 DeviceOpener::DeviceOpener(const char *device, int mode)
58 	:
59 	fCached(false)
60 {
61 	Open(device, mode);
62 }
63 
64 
65 DeviceOpener::~DeviceOpener()
66 {
67 	if (fDevice >= B_OK) {
68 		close(fDevice);
69 		if (fCached)
70 			remove_cached_device_blocks(fDevice, NO_WRITES);
71 	}
72 }
73 
74 
75 int
76 DeviceOpener::Open(const char *device, int mode)
77 {
78 	fDevice = open(device, mode);
79 	return fDevice;
80 }
81 
82 
83 status_t
84 DeviceOpener::InitCache(off_t numBlocks)
85 {
86 	if (init_cache_for_device(fDevice, numBlocks) == B_OK) {
87 		fCached = true;
88 		return B_OK;
89 	}
90 
91 	return B_ERROR;
92 }
93 
94 
95 void
96 DeviceOpener::RemoveCache(int mode)
97 {
98 	if (!fCached)
99 		return;
100 
101 	remove_cached_device_blocks(fDevice, mode);
102 	fCached = false;
103 }
104 
105 
106 void
107 DeviceOpener::Keep()
108 {
109 	fDevice = -1;
110 }
111 
112 
113 /** Returns the size of the device in bytes. It uses B_GET_GEOMETRY
114  *	to compute the size, or fstat() if that failed.
115  */
116 
117 status_t
118 DeviceOpener::GetSize(off_t *_size, uint32 *_blockSize)
119 {
120 	device_geometry geometry;
121 	if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) {
122 		// maybe it's just a file
123 		struct stat stat;
124 		if (fstat(fDevice, &stat) < 0)
125 			return B_ERROR;
126 
127 		if (_size)
128 			*_size = stat.st_size;
129 		if (_blockSize)	// that shouldn't cause us any problems
130 			*_blockSize = 512;
131 
132 		return B_OK;
133 	}
134 
135 	if (_size)
136 		*_size = geometry.head_count * geometry.cylinder_count * geometry.sectors_per_track;
137 	if (_blockSize)
138 		*_blockSize = geometry.bytes_per_sector;
139 
140 	return B_OK;
141 }
142 
143 
144 //	#pragma mark -
145 
146 
147 bool
148 disk_super_block::IsValid()
149 {
150 	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
151 		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
152 		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
153 		|| (int32)block_size != inode_size
154 		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
155 		|| (1UL << BlockShift()) != BlockSize()
156 		|| AllocationGroups() < 1
157 		|| AllocationGroupShift() < 1
158 		|| BlocksPerAllocationGroup() < 1
159 		|| NumBlocks() < 10
160 		|| AllocationGroups() != divide_roundup(NumBlocks(),
161 			1L << AllocationGroupShift()))
162 		return false;
163 
164 	return true;
165 }
166 
167 
168 void
169 disk_super_block::Initialize(const char *diskName, off_t numBlocks, uint32 blockSize)
170 {
171 	memset(this, 0, sizeof(disk_super_block));
172 
173 	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
174 	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
175 	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
176 	fs_byte_order = SUPER_BLOCK_FS_LENDIAN;
177 	flags = SUPER_BLOCK_DISK_CLEAN;
178 
179 	strlcpy(name, diskName, sizeof(name));
180 
181 	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
182 	for (block_shift = 9; (1UL << block_shift) < blockSize; block_shift++);
183 
184 	num_blocks = numBlocks;
185 	used_blocks = 0;
186 
187 	// Get the minimum ag_shift (that's determined by the block size)
188 
189 	blocks_per_ag = 1;
190 	ag_shift = 13;
191 
192 	int32 bitsPerBlock = blockSize << 3;
193 	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
194 
195 	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
196 		ag_shift++;
197 	}
198 
199 	// Many allocation groups help applying allocation policies, but if
200 	// they are too small, we will need to many block_runs to cover large
201 	// files (see above to get an explanation of the kDesiredAllocationGroups
202 	// constant).
203 
204 	while (true) {
205 		num_ags = (bitmapBlocks + blocks_per_ag - 1) / blocks_per_ag;
206 		if (num_ags > kDesiredAllocationGroups) {
207 			if (ag_shift == 16)
208 				break;
209 
210 			ag_shift++;
211 			blocks_per_ag *= 2;
212 		} else
213 			break;
214 	}
215 }
216 
217 
218 //	#pragma mark -
219 
220 
221 Volume::Volume(nspace_id id)
222 	:
223 	fID(id),
224 	fBlockAllocator(this),
225 	fLock("bfs volume"),
226 	fRootNode(NULL),
227 	fIndicesNode(NULL),
228 	fDirtyCachedBlocks(0),
229 	fUniqueID(0),
230 	fFlags(0)
231 {
232 }
233 
234 
235 Volume::~Volume()
236 {
237 }
238 
239 
240 bool
241 Volume::IsValidSuperBlock()
242 {
243 	return fSuperBlock.IsValid();
244 }
245 
246 
247 void
248 Volume::Panic()
249 {
250 	FATAL(("we have to panic... switch to read-only mode!\n"));
251 	fFlags |= VOLUME_READ_ONLY;
252 #ifdef USER
253 	debugger("BFS panics!");
254 #elif defined(DEBUG)
255 	kernel_debugger("BFS panics!");
256 #endif
257 }
258 
259 
260 status_t
261 Volume::Mount(const char *deviceName, uint32 flags)
262 {
263 	if (flags & B_MOUNT_READ_ONLY)
264 		fFlags |= VOLUME_READ_ONLY;
265 
266 	// ToDo: validate the FS in write mode as well!
267 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
268 	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
269 	// in big endian mode, we only mount read-only for now
270 	flags |= B_MOUNT_READ_ONLY;
271 #endif
272 
273 	DeviceOpener opener(deviceName, flags & B_MOUNT_READ_ONLY ? O_RDONLY : O_RDWR);
274 
275 	// if we couldn't open the device, try read-only (don't rely on a specific error code)
276 	if (opener.Device() < B_OK && (flags & B_MOUNT_READ_ONLY) == 0) {
277 		opener.Open(deviceName, O_RDONLY);
278 		fFlags |= VOLUME_READ_ONLY;
279 	}
280 
281 	fDevice = opener.Device();
282 	if (fDevice < B_OK)
283 		RETURN_ERROR(fDevice);
284 
285 	// check if it's a regular file, and if so, disable the cache for the
286 	// underlaying file system
287 	struct stat stat;
288 	if (fstat(fDevice, &stat) < 0)
289 		RETURN_ERROR(B_ERROR);
290 
291 #ifndef NO_FILE_UNCACHED_IO
292 	if (stat.st_mode & S_FILE && ioctl(fDevice, IOCTL_FILE_UNCACHED_IO, NULL) < 0) {
293 		// mount read-only if the cache couldn't be disabled
294 #	ifdef DEBUG
295 		FATAL(("couldn't disable cache for image file - system may dead-lock!\n"));
296 #	else
297 		FATAL(("couldn't disable cache for image file!\n"));
298 		Panic();
299 #	endif
300 	}
301 #endif
302 
303 	// read the super block
304 	char buffer[1024];
305 	if (read_pos(fDevice, 0, buffer, sizeof(buffer)) != sizeof(buffer))
306 		return B_IO_ERROR;
307 
308 	status_t status = B_OK;
309 
310 	// Note: that does work only for x86, for PowerPC, the super block
311 	// is located at offset 0!
312 	memcpy(&fSuperBlock, buffer + 512, sizeof(disk_super_block));
313 	if (!IsValidSuperBlock()) {
314 #ifndef BFS_LITTLE_ENDIAN_ONLY
315 		memcpy(&fSuperBlock, buffer, sizeof(disk_super_block));
316 		if (!IsValidSuperBlock())
317 			return B_BAD_VALUE;
318 #else
319 		return B_BAD_VALUE;
320 #endif
321 	}
322 
323 	if (!IsValidSuperBlock()) {
324 		FATAL(("invalid super block!\n"));
325 		return B_BAD_VALUE;
326 	}
327 
328 	// check if the device size is large enough to hold the file system
329 	off_t diskSize;
330 	if (opener.GetSize(&diskSize) < B_OK)
331 		RETURN_ERROR(B_ERROR);
332 	if (diskSize < (NumBlocks() << BlockShift()))
333 		RETURN_ERROR(B_BAD_VALUE);
334 
335 	// set the current log pointers, so that journaling will work correctly
336 	fLogStart = fSuperBlock.LogStart();
337 	fLogEnd = fSuperBlock.LogEnd();
338 
339 	// initialize short hands to the super block (to save byte swapping)
340 	fBlockSize = fSuperBlock.BlockSize();
341 	fBlockShift = fSuperBlock.BlockShift();
342 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
343 
344 	if (opener.InitCache(NumBlocks()) != B_OK)
345 		return B_ERROR;
346 
347 	fJournal = new Journal(this);
348 	// replaying the log is the first thing we will do on this disk
349 	if (fJournal && fJournal->InitCheck() < B_OK
350 		|| fBlockAllocator.Initialize() < B_OK) {
351 		// ToDo: improve error reporting for a bad journal
352 		FATAL(("could not initialize journal/block bitmap allocator!\n"));
353 		return B_NO_MEMORY;
354 	}
355 
356 	fRootNode = new Inode(this, ToVnode(Root()));
357 	if (fRootNode && fRootNode->InitCheck() == B_OK) {
358 		if (new_vnode(fID, ToVnode(Root()), (void *)fRootNode) == B_OK) {
359 			// try to get indices root dir
360 
361 			// question: why doesn't get_vnode() work here??
362 			// answer: we have not yet backpropagated the pointer to the
363 			// volume in bfs_mount(), so bfs_read_vnode() can't get it.
364 			// But it's not needed to do that anyway.
365 
366 			if (!Indices().IsZero())
367 				fIndicesNode = new Inode(this, ToVnode(Indices()));
368 
369 			if (fIndicesNode == NULL
370 				|| fIndicesNode->InitCheck() < B_OK
371 				|| !fIndicesNode->IsContainer()) {
372 				INFORM(("bfs: volume doesn't have indices!\n"));
373 
374 				if (fIndicesNode) {
375 					// if this is the case, the index root node is gone bad, and
376 					// BFS switch to read-only mode
377 					fFlags |= VOLUME_READ_ONLY;
378 					delete fIndicesNode;
379 					fIndicesNode = NULL;
380 				}
381 			}
382 
383 			// all went fine
384 			opener.Keep();
385 			return B_OK;
386 		} else
387 			status = B_NO_MEMORY;
388 	} else
389 		status = B_BAD_VALUE;
390 
391 	FATAL(("could not create root node: new_vnode() failed!\n"));
392 
393 	return status;
394 }
395 
396 
397 status_t
398 Volume::Unmount()
399 {
400 	// This will also flush the log & all blocks to disk
401 	delete fJournal;
402 	fJournal = NULL;
403 
404 	delete fIndicesNode;
405 
406 	remove_cached_device_blocks(fDevice, IsReadOnly() ? NO_WRITES : ALLOW_WRITES);
407 	close(fDevice);
408 
409 	return B_OK;
410 }
411 
412 
413 status_t
414 Volume::Sync()
415 {
416 	return fJournal->FlushLogAndBlocks();
417 }
418 
419 
420 status_t
421 Volume::ValidateBlockRun(block_run run)
422 {
423 	if (run.AllocationGroup() < 0 || run.AllocationGroup() > (int32)AllocationGroups()
424 		|| run.Start() > (1UL << AllocationGroupShift())
425 		|| run.length == 0
426 		|| uint32(run.Length() + run.Start()) > (1UL << AllocationGroupShift())) {
427 		Panic();
428 		FATAL(("*** invalid run(%ld,%d,%d)\n", run.AllocationGroup(), run.Start(), run.Length()));
429 		return B_BAD_DATA;
430 	}
431 	return B_OK;
432 }
433 
434 
435 block_run
436 Volume::ToBlockRun(off_t block) const
437 {
438 	block_run run;
439 	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(block >> AllocationGroupShift());
440 	run.start = HOST_ENDIAN_TO_BFS_INT16(block & ((1LL << AllocationGroupShift()) - 1));
441 	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
442 	return run;
443 }
444 
445 
446 status_t
447 Volume::CreateIndicesRoot(Transaction *transaction)
448 {
449 	off_t id;
450 	status_t status = Inode::Create(transaction, NULL, NULL,
451 		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, &id, &fIndicesNode);
452 	if (status < B_OK)
453 		RETURN_ERROR(status);
454 
455 	fSuperBlock.indices = ToBlockRun(id);
456 	return WriteSuperBlock();
457 }
458 
459 
460 status_t
461 Volume::AllocateForInode(Transaction *transaction, const Inode *parent, mode_t type, block_run &run)
462 {
463 	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(), type, run);
464 }
465 
466 
467 status_t
468 Volume::WriteSuperBlock()
469 {
470 	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block)) != sizeof(disk_super_block))
471 		return B_IO_ERROR;
472 
473 	return B_OK;
474 }
475 
476 
477 void
478 Volume::UpdateLiveQueries(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
479 	size_t oldLength, const uint8 *newKey, size_t newLength)
480 {
481 	if (fQueryLock.Lock() < B_OK)
482 		return;
483 
484 	Query *query = NULL;
485 	while ((query = fQueries.Next(query)) != NULL)
486 		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey, newLength);
487 
488 	fQueryLock.Unlock();
489 }
490 
491 
492 /** Checks if there is a live query whose results depend on the presence
493  *	or value of the specified attribute.
494  *	Don't use it if you already have all the data together to evaluate
495  *	the queries - it wouldn't safe you anything in this case.
496  */
497 
498 bool
499 Volume::CheckForLiveQuery(const char *attribute)
500 {
501 	// ToDo: check for a live query that depends on the specified attribute
502 	return true;
503 }
504 
505 
506 void
507 Volume::AddQuery(Query *query)
508 {
509 	if (fQueryLock.Lock() < B_OK)
510 		return;
511 
512 	fQueries.Add(query);
513 
514 	fQueryLock.Unlock();
515 }
516 
517 
518 void
519 Volume::RemoveQuery(Query *query)
520 {
521 	if (fQueryLock.Lock() < B_OK)
522 		return;
523 
524 	fQueries.Remove(query);
525 
526 	fQueryLock.Unlock();
527 }
528 
529 
530 //	#pragma mark -
531 //	Disk initialization
532 
533 
534 #ifdef USER
535 extern "C" void kill_device_vnodes(dev_t id);
536 	// This call is only available in the userland fs_shell
537 
538 status_t
539 Volume::Initialize(const char *device, const char *name, uint32 blockSize, uint32 flags)
540 {
541 	// although there is no really good reason for it, we won't
542 	// accept '/' in disk names (mkbfs does this, too - and since
543 	// Tracker names mounted volumes like their name)
544 	if (strchr(name, '/') != NULL)
545 		return B_BAD_VALUE;
546 
547 	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096 && blockSize != 8192)
548 		return B_BAD_VALUE;
549 
550 	DeviceOpener opener(device, O_RDWR);
551 	if (opener.Device() < B_OK)
552 		return B_BAD_VALUE;
553 
554 	fDevice = opener.Device();
555 
556 	uint32 deviceBlockSize;
557 	off_t deviceSize;
558 	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
559 		return B_ERROR;
560 
561 	off_t numBlocks = deviceSize / blockSize;
562 
563 	// create valid super block
564 
565 	fSuperBlock.Initialize(name, numBlocks, blockSize);
566 
567 	// initialize short hands to the super block (to save byte swapping)
568 	fBlockSize = fSuperBlock.BlockSize();
569 	fBlockShift = fSuperBlock.BlockShift();
570 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
571 
572 	// since the allocator has not been initialized yet, we
573 	// cannot use BlockAllocator::BitmapSize() here
574 	fSuperBlock.log_blocks = ToBlockRun(AllocationGroups()
575 		* fSuperBlock.BlocksPerAllocationGroup() + 1);
576 	fSuperBlock.log_blocks.length = 2048;
577 		// ToDo: set the log size depending on the disk size
578 	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(ToBlock(Log()));
579 
580 	// set the current log pointers, so that journaling will work correctly
581 	fLogStart = fSuperBlock.LogStart();
582 	fLogEnd = fSuperBlock.LogEnd();
583 
584 	if (!IsValidSuperBlock())
585 		RETURN_ERROR(B_ERROR);
586 
587 	if (opener.InitCache(numBlocks) != B_OK)
588 		return B_ERROR;
589 
590 	fJournal = new Journal(this);
591 	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
592 		RETURN_ERROR(B_ERROR);
593 
594 	// ready to write data to disk
595 
596 	Transaction transaction(this, 0);
597 
598 	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
599 		RETURN_ERROR(B_ERROR);
600 
601 	off_t id;
602 	status_t status = Inode::Create(&transaction, NULL, NULL,
603 		S_DIRECTORY | 0755, 0, 0, &id, &fRootNode);
604 	if (status < B_OK)
605 		RETURN_ERROR(status);
606 
607 	fSuperBlock.root_dir = ToBlockRun(id);
608 
609 	if ((flags & VOLUME_NO_INDICES) == 0) {
610 		// The indices root directory will be created automatically
611 		// when the standard indices are created (or any other).
612 		Index index(this);
613 		status = index.Create(&transaction, "name", B_STRING_TYPE);
614 		if (status < B_OK)
615 			return status;
616 
617 		status = index.Create(&transaction, "last_modified", B_INT64_TYPE);
618 		if (status < B_OK)
619 			return status;
620 
621 		status = index.Create(&transaction, "size", B_INT64_TYPE);
622 		if (status < B_OK)
623 			return status;
624 	}
625 
626 	WriteSuperBlock();
627 	transaction.Done();
628 
629 	put_vnode(ID(), fRootNode->ID());
630 	if (fIndicesNode != NULL)
631 		put_vnode(ID(), fIndicesNode->ID());
632 
633 	kill_device_vnodes(ID());
634 		// This call is only available in the userland fs_shell
635 
636 	Sync();
637 	opener.RemoveCache(ALLOW_WRITES);
638 	return B_OK;
639 }
640 #endif
641