xref: /haiku/src/add-ons/kernel/file_systems/bfs/Volume.cpp (revision db408f1835c789c3a0e7aea7a8a18ad630750ff0)
1 /*
2  * Copyright 2001-2019, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //! superblock, mounting, etc.
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "Inode.h"
14 #include "Journal.h"
15 #include "Query.h"
16 #include "Volume.h"
17 
18 
static const int32 kDesiredAllocationGroups = 56;
	// This is the number of allocation groups we try to end up with
	// on newly initialized disks (see disk_super_block::Initialize()).
	// That's only relevant for smaller disks, though, since any
	// of today's disk sizes already reach the maximum length
	// of an allocation group (65536 blocks).
	// It seems to create appropriate numbers for smaller disks
	// with this setting, though (i.e. you can create a 400 MB
	// file on a 1 GB disk without the need for double indirect
	// blocks).
29 
30 
31 class DeviceOpener {
32 public:
33 						DeviceOpener(int fd, int mode);
34 						DeviceOpener(const char* device, int mode);
35 						~DeviceOpener();
36 
37 			int			Open(const char* device, int mode);
38 			int			Open(int fd, int mode);
39 			void*		InitCache(off_t numBlocks, uint32 blockSize);
40 			void		RemoveCache(bool allowWrites);
41 
42 			void		Keep();
43 
44 			int			Device() const { return fDevice; }
45 			int			Mode() const { return fMode; }
46 			bool		IsReadOnly() const { return _IsReadOnly(fMode); }
47 
48 			status_t	GetSize(off_t* _size, uint32* _blockSize = NULL);
49 
50 private:
51 	static	bool		_IsReadOnly(int mode)
52 							{ return (mode & O_RWMASK) == O_RDONLY;}
53 	static	bool		_IsReadWrite(int mode)
54 							{ return (mode & O_RWMASK) == O_RDWR;}
55 
56 			int			fDevice;
57 			int			fMode;
58 			void*		fBlockCache;
59 };
60 
61 
62 DeviceOpener::DeviceOpener(const char* device, int mode)
63 	:
64 	fBlockCache(NULL)
65 {
66 	Open(device, mode);
67 }
68 
69 
70 DeviceOpener::DeviceOpener(int fd, int mode)
71 	:
72 	fBlockCache(NULL)
73 {
74 	Open(fd, mode);
75 }
76 
77 
78 DeviceOpener::~DeviceOpener()
79 {
80 	if (fDevice >= 0) {
81 		RemoveCache(false);
82 		close(fDevice);
83 	}
84 }
85 
86 
87 int
88 DeviceOpener::Open(const char* device, int mode)
89 {
90 	fDevice = open(device, mode | O_NOCACHE);
91 	if (fDevice < 0)
92 		fDevice = errno;
93 
94 	if (fDevice < 0 && _IsReadWrite(mode)) {
95 		// try again to open read-only (don't rely on a specific error code)
96 		return Open(device, O_RDONLY | O_NOCACHE);
97 	}
98 
99 	if (fDevice >= 0) {
100 		// opening succeeded
101 		fMode = mode;
102 		if (_IsReadWrite(mode)) {
103 			// check out if the device really allows for read/write access
104 			device_geometry geometry;
105 			if (!ioctl(fDevice, B_GET_GEOMETRY, &geometry)) {
106 				if (geometry.read_only) {
107 					// reopen device read-only
108 					close(fDevice);
109 					return Open(device, O_RDONLY | O_NOCACHE);
110 				}
111 			}
112 		}
113 	}
114 
115 	return fDevice;
116 }
117 
118 
119 int
120 DeviceOpener::Open(int fd, int mode)
121 {
122 	fDevice = dup(fd);
123 	if (fDevice < 0)
124 		return errno;
125 
126 	fMode = mode;
127 
128 	return fDevice;
129 }
130 
131 
132 void*
133 DeviceOpener::InitCache(off_t numBlocks, uint32 blockSize)
134 {
135 	return fBlockCache = block_cache_create(fDevice, numBlocks, blockSize,
136 		IsReadOnly());
137 }
138 
139 
140 void
141 DeviceOpener::RemoveCache(bool allowWrites)
142 {
143 	if (fBlockCache == NULL)
144 		return;
145 
146 	block_cache_delete(fBlockCache, allowWrites);
147 	fBlockCache = NULL;
148 }
149 
150 
151 void
152 DeviceOpener::Keep()
153 {
154 	fDevice = -1;
155 }
156 
157 
158 /*!	Returns the size of the device in bytes. It uses B_GET_GEOMETRY
159 	to compute the size, or fstat() if that failed.
160 */
161 status_t
162 DeviceOpener::GetSize(off_t* _size, uint32* _blockSize)
163 {
164 	device_geometry geometry;
165 	if (ioctl(fDevice, B_GET_GEOMETRY, &geometry) < 0) {
166 		// maybe it's just a file
167 		struct stat stat;
168 		if (fstat(fDevice, &stat) < 0)
169 			return B_ERROR;
170 
171 		if (_size)
172 			*_size = stat.st_size;
173 		if (_blockSize)	// that shouldn't cause us any problems
174 			*_blockSize = 512;
175 
176 		return B_OK;
177 	}
178 
179 	if (_size) {
180 		*_size = 1LL * geometry.head_count * geometry.cylinder_count
181 			* geometry.sectors_per_track * geometry.bytes_per_sector;
182 	}
183 	if (_blockSize)
184 		*_blockSize = geometry.bytes_per_sector;
185 
186 	return B_OK;
187 }
188 
189 
190 //	#pragma mark -
191 
192 
193 bool
194 disk_super_block::IsValid() const
195 {
196 	if (Magic1() != (int32)SUPER_BLOCK_MAGIC1
197 		|| Magic2() != (int32)SUPER_BLOCK_MAGIC2
198 		|| Magic3() != (int32)SUPER_BLOCK_MAGIC3
199 		|| (int32)block_size != inode_size
200 		|| ByteOrder() != SUPER_BLOCK_FS_LENDIAN
201 		|| (1UL << BlockShift()) != BlockSize()
202 		|| AllocationGroups() < 1
203 		|| AllocationGroupShift() < 1
204 		|| BlocksPerAllocationGroup() < 1
205 		|| NumBlocks() < 10
206 		|| AllocationGroups() != divide_roundup(NumBlocks(),
207 			1L << AllocationGroupShift()))
208 		return false;
209 
210 	return true;
211 }
212 
213 
214 void
215 disk_super_block::Initialize(const char* diskName, off_t numBlocks,
216 	uint32 blockSize)
217 {
218 	memset(this, 0, sizeof(disk_super_block));
219 
220 	magic1 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC1);
221 	magic2 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC2);
222 	magic3 = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_MAGIC3);
223 	fs_byte_order = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_FS_LENDIAN);
224 	flags = HOST_ENDIAN_TO_BFS_INT32(SUPER_BLOCK_DISK_CLEAN);
225 
226 	strlcpy(name, diskName, sizeof(name));
227 
228 	int32 blockShift = 9;
229 	while ((1UL << blockShift) < blockSize) {
230 		blockShift++;
231 	}
232 
233 	block_size = inode_size = HOST_ENDIAN_TO_BFS_INT32(blockSize);
234 	block_shift = HOST_ENDIAN_TO_BFS_INT32(blockShift);
235 
236 	num_blocks = HOST_ENDIAN_TO_BFS_INT64(numBlocks);
237 	used_blocks = 0;
238 
239 	// Get the minimum ag_shift (that's determined by the block size)
240 
241 	int32 bitsPerBlock = blockSize << 3;
242 	off_t bitmapBlocks = (numBlocks + bitsPerBlock - 1) / bitsPerBlock;
243 	int32 blocksPerGroup = 1;
244 	int32 groupShift = 13;
245 
246 	for (int32 i = 8192; i < bitsPerBlock; i *= 2) {
247 		groupShift++;
248 	}
249 
250 	// Many allocation groups help applying allocation policies, but if
251 	// they are too small, we will need to many block_runs to cover large
252 	// files (see above to get an explanation of the kDesiredAllocationGroups
253 	// constant).
254 
255 	int32 numGroups;
256 
257 	while (true) {
258 		numGroups = (bitmapBlocks + blocksPerGroup - 1) / blocksPerGroup;
259 		if (numGroups > kDesiredAllocationGroups) {
260 			if (groupShift == 16)
261 				break;
262 
263 			groupShift++;
264 			blocksPerGroup *= 2;
265 		} else
266 			break;
267 	}
268 
269 	num_ags = HOST_ENDIAN_TO_BFS_INT32(numGroups);
270 	blocks_per_ag = HOST_ENDIAN_TO_BFS_INT32(blocksPerGroup);
271 	ag_shift = HOST_ENDIAN_TO_BFS_INT32(groupShift);
272 }
273 
274 
275 //	#pragma mark -
276 
277 
278 Volume::Volume(fs_volume* volume)
279 	:
280 	fVolume(volume),
281 	fBlockAllocator(this),
282 	fRootNode(NULL),
283 	fIndicesNode(NULL),
284 	fDirtyCachedBlocks(0),
285 	fFlags(0),
286 	fCheckingThread(-1),
287 	fCheckVisitor(NULL)
288 {
289 	mutex_init(&fLock, "bfs volume");
290 	mutex_init(&fQueryLock, "bfs queries");
291 }
292 
293 
294 Volume::~Volume()
295 {
296 	mutex_destroy(&fQueryLock);
297 	mutex_destroy(&fLock);
298 }
299 
300 
301 bool
302 Volume::IsValidSuperBlock() const
303 {
304 	return fSuperBlock.IsValid();
305 }
306 
307 
308 /*!	Checks whether the given block number may be the location of an inode block.
309 */
310 bool
311 Volume::IsValidInodeBlock(off_t block) const
312 {
313 	return block > fSuperBlock.LogEnd() && block < NumBlocks();
314 }
315 
316 
317 void
318 Volume::Panic()
319 {
320 	FATAL(("Disk corrupted... switch to read-only mode!\n"));
321 	fFlags |= VOLUME_READ_ONLY;
322 #if KDEBUG
323 	kernel_debugger("BFS panics!");
324 #endif
325 }
326 
327 
328 status_t
329 Volume::Mount(const char* deviceName, uint32 flags)
330 {
331 	// TODO: validate the FS in write mode as well!
332 #if (B_HOST_IS_LENDIAN && defined(BFS_BIG_ENDIAN_ONLY)) \
333 	|| (B_HOST_IS_BENDIAN && defined(BFS_LITTLE_ENDIAN_ONLY))
334 	// in big endian mode, we only mount read-only for now
335 	flags |= B_MOUNT_READ_ONLY;
336 #endif
337 
338 	DeviceOpener opener(deviceName, (flags & B_MOUNT_READ_ONLY) != 0
339 		? O_RDONLY : O_RDWR);
340 	fDevice = opener.Device();
341 	if (fDevice < B_OK)
342 		RETURN_ERROR(fDevice);
343 
344 	if (opener.IsReadOnly())
345 		fFlags |= VOLUME_READ_ONLY;
346 
347 	// read the superblock
348 	if (Identify(fDevice, &fSuperBlock) != B_OK) {
349 		FATAL(("invalid superblock!\n"));
350 		return B_BAD_VALUE;
351 	}
352 
353 	// initialize short hands to the superblock (to save byte swapping)
354 	fBlockSize = fSuperBlock.BlockSize();
355 	fBlockShift = fSuperBlock.BlockShift();
356 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
357 
358 	// check if the device size is large enough to hold the file system
359 	off_t diskSize;
360 	if (opener.GetSize(&diskSize, &fDeviceBlockSize) != B_OK)
361 		RETURN_ERROR(B_ERROR);
362 	if (diskSize < (NumBlocks() << BlockShift())) {
363 		FATAL(("Disk size (%" B_PRIdOFF " bytes) < file system size (%"
364 			B_PRIdOFF " bytes)!\n", diskSize, NumBlocks() << BlockShift()));
365 		RETURN_ERROR(B_BAD_VALUE);
366 	}
367 
368 	// set the current log pointers, so that journaling will work correctly
369 	fLogStart = fSuperBlock.LogStart();
370 	fLogEnd = fSuperBlock.LogEnd();
371 
372 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
373 		return B_ERROR;
374 
375 	fJournal = new(std::nothrow) Journal(this);
376 	if (fJournal == NULL)
377 		return B_NO_MEMORY;
378 
379 	status_t status = fJournal->InitCheck();
380 	if (status < B_OK) {
381 		FATAL(("could not initialize journal: %s!\n", strerror(status)));
382 		return status;
383 	}
384 
385 	// replaying the log is the first thing we will do on this disk
386 	status = fJournal->ReplayLog();
387 	if (status != B_OK) {
388 		FATAL(("Replaying log failed, data may be corrupted, volume "
389 			"read-only.\n"));
390 		fFlags |= VOLUME_READ_ONLY;
391 			// TODO: if this is the boot volume, Bootscript will assume this
392 			// is a CD...
393 			// TODO: it would be nice to have a user visible alert instead
394 			// of letting him just find this in the syslog.
395 	}
396 
397 	status = fBlockAllocator.Initialize();
398 	if (status != B_OK) {
399 		FATAL(("could not initialize block bitmap allocator!\n"));
400 		return status;
401 	}
402 
403 	fRootNode = new(std::nothrow) Inode(this, ToVnode(Root()));
404 	if (fRootNode != NULL && fRootNode->InitCheck() == B_OK) {
405 		status = publish_vnode(fVolume, ToVnode(Root()), (void*)fRootNode,
406 			&gBFSVnodeOps, fRootNode->Mode(), 0);
407 		if (status == B_OK) {
408 			// try to get indices root dir
409 
410 			if (!Indices().IsZero()) {
411 				fIndicesNode = new(std::nothrow) Inode(this,
412 					ToVnode(Indices()));
413 			}
414 
415 			if (fIndicesNode == NULL
416 				|| fIndicesNode->InitCheck() < B_OK
417 				|| !fIndicesNode->IsContainer()) {
418 				INFORM(("bfs: volume doesn't have indices!\n"));
419 
420 				if (fIndicesNode) {
421 					// if this is the case, the index root node is gone bad,
422 					// and BFS switch to read-only mode
423 					fFlags |= VOLUME_READ_ONLY;
424 					delete fIndicesNode;
425 					fIndicesNode = NULL;
426 				}
427 			} else {
428 				// we don't use the vnode layer to access the indices node
429 			}
430 		} else {
431 			FATAL(("could not create root node: publish_vnode() failed!\n"));
432 			delete fRootNode;
433 			return status;
434 		}
435 	} else {
436 		status = B_BAD_VALUE;
437 		FATAL(("could not create root node!\n"));
438 
439 		// We need to wait for the block allocator to finish
440 		fBlockAllocator.Uninitialize();
441 		return status;
442 	}
443 
444 	// all went fine
445 	opener.Keep();
446 	return B_OK;
447 }
448 
449 
450 status_t
451 Volume::Unmount()
452 {
453 	put_vnode(fVolume, ToVnode(Root()));
454 
455 	fBlockAllocator.Uninitialize();
456 
457 	// This will also flush the log & all blocks to disk
458 	delete fJournal;
459 	fJournal = NULL;
460 
461 	delete fIndicesNode;
462 
463 	block_cache_delete(fBlockCache, !IsReadOnly());
464 	close(fDevice);
465 
466 	return B_OK;
467 }
468 
469 
470 status_t
471 Volume::Sync()
472 {
473 	return fJournal->FlushLogAndBlocks();
474 }
475 
476 
477 status_t
478 Volume::ValidateBlockRun(block_run run)
479 {
480 	if (run.AllocationGroup() < 0
481 		|| run.AllocationGroup() > (int32)AllocationGroups()
482 		|| run.Start() > (1UL << AllocationGroupShift())
483 		|| run.length == 0
484 		|| uint32(run.Length() + run.Start())
485 				> (1UL << AllocationGroupShift())) {
486 		Panic();
487 		FATAL(("*** invalid run(%d,%d,%d)\n", (int)run.AllocationGroup(),
488 			run.Start(), run.Length()));
489 		return B_BAD_DATA;
490 	}
491 	return B_OK;
492 }
493 
494 
495 block_run
496 Volume::ToBlockRun(off_t block) const
497 {
498 	block_run run;
499 	run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(
500 		block >> AllocationGroupShift());
501 	run.start = HOST_ENDIAN_TO_BFS_INT16(
502 		block & ((1LL << AllocationGroupShift()) - 1));
503 	run.length = HOST_ENDIAN_TO_BFS_INT16(1);
504 	return run;
505 }
506 
507 
508 status_t
509 Volume::CreateIndicesRoot(Transaction& transaction)
510 {
511 	off_t id;
512 	status_t status = Inode::Create(transaction, NULL, NULL,
513 		S_INDEX_DIR | S_STR_INDEX | S_DIRECTORY | 0700, 0, 0, NULL, &id,
514 		&fIndicesNode, NULL, BFS_DO_NOT_PUBLISH_VNODE);
515 	if (status < B_OK)
516 		RETURN_ERROR(status);
517 
518 	fSuperBlock.indices = ToBlockRun(id);
519 	return WriteSuperBlock();
520 }
521 
522 
523 status_t
524 Volume::CreateVolumeID(Transaction& transaction)
525 {
526 	Attribute attr(fRootNode);
527 	status_t status;
528 	attr_cookie* cookie;
529 	status = attr.Create("be:volume_id", B_UINT64_TYPE, O_RDWR, &cookie);
530 	if (status == B_OK) {
531 		static bool seeded = false;
532 		if (!seeded) {
533 			// seed the random number generator for the be:volume_id attribute.
534 			srand(time(NULL));
535 			seeded = true;
536 		}
537 		uint64_t id;
538 		size_t length = sizeof(id);
539 		id = ((uint64_t)rand() << 32) | rand();
540 		attr.Write(transaction, cookie, 0, (uint8_t *)&id, &length, NULL);
541 	}
542 	return status;
543 }
544 
545 
546 
547 status_t
548 Volume::AllocateForInode(Transaction& transaction, const Inode* parent,
549 	mode_t type, block_run& run)
550 {
551 	return fBlockAllocator.AllocateForInode(transaction, &parent->BlockRun(),
552 		type, run);
553 }
554 
555 
556 status_t
557 Volume::WriteSuperBlock()
558 {
559 	if (write_pos(fDevice, 512, &fSuperBlock, sizeof(disk_super_block))
560 			!= sizeof(disk_super_block))
561 		return B_IO_ERROR;
562 
563 	return B_OK;
564 }
565 
566 
567 void
568 Volume::UpdateLiveQueries(Inode* inode, const char* attribute, int32 type,
569 	const uint8* oldKey, size_t oldLength, const uint8* newKey,
570 	size_t newLength)
571 {
572 	MutexLocker _(fQueryLock);
573 
574 	SinglyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
575 	while (iterator.HasNext()) {
576 		Query* query = iterator.Next();
577 		query->LiveUpdate(inode, attribute, type, oldKey, oldLength, newKey,
578 			newLength);
579 	}
580 }
581 
582 
583 void
584 Volume::UpdateLiveQueriesRenameMove(Inode* inode, ino_t oldDirectoryID,
585 	const char* oldName, ino_t newDirectoryID, const char* newName)
586 {
587 	MutexLocker _(fQueryLock);
588 
589 	size_t oldLength = strlen(oldName);
590 	size_t newLength = strlen(newName);
591 
592 	SinglyLinkedList<Query>::Iterator iterator = fQueries.GetIterator();
593 	while (iterator.HasNext()) {
594 		Query* query = iterator.Next();
595 		query->LiveUpdateRenameMove(inode, oldDirectoryID, oldName, oldLength,
596 			newDirectoryID, newName, newLength);
597 	}
598 }
599 
600 
601 /*!	Checks if there is a live query whose results depend on the presence
602 	or value of the specified attribute.
603 	Don't use it if you already have all the data together to evaluate
604 	the queries - it wouldn't safe you anything in this case.
605 */
606 bool
607 Volume::CheckForLiveQuery(const char* attribute)
608 {
609 	// TODO: check for a live query that depends on the specified attribute
610 	return true;
611 }
612 
613 
614 void
615 Volume::AddQuery(Query* query)
616 {
617 	MutexLocker _(fQueryLock);
618 	fQueries.Add(query);
619 }
620 
621 
622 void
623 Volume::RemoveQuery(Query* query)
624 {
625 	MutexLocker _(fQueryLock);
626 	fQueries.Remove(query);
627 }
628 
629 
630 status_t
631 Volume::CreateCheckVisitor()
632 {
633 	if (fCheckVisitor != NULL)
634 		return B_BUSY;
635 
636 	fCheckVisitor = new(std::nothrow) ::CheckVisitor(this);
637 	if (fCheckVisitor == NULL)
638 		return B_NO_MEMORY;
639 
640 	return B_OK;
641 }
642 
643 
644 void
645 Volume::DeleteCheckVisitor()
646 {
647 	delete fCheckVisitor;
648 	fCheckVisitor = NULL;
649 }
650 
651 
652 //	#pragma mark - Disk scanning and initialization
653 
654 
655 /*static*/ status_t
656 Volume::CheckSuperBlock(const uint8* data, uint32* _offset)
657 {
658 	disk_super_block* superBlock = (disk_super_block*)(data + 512);
659 	if (superBlock->IsValid()) {
660 		if (_offset != NULL)
661 			*_offset = 512;
662 		return B_OK;
663 	}
664 
665 #ifndef BFS_LITTLE_ENDIAN_ONLY
666 	// For PPC, the superblock might be located at offset 0
667 	superBlock = (disk_super_block*)data;
668 	if (superBlock->IsValid()) {
669 		if (_offset != NULL)
670 			*_offset = 0;
671 		return B_OK;
672 	}
673 #endif
674 
675 	return B_BAD_VALUE;
676 }
677 
678 
679 /*static*/ status_t
680 Volume::Identify(int fd, disk_super_block* superBlock)
681 {
682 	uint8 buffer[1024];
683 	if (read_pos(fd, 0, buffer, sizeof(buffer)) != sizeof(buffer))
684 		return B_IO_ERROR;
685 
686 	uint32 offset;
687 	if (CheckSuperBlock(buffer, &offset) != B_OK)
688 		return B_BAD_VALUE;
689 
690 	memcpy(superBlock, buffer + offset, sizeof(disk_super_block));
691 	return B_OK;
692 }
693 
694 
695 status_t
696 Volume::Initialize(int fd, const char* name, uint32 blockSize,
697 	uint32 flags)
698 {
699 	// although there is no really good reason for it, we won't
700 	// accept '/' in disk names (mkbfs does this, too - and since
701 	// Tracker names mounted volumes like their name)
702 	if (strchr(name, '/') != NULL)
703 		return B_BAD_VALUE;
704 
705 	if (blockSize != 1024 && blockSize != 2048 && blockSize != 4096
706 		&& blockSize != 8192)
707 		return B_BAD_VALUE;
708 
709 	DeviceOpener opener(fd, O_RDWR);
710 	if (opener.Device() < B_OK)
711 		return B_BAD_VALUE;
712 
713 	if (opener.IsReadOnly())
714 		return B_READ_ONLY_DEVICE;
715 
716 	fDevice = opener.Device();
717 
718 	uint32 deviceBlockSize;
719 	off_t deviceSize;
720 	if (opener.GetSize(&deviceSize, &deviceBlockSize) < B_OK)
721 		return B_ERROR;
722 
723 	off_t numBlocks = deviceSize / blockSize;
724 
725 	// create valid superblock
726 
727 	fSuperBlock.Initialize(name, numBlocks, blockSize);
728 
729 	// initialize short hands to the superblock (to save byte swapping)
730 	fBlockSize = fSuperBlock.BlockSize();
731 	fBlockShift = fSuperBlock.BlockShift();
732 	fAllocationGroupShift = fSuperBlock.AllocationGroupShift();
733 
734 	// determine log size depending on the size of the volume
735 	off_t logSize = 2048;
736 	if (numBlocks <= 20480)
737 		logSize = 512;
738 	if (deviceSize > 1LL * 1024 * 1024 * 1024)
739 		logSize = 4096;
740 
741 	// since the allocator has not been initialized yet, we
742 	// cannot use BlockAllocator::BitmapSize() here
743 	off_t bitmapBlocks = (numBlocks + blockSize * 8 - 1) / (blockSize * 8);
744 
745 	fSuperBlock.log_blocks = ToBlockRun(bitmapBlocks + 1);
746 	fSuperBlock.log_blocks.length = HOST_ENDIAN_TO_BFS_INT16(logSize);
747 	fSuperBlock.log_start = fSuperBlock.log_end = HOST_ENDIAN_TO_BFS_INT64(
748 		ToBlock(Log()));
749 
750 	// set the current log pointers, so that journaling will work correctly
751 	fLogStart = fSuperBlock.LogStart();
752 	fLogEnd = fSuperBlock.LogEnd();
753 
754 	if (!IsValidSuperBlock())
755 		RETURN_ERROR(B_ERROR);
756 
757 	if ((fBlockCache = opener.InitCache(NumBlocks(), fBlockSize)) == NULL)
758 		return B_ERROR;
759 
760 	fJournal = new(std::nothrow) Journal(this);
761 	if (fJournal == NULL || fJournal->InitCheck() < B_OK)
762 		RETURN_ERROR(B_ERROR);
763 
764 	// ready to write data to disk
765 
766 	Transaction transaction(this, 0);
767 
768 	if (fBlockAllocator.InitializeAndClearBitmap(transaction) < B_OK)
769 		RETURN_ERROR(B_ERROR);
770 
771 	off_t id;
772 	status_t status = Inode::Create(transaction, NULL, NULL,
773 		S_DIRECTORY | 0755, 0, 0, NULL, &id, &fRootNode);
774 	if (status < B_OK)
775 		RETURN_ERROR(status);
776 
777 	fSuperBlock.root_dir = ToBlockRun(id);
778 
779 	if ((flags & VOLUME_NO_INDICES) == 0) {
780 		// The indices root directory will be created automatically
781 		// when the standard indices are created (or any other).
782 		Index index(this);
783 		status = index.Create(transaction, "name", B_STRING_TYPE);
784 		if (status < B_OK)
785 			return status;
786 
787 		status = index.Create(transaction, "BEOS:APP_SIG", B_STRING_TYPE);
788 		if (status < B_OK)
789 			return status;
790 
791 		status = index.Create(transaction, "last_modified", B_INT64_TYPE);
792 		if (status < B_OK)
793 			return status;
794 
795 		status = index.Create(transaction, "size", B_INT64_TYPE);
796 		if (status < B_OK)
797 			return status;
798 	}
799 
800 	status = CreateVolumeID(transaction);
801 	if (status < B_OK)
802 		return status;
803 
804 	status = _EraseUnusedBootBlock();
805 	if (status < B_OK)
806 		return status;
807 
808 	status = WriteSuperBlock();
809 	if (status < B_OK)
810 		return status;
811 
812 	status = transaction.Done();
813 	if (status < B_OK)
814 		return status;
815 
816 	Sync();
817 	opener.RemoveCache(true);
818 	return B_OK;
819 }
820 
821 
822 /*!	Erase the first boot block, as we don't use it and there
823  *	might be leftovers from other file systems. This can cause
824  *	confusion for identifying the partition if not erased.
825  */
826 status_t
827 Volume::_EraseUnusedBootBlock()
828 {
829 	const int32 blockSize = 512;
830 	const char emptySector[blockSize] = { 0 };
831 	if (write_pos(fDevice, 0, emptySector, blockSize) != blockSize)
832 		return B_IO_ERROR;
833 
834 	return B_OK;
835 }
836