xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision d12bb8b14803d030b4a8fba91131e4bb96c4f406)
1 /*
2  * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "Volume.h"
14 #include "Inode.h"
15 #include "Index.h"
16 #include "BPlusTree.h"
17 #include "Query.h"
18 #include "ResizeVisitor.h"
19 #include "bfs_control.h"
20 #include "bfs_disk_system.h"
21 
22 // TODO: temporary solution as long as there is no public I/O requests API
23 #ifndef FS_SHELL
24 #	include <io_requests.h>
25 #	include <util/fs_trim_support.h>
26 #endif
27 
28 
// I/O size reported to callers: used as stat.st_blksize in
// fill_stat_buffer() and as fs_info::io_size in bfs_read_fs_stat().
#define BFS_IO_SIZE	65536

// Compile-time string suffixes: empty when BFS_LITTLE_ENDIAN_ONLY is defined,
// "_big" and " (Big Endian)" otherwise.
// NOTE(review): neither macro is referenced in this part of the file -
// presumably they are used for the module names further down; confirm.
#if defined(BFS_LITTLE_ENDIAN_ONLY)
#define BFS_ENDIAN_SUFFIX ""
#define BFS_ENDIAN_PRETTY_SUFFIX ""
#else
#define BFS_ENDIAN_SUFFIX "_big"
#define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
#endif
38 
39 
// Cookie allocated by bfs_identify_partition() and later consumed by
// bfs_scan_partition(); it carries a copy of the super block that was read
// while identifying the partition.
struct identify_cookie {
	disk_super_block super_block;
};

// Not static: declared extern here and defined further below.
extern void fill_stat_buffer(Inode* inode, struct stat& stat);
45 
46 
47 static void
48 fill_stat_time(const bfs_inode& node, struct stat& stat)
49 {
50 	bigtime_t now = real_time_clock_usecs();
51 	stat.st_atim.tv_sec = now / 1000000LL;
52 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
53 
54 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
55 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
56 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
57 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
58 
59 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
60 	bigtime_t changeTime = node.StatusChangeTime();
61 	if (changeTime < node.LastModifiedTime())
62 		stat.st_ctim = stat.st_mtim;
63 	else {
64 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
65 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
66 	}
67 }
68 
69 
70 void
71 fill_stat_buffer(Inode* inode, struct stat& stat)
72 {
73 	const bfs_inode& node = inode->Node();
74 
75 	stat.st_dev = inode->GetVolume()->ID();
76 	stat.st_ino = inode->ID();
77 	stat.st_nlink = 1;
78 	stat.st_blksize = BFS_IO_SIZE;
79 
80 	stat.st_uid = node.UserID();
81 	stat.st_gid = node.GroupID();
82 	stat.st_mode = node.Mode();
83 	stat.st_type = node.Type();
84 
85 	fill_stat_time(node, stat);
86 
87 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
88 		// symlinks report the size of the link here
89 		stat.st_size = strlen(node.short_symlink);
90 	} else
91 		stat.st_size = inode->Size();
92 
93 	stat.st_blocks = inode->AllocatedSize() / 512;
94 }
95 
96 
97 //!	bfs_io() callback hook
98 static status_t
99 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
100 	size_t size, struct file_io_vec* vecs, size_t* _count)
101 {
102 	Inode* inode = (Inode*)cookie;
103 
104 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
105 		inode->GetVolume()->BlockSize());
106 }
107 
108 
109 //!	bfs_io() callback hook
110 static status_t
111 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
112 	bool partialTransfer, size_t bytesTransferred)
113 {
114 	Inode* inode = (Inode*)cookie;
115 	rw_lock_read_unlock(&inode->Lock());
116 	return B_OK;
117 }
118 
119 
120 //	#pragma mark - Scanning
121 
122 
123 static float
124 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
125 {
126 	disk_super_block superBlock;
127 	status_t status = Volume::Identify(fd, &superBlock);
128 	if (status != B_OK)
129 		return -1;
130 
131 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
132 	if (cookie == NULL)
133 		return -1;
134 
135 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
136 
137 	*_cookie = cookie;
138 	return 0.85f;
139 }
140 
141 
142 static status_t
143 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
144 {
145 	identify_cookie* cookie = (identify_cookie*)_cookie;
146 
147 	partition->status = B_PARTITION_VALID;
148 	partition->flags |= B_PARTITION_FILE_SYSTEM;
149 	partition->content_size = cookie->super_block.NumBlocks()
150 		* cookie->super_block.BlockSize();
151 	partition->block_size = cookie->super_block.BlockSize();
152 	partition->content_name = strdup(cookie->super_block.name);
153 	if (partition->content_name == NULL)
154 		return B_NO_MEMORY;
155 
156 	return B_OK;
157 }
158 
159 
160 static void
161 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
162 {
163 	identify_cookie* cookie = (identify_cookie*)_cookie;
164 	delete cookie;
165 }
166 
167 
168 //	#pragma mark -
169 
170 
171 static status_t
172 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
173 	const char* args, ino_t* _rootID)
174 {
175 	FUNCTION();
176 
177 	Volume* volume = new(std::nothrow) Volume(_volume);
178 	if (volume == NULL)
179 		return B_NO_MEMORY;
180 
181 	status_t status = volume->Mount(device, flags);
182 	if (status != B_OK) {
183 		delete volume;
184 		RETURN_ERROR(status);
185 	}
186 
187 	_volume->private_volume = volume;
188 	_volume->ops = &gBFSVolumeOps;
189 	*_rootID = volume->ToVnode(volume->Root());
190 
191 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
192 		volume->Name(), *_rootID, device));
193 	return B_OK;
194 }
195 
196 
197 static status_t
198 bfs_unmount(fs_volume* _volume)
199 {
200 	FUNCTION();
201 	Volume* volume = (Volume*)_volume->private_volume;
202 
203 	status_t status = volume->Unmount();
204 	delete volume;
205 
206 	RETURN_ERROR(status);
207 }
208 
209 
210 static status_t
211 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
212 {
213 	FUNCTION();
214 
215 	Volume* volume = (Volume*)_volume->private_volume;
216 	MutexLocker locker(volume->Lock());
217 
218 	// File system flags.
219 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
220 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
221 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
222 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
223 
224 	info->io_size = BFS_IO_SIZE;
225 		// whatever is appropriate here?
226 
227 	info->block_size = volume->BlockSize();
228 	info->total_blocks = volume->NumBlocks();
229 	info->free_blocks = volume->FreeBlocks();
230 
231 	// Volume name
232 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
233 
234 	// File system name
235 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
236 
237 	return B_OK;
238 }
239 
240 
241 static status_t
242 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
243 {
244 	FUNCTION_START(("mask = %ld\n", mask));
245 
246 	Volume* volume = (Volume*)_volume->private_volume;
247 	if (volume->IsReadOnly())
248 		return B_READ_ONLY_DEVICE;
249 
250 	MutexLocker locker(volume->Lock());
251 
252 	status_t status = B_BAD_VALUE;
253 
254 	if (mask & FS_WRITE_FSINFO_NAME) {
255 		disk_super_block& superBlock = volume->SuperBlock();
256 
257 		strncpy(superBlock.name, info->volume_name,
258 			sizeof(superBlock.name) - 1);
259 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
260 
261 		status = volume->WriteSuperBlock();
262 	}
263 	return status;
264 }
265 
266 
267 static status_t
268 bfs_sync(fs_volume* _volume)
269 {
270 	FUNCTION();
271 
272 	Volume* volume = (Volume*)_volume->private_volume;
273 	return volume->Sync();
274 }
275 
276 
277 //	#pragma mark -
278 
279 
280 /*!	Reads in the node from disk and creates an inode object from it.
281 */
282 static status_t
283 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
284 	uint32* _flags, bool reenter)
285 {
286 	//FUNCTION_START(("ino_t = %Ld\n", id));
287 	Volume* volume = (Volume*)_volume->private_volume;
288 
289 	// first inode may be after the log area, we don't go through
290 	// the hassle and try to load an earlier block from disk
291 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
292 		|| id > volume->NumBlocks()) {
293 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
294 		return B_ERROR;
295 	}
296 
297 	CachedBlock cached(volume, id);
298 	bfs_inode* node = (bfs_inode*)cached.Block();
299 	if (node == NULL) {
300 		FATAL(("could not read inode: %" B_PRIdINO "\n", id));
301 		return B_IO_ERROR;
302 	}
303 
304 	status_t status = node->InitCheck(volume);
305 	if (status != B_OK) {
306 		if ((node->Flags() & INODE_DELETED) != 0) {
307 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
308 		} else {
309 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
310 				strerror(status)));
311 		}
312 		return status;
313 	}
314 
315 	Inode* inode = new(std::nothrow) Inode(volume, id);
316 	if (inode == NULL)
317 		return B_NO_MEMORY;
318 
319 	status = inode->InitCheck(false);
320 	if (status != B_OK)
321 		delete inode;
322 
323 	if (status == B_OK) {
324 		_node->private_node = inode;
325 		_node->ops = &gBFSVnodeOps;
326 		*_type = inode->Mode();
327 		*_flags = 0;
328 	}
329 
330 	return status;
331 }
332 
333 
334 static status_t
335 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
336 {
337 	Volume* volume = (Volume*)_volume->private_volume;
338 	Inode* inode = (Inode*)_node->private_node;
339 
340 	// since a directory's size can be changed without having it opened,
341 	// we need to take care about their preallocated blocks here
342 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
343 		&& inode->NeedsTrimming()) {
344 		Transaction transaction(volume, inode->BlockNumber());
345 
346 		if (inode->TrimPreallocation(transaction) == B_OK)
347 			transaction.Done();
348 		else if (transaction.HasParent()) {
349 			// TODO: for now, we don't let sub-transactions fail
350 			transaction.Done();
351 		}
352 	}
353 
354 	delete inode;
355 	return B_OK;
356 }
357 
358 
359 static status_t
360 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
361 {
362 	FUNCTION();
363 
364 	Volume* volume = (Volume*)_volume->private_volume;
365 	Inode* inode = (Inode*)_node->private_node;
366 
367 	// If the inode isn't in use anymore, we were called before
368 	// bfs_unlink() returns - in this case, we can just use the
369 	// transaction which has already deleted the inode.
370 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
371 
372 	// The file system check functionality uses this flag to prevent the space
373 	// used up by the inode from being freed - this flag is set only in
374 	// situations where this does not cause any harm as the block bitmap will
375 	// get fixed anyway in this case).
376 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
377 		delete inode;
378 		return B_OK;
379 	}
380 
381 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
382 
383 	status_t status = inode->Free(transaction);
384 	if (status == B_OK) {
385 		status = transaction.Done();
386 	} else if (transaction.HasParent()) {
387 		// TODO: for now, we don't let sub-transactions fail
388 		status = transaction.Done();
389 	}
390 
391 	volume->RemovedInodes().Remove(inode);
392 
393 	// TODO: the VFS currently does not allow this to fail
394 	delete inode;
395 
396 	return status;
397 }
398 
399 
/*!	Always returns \c false; none of the arguments are looked at. */
static bool
bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
{
	// TODO: we're obviously not even asked...
	return false;
}
406 
407 
408 static status_t
409 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
410 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
411 {
412 	Volume* volume = (Volume*)_volume->private_volume;
413 	Inode* inode = (Inode*)_node->private_node;
414 
415 	if (inode->FileCache() == NULL)
416 		RETURN_ERROR(B_BAD_VALUE);
417 
418 	InodeReadLocker _(inode);
419 
420 	uint32 vecIndex = 0;
421 	size_t vecOffset = 0;
422 	size_t bytesLeft = *_numBytes;
423 	status_t status;
424 
425 	while (true) {
426 		file_io_vec fileVecs[8];
427 		size_t fileVecCount = 8;
428 
429 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
430 			&fileVecCount, 0);
431 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
432 			break;
433 
434 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
435 
436 		size_t bytes = bytesLeft;
437 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
438 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
439 		if (status != B_OK || !bufferOverflow)
440 			break;
441 
442 		pos += bytes;
443 		bytesLeft -= bytes;
444 	}
445 
446 	return status;
447 }
448 
449 
450 static status_t
451 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
452 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
453 {
454 	Volume* volume = (Volume*)_volume->private_volume;
455 	Inode* inode = (Inode*)_node->private_node;
456 
457 	if (volume->IsReadOnly())
458 		return B_READ_ONLY_DEVICE;
459 
460 	if (inode->FileCache() == NULL)
461 		RETURN_ERROR(B_BAD_VALUE);
462 
463 	InodeReadLocker _(inode);
464 
465 	uint32 vecIndex = 0;
466 	size_t vecOffset = 0;
467 	size_t bytesLeft = *_numBytes;
468 	status_t status;
469 
470 	while (true) {
471 		file_io_vec fileVecs[8];
472 		size_t fileVecCount = 8;
473 
474 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
475 			&fileVecCount, 0);
476 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
477 			break;
478 
479 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
480 
481 		size_t bytes = bytesLeft;
482 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
483 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
484 		if (status != B_OK || !bufferOverflow)
485 			break;
486 
487 		pos += bytes;
488 		bytesLeft -= bytes;
489 	}
490 
491 	return status;
492 }
493 
494 
495 static status_t
496 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
497 {
498 	Volume* volume = (Volume*)_volume->private_volume;
499 	Inode* inode = (Inode*)_node->private_node;
500 
501 #ifndef FS_SHELL
502 	if (io_request_is_write(request) && volume->IsReadOnly()) {
503 		notify_io_request(request, B_READ_ONLY_DEVICE);
504 		return B_READ_ONLY_DEVICE;
505 	}
506 #endif
507 
508 	if (inode->FileCache() == NULL) {
509 #ifndef FS_SHELL
510 		notify_io_request(request, B_BAD_VALUE);
511 #endif
512 		RETURN_ERROR(B_BAD_VALUE);
513 	}
514 
515 	// We lock the node here and will unlock it in the "finished" hook.
516 	rw_lock_read_lock(&inode->Lock());
517 
518 	return do_iterative_fd_io(volume->Device(), request,
519 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
520 }
521 
522 
523 static status_t
524 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
525 	struct file_io_vec* vecs, size_t* _count)
526 {
527 	Volume* volume = (Volume*)_volume->private_volume;
528 	Inode* inode = (Inode*)_node->private_node;
529 
530 	int32 blockShift = volume->BlockShift();
531 	uint32 index = 0, max = *_count;
532 	block_run run;
533 	off_t fileOffset;
534 
535 	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));
536 
537 	while (true) {
538 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
539 		if (status != B_OK)
540 			return status;
541 
542 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
543 		vecs[index].length = ((uint32)run.Length() << blockShift)
544 			- offset + fileOffset;
545 
546 		// are we already done?
547 		if ((uint64)size <= (uint64)vecs[index].length
548 			|| (uint64)offset + (uint64)vecs[index].length
549 				>= (uint64)inode->Size()) {
550 			if ((uint64)offset + (uint64)vecs[index].length
551 					> (uint64)inode->Size()) {
552 				// make sure the extent ends with the last official file
553 				// block (without taking any preallocations into account)
554 				vecs[index].length = round_up(inode->Size() - offset,
555 					volume->BlockSize());
556 			}
557 			*_count = index + 1;
558 			return B_OK;
559 		}
560 
561 		offset += vecs[index].length;
562 		size -= vecs[index].length;
563 		index++;
564 
565 		if (index >= max) {
566 			// we're out of file_io_vecs; let's bail out
567 			*_count = index;
568 			return B_BUFFER_OVERFLOW;
569 		}
570 	}
571 
572 	// can never get here
573 	return B_ERROR;
574 }
575 
576 
577 //	#pragma mark -
578 
579 
580 static status_t
581 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
582 	ino_t* _vnodeID)
583 {
584 	Volume* volume = (Volume*)_volume->private_volume;
585 	Inode* directory = (Inode*)_directory->private_node;
586 
587 	InodeReadLocker locker(directory);
588 
589 	// check access permissions
590 	status_t status = directory->CheckPermissions(X_OK);
591 	if (status != B_OK)
592 		RETURN_ERROR(status);
593 
594 	BPlusTree* tree = directory->Tree();
595 	if (tree == NULL)
596 		RETURN_ERROR(B_BAD_VALUE);
597 
598 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
599 	if (status != B_OK) {
600 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
601 		if (status == B_ENTRY_NOT_FOUND)
602 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
603 
604 		return status;
605 	}
606 
607 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
608 
609 	locker.Unlock();
610 
611 	Inode* inode;
612 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
613 	if (status != B_OK) {
614 		REPORT_ERROR(status);
615 		return B_ENTRY_NOT_FOUND;
616 	}
617 
618 	return B_OK;
619 }
620 
621 
622 static status_t
623 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
624 	size_t bufferSize)
625 {
626 	Inode* inode = (Inode*)_node->private_node;
627 
628 	return inode->GetName(buffer, bufferSize);
629 }
630 
631 
/*!	Handles the file system specific control commands: trimming (not in the
	FS_SHELL build), version query, the file system check, boot block
	updates, and resizing - plus two debug-only commands.
	\param _cookie Only used by the start/stop checking commands, where it
		is treated as a file_cookie.
	\param buffer Input and/or output buffer of the command; it is accessed
		via user_memcpy() or the trim support helpers.
	\param bufferLength Size of \a buffer; only some of the commands check
		it.
	\return The status of the command, or B_DEV_INVALID_IOCTL if \a cmd is
		not handled.
*/
static status_t
bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
	void* buffer, size_t bufferLength)
{
	FUNCTION_START(("node = %p, cmd = %lu, buf = %p, len = %ld\n", _node, cmd,
		buffer, bufferLength));

	Volume* volume = (Volume*)_volume->private_volume;

	switch (cmd) {
#ifndef FS_SHELL
		case B_TRIM_DEVICE:
		{
			// Trims every range passed in, and reports the sum of what has
			// been trimmed back to the caller.
			fs_trim_data* trimData;
			MemoryDeleter deleter;
			status_t status = get_trim_data_from_user(buffer, bufferLength,
				deleter, trimData);
			if (status != B_OK)
				return status;

			trimData->trimmed_size = 0;

			for (uint32 i = 0; i < trimData->range_count; i++) {
				uint64 trimmedSize = 0;
				// NOTE(review): this "status" shadows the one of the
				// enclosing scope; a failure returns right away without
				// copying the size trimmed so far back to the caller.
				status_t status = volume->Allocator().Trim(
					trimData->ranges[i].offset, trimData->ranges[i].size,
					trimmedSize);
				if (status != B_OK)
					return status;

				trimData->trimmed_size += trimmedSize;
			}

			return copy_trim_data_to_user(buffer, trimData);
		}
#endif

		case BFS_IOCTL_VERSION:
		{
			// Copies the constant 0x10000 to the buffer; bufferLength is
			// not checked here.
			uint32 version = 0x10000;
			return user_memcpy(buffer, &version, sizeof(uint32));
		}
		case BFS_IOCTL_START_CHECKING:
		{
			// start checking
			status_t status = volume->CreateCheckVisitor();
			if (status != B_OK)
				return status;

			CheckVisitor* checker = volume->CheckVisitor();

			// the caller provides the check_control settings
			// NOTE(review): the check visitor created above is not deleted
			// again if copying fails - confirm that this is intended.
			if (user_memcpy(&checker->Control(), buffer,
					sizeof(check_control)) != B_OK) {
				return B_BAD_ADDRESS;
			}

			status = checker->StartBitmapPass();
			if (status == B_OK) {
				// mark the cookie as the one that is used for checking
				file_cookie* cookie = (file_cookie*)_cookie;
				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
			}

			return status;
		}
		case BFS_IOCTL_STOP_CHECKING:
		{
			// stop checking
			CheckVisitor* checker = volume->CheckVisitor();
			if (checker == NULL)
				return B_NO_INIT;

			status_t status = checker->StopChecking();

			if (status == B_OK) {
				file_cookie* cookie = (file_cookie*)_cookie;
				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;

				// hand the final check_control back to the caller
				status = user_memcpy(buffer, &checker->Control(),
					sizeof(check_control));
			}

			// the visitor is deleted and the checking thread reset, no
			// matter whether stopping succeeded
			volume->DeleteCheckVisitor();
			volume->SetCheckingThread(-1);

			return status;
		}
		case BFS_IOCTL_CHECK_NEXT_NODE:
		{
			// check next
			CheckVisitor* checker = volume->CheckVisitor();
			if (checker == NULL)
				return B_NO_INIT;

			// the calling thread becomes the checking thread
			volume->SetCheckingThread(find_thread(NULL));

			checker->Control().errors = 0;

			status_t status = checker->Next();
			if (status == B_ENTRY_NOT_FOUND) {
				checker->Control().status = B_ENTRY_NOT_FOUND;
					// tells StopChecking() that we finished the pass

				// after the bitmap pass, the check bitmap is written back,
				// and if that worked, the index pass is started
				if (checker->Pass() == BFS_CHECK_PASS_BITMAP) {
					if (checker->WriteBackCheckBitmap() == B_OK)
						status = checker->StartIndexPass();
				}
			}

			if (status == B_OK) {
				status = user_memcpy(buffer, &checker->Control(),
					sizeof(check_control));
			}

			return status;
		}
		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
		{
			// let's makebootable (or anyone else) update the boot block
			// while BFS is mounted
			update_boot_block update;
			if (bufferLength != sizeof(update_boot_block))
				return B_BAD_VALUE;
			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
				return B_BAD_ADDRESS;

			// only the range from disk_super_block::pad_to_block up to
			// byte 512 may be written
			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
			if (update.offset < minOffset
				|| update.offset >= 512 || update.length > 512 - minOffset
				|| update.length + update.offset > 512) {
				return B_BAD_VALUE;
			}
			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
					update.data, update.length) != B_OK) {
				return B_BAD_ADDRESS;
			}

			return volume->WriteSuperBlock();
		}
		case BFS_IOCTL_RESIZE:
		{
			// the buffer has to contain the new size as a uint64
			if (bufferLength != sizeof(uint64))
				return B_BAD_VALUE;

			uint64 size;
			if (user_memcpy((uint8*)&size, buffer, sizeof(uint64)) != B_OK)
				return B_BAD_ADDRESS;

			ResizeVisitor resizer(volume);
			return resizer.Resize(size, -1);
		}

#ifdef DEBUG_FRAGMENTER
		case 56741:
		{
			// debug only: calls BlockAllocator::Fragment()
			BlockAllocator& allocator = volume->Allocator();
			allocator.Fragment();
			return B_OK;
		}
#endif

#ifdef DEBUG
		case 56742:
		{
			// allocate all free blocks and zero them out
			// (a test for the BlockAllocator)!
			BlockAllocator& allocator = volume->Allocator();
			Transaction transaction(volume, 0);
			CachedBlock cached(volume);
			block_run run;
			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
					== B_OK) {
				PRINT(("write block_run(%ld, %d, %d)\n", run.allocation_group,
					run.start, run.length));
				for (int32 i = 0;i < run.length;i++) {
					uint8* block = cached.SetToWritable(transaction, run);
					if (block != NULL)
						memset(block, 0, volume->BlockSize());
				}
			}
			return B_OK;
		}
#endif
	}
	// none of the cases above matched
	return B_DEV_INVALID_IOCTL;
}
817 
818 
819 /*!	Sets the open-mode flags for the open file cookie - only
820 	supports O_APPEND currently, but that should be sufficient
821 	for a file system.
822 */
823 static status_t
824 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
825 {
826 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
827 
828 	file_cookie* cookie = (file_cookie*)_cookie;
829 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
830 
831 	return B_OK;
832 }
833 
834 
835 static status_t
836 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
837 {
838 	FUNCTION();
839 
840 	Inode* inode = (Inode*)_node->private_node;
841 	return inode->Sync();
842 }
843 
844 
845 static status_t
846 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
847 {
848 	FUNCTION();
849 
850 	Inode* inode = (Inode*)_node->private_node;
851 	fill_stat_buffer(inode, *stat);
852 	return B_OK;
853 }
854 
855 
/*!	Applies the fields of \a stat that are selected by \a mask to the inode:
	size, owner, group, mode, and the creation, modification and status
	change times. The changes are written back within a transaction, and
	listeners are notified on success.
	\return B_OK on success, B_READ_ONLY_DEVICE on a read-only volume,
		B_NOT_ALLOWED if the caller lacks the permission for one of the
		requested changes, B_IS_A_DIRECTORY or B_BAD_VALUE if the size of a
		non-file is to be changed, or the error of Inode::SetFileSize(),
		Inode::WriteBack() or Transaction::Done().
*/
static status_t
bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
	uint32 mask)
{
	FUNCTION();

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	// TODO: we should definitely check a bit more if the new stats are
	//	valid - or even better, the VFS should check this before calling us

	bfs_inode& node = inode->Node();
	bool updateTime = false;
	uid_t uid = geteuid();

	// Both permission results are determined up front, before the inode is
	// locked and before any of its fields are changed below.
	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;

	Transaction transaction(volume, inode->BlockNumber());
	inode->WriteLockInTransaction(transaction);
		// NOTE(review): the early returns below rely on the lock being
		// released together with the transaction - confirm.

	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
		// Since B_STAT_SIZE is the only thing that can fail directly, we
		// do it first, so that the inode state will still be consistent
		// with the on-disk version
		if (inode->IsDirectory())
			return B_IS_A_DIRECTORY;
		if (!inode->IsFile())
			return B_BAD_VALUE;
		if (!hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		off_t oldSize = inode->Size();

		status_t status = inode->SetFileSize(transaction, stat->st_size);
		if (status != B_OK)
			return status;

		// fill the new blocks (if any) with zeros
		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
			// We must not keep the inode locked during a write operation,
			// or else we might deadlock.
			rw_lock_write_unlock(&inode->Lock());
			inode->FillGapWithZeros(oldSize, inode->Size());
			rw_lock_write_lock(&inode->Lock());
		}

		// the size index is only updated for inodes that are not deleted
		if (!inode->IsDeleted()) {
			Index index(volume);
			index.UpdateSize(transaction, inode);

			updateTime = true;
		}
	}

	// NOTE(review): the sections below change the in-memory node directly,
	// and a later section can still fail with B_NOT_ALLOWED (for example,
	// the owner is set, then the group check fails). Whether the aborted
	// transaction restores the in-memory node is not visible here - confirm.

	if ((mask & B_STAT_UID) != 0) {
		// only root should be allowed
		if (uid != 0)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
		updateTime = true;
	}

	if ((mask & B_STAT_GID) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
		updateTime = true;
	}

	if ((mask & B_STAT_MODE) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		PRINT(("original mode = %ld, stat->st_mode = %d\n", node.Mode(),
			stat->st_mode));
		// only the bits covered by S_IUMSK are replaced
		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
			| (stat->st_mode & S_IUMSK));
		updateTime = true;
	}

	if ((mask & B_STAT_CREATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.create_time
			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
	}

	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		if (!inode->InLastModifiedIndex()) {
			// directory modification times are not part of the index
			node.last_modified_time
				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
		} else if (!inode->IsDeleted()) {
			// Index::UpdateLastModified() will set the new time in the inode
			Index index(volume);
			index.UpdateLastModified(transaction, inode,
				bfs_inode::ToInode(stat->st_mtim));
		}
	}

	// The status change time is either set explicitly, or implicitly to the
	// current time if one of the changes above asked for it.
	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		bigtime_t newTime;
		if ((mask & B_STAT_CHANGE_TIME) == 0)
			newTime = bfs_inode::ToInode(real_time_clock_usecs());
		else
			newTime = bfs_inode::ToInode(stat->st_ctim);

		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
	}

	// write the inode back, complete the transaction, and only then notify
	status_t status = inode->WriteBack(transaction);
	if (status == B_OK)
		status = transaction.Done();
	if (status == B_OK)
		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);

	return status;
}
987 
988 
989 status_t
990 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
991 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
992 {
993 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
994 		openMode));
995 
996 	Volume* volume = (Volume*)_volume->private_volume;
997 	Inode* directory = (Inode*)_directory->private_node;
998 
999 	if (volume->IsReadOnly())
1000 		return B_READ_ONLY_DEVICE;
1001 
1002 	if (!directory->IsDirectory())
1003 		RETURN_ERROR(B_BAD_TYPE);
1004 
1005 	// We are creating the cookie at this point, so that we don't have
1006 	// to remove the inode if we don't have enough free memory later...
1007 	file_cookie* cookie = new(std::nothrow) file_cookie;
1008 	if (cookie == NULL)
1009 		RETURN_ERROR(B_NO_MEMORY);
1010 
1011 	// initialize the cookie
1012 	cookie->open_mode = openMode;
1013 	cookie->last_size = 0;
1014 	cookie->last_notification = system_time();
1015 
1016 	Transaction transaction(volume, directory->BlockNumber());
1017 
1018 	Inode* inode;
1019 	bool created;
1020 	status_t status = Inode::Create(transaction, directory, name,
1021 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
1022 
1023 	// Disable the file cache, if requested?
1024 	if (status == B_OK && (openMode & O_NOCACHE) != 0
1025 		&& inode->FileCache() != NULL) {
1026 		status = file_cache_disable(inode->FileCache());
1027 	}
1028 
1029 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
1030 
1031 	if (status == B_OK)
1032 		status = transaction.Done();
1033 
1034 	if (status == B_OK) {
1035 		// register the cookie
1036 		*_cookie = cookie;
1037 
1038 		if (created) {
1039 			notify_entry_created(volume->ID(), directory->ID(), name,
1040 				*_vnodeID);
1041 		}
1042 	} else {
1043 		entry_cache_remove(volume->ID(), directory->ID(), name);
1044 		delete cookie;
1045 	}
1046 
1047 	return status;
1048 }
1049 
1050 
1051 static status_t
1052 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1053 	const char* path, int mode)
1054 {
1055 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1056 
1057 	Volume* volume = (Volume*)_volume->private_volume;
1058 	Inode* directory = (Inode*)_directory->private_node;
1059 
1060 	if (volume->IsReadOnly())
1061 		return B_READ_ONLY_DEVICE;
1062 
1063 	if (!directory->IsDirectory())
1064 		RETURN_ERROR(B_BAD_TYPE);
1065 
1066 	status_t status = directory->CheckPermissions(W_OK);
1067 	if (status < B_OK)
1068 		RETURN_ERROR(status);
1069 
1070 	Transaction transaction(volume, directory->BlockNumber());
1071 
1072 	Inode* link;
1073 	off_t id;
1074 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1075 		0, 0, NULL, &id, &link);
1076 	if (status < B_OK)
1077 		RETURN_ERROR(status);
1078 
1079 	size_t length = strlen(path);
1080 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1081 		strcpy(link->Node().short_symlink, path);
1082 	} else {
1083 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1084 			| INODE_LOGGED);
1085 
1086 		// links usually don't have a file cache attached - but we now need one
1087 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1088 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1089 
1090 		// The following call will have to write the inode back, so
1091 		// we don't have to do that here...
1092 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1093 	}
1094 
1095 	if (status == B_OK)
1096 		status = link->WriteBack(transaction);
1097 
1098 	// Inode::Create() left the inode locked in memory, and also doesn't
1099 	// publish links
1100 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1101 	put_vnode(volume->FSVolume(), id);
1102 
1103 	if (status == B_OK) {
1104 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1105 
1106 		status = transaction.Done();
1107 		if (status == B_OK)
1108 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1109 		else
1110 			entry_cache_remove(volume->ID(), directory->ID(), name);
1111 	}
1112 
1113 	return status;
1114 }
1115 
1116 
1117 status_t
1118 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1119 {
1120 	FUNCTION_START(("name = \"%s\"\n", name));
1121 
1122 	// This one won't be implemented in a binary compatible BFS
1123 	return B_UNSUPPORTED;
1124 }
1125 
1126 
1127 status_t
1128 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1129 {
1130 	FUNCTION_START(("name = \"%s\"\n", name));
1131 
1132 	if (!strcmp(name, "..") || !strcmp(name, "."))
1133 		return B_NOT_ALLOWED;
1134 
1135 	Volume* volume = (Volume*)_volume->private_volume;
1136 	Inode* directory = (Inode*)_directory->private_node;
1137 
1138 	status_t status = directory->CheckPermissions(W_OK);
1139 	if (status < B_OK)
1140 		return status;
1141 
1142 	Transaction transaction(volume, directory->BlockNumber());
1143 
1144 	off_t id;
1145 	status = directory->Remove(transaction, name, &id);
1146 	if (status == B_OK) {
1147 		entry_cache_remove(volume->ID(), directory->ID(), name);
1148 
1149 		status = transaction.Done();
1150 		if (status == B_OK)
1151 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1152 		else
1153 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1154 	}
1155 	return status;
1156 }
1157 
1158 
1159 status_t
1160 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1161 	fs_vnode* _newDir, const char* newName)
1162 {
1163 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1164 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1165 
1166 	Volume* volume = (Volume*)_volume->private_volume;
1167 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1168 	Inode* newDirectory = (Inode*)_newDir->private_node;
1169 
1170 	// are we already done?
1171 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1172 		return B_OK;
1173 
1174 	Transaction transaction(volume, oldDirectory->BlockNumber());
1175 
1176 	oldDirectory->WriteLockInTransaction(transaction);
1177 	if (oldDirectory != newDirectory)
1178 		newDirectory->WriteLockInTransaction(transaction);
1179 
1180 	// are we allowed to do what we've been told?
1181 	status_t status = oldDirectory->CheckPermissions(W_OK);
1182 	if (status == B_OK)
1183 		status = newDirectory->CheckPermissions(W_OK);
1184 	if (status != B_OK)
1185 		return status;
1186 
1187 	// Get the directory's tree, and a pointer to the inode which should be
1188 	// changed
1189 	BPlusTree* tree = oldDirectory->Tree();
1190 	if (tree == NULL)
1191 		RETURN_ERROR(B_BAD_VALUE);
1192 
1193 	off_t id;
1194 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1195 	if (status != B_OK)
1196 		RETURN_ERROR(status);
1197 
1198 	Vnode vnode(volume, id);
1199 	Inode* inode;
1200 	if (vnode.Get(&inode) != B_OK)
1201 		return B_IO_ERROR;
1202 
1203 	// Don't move a directory into one of its children - we soar up
1204 	// from the newDirectory to either the root node or the old
1205 	// directory, whichever comes first.
1206 	// If we meet our inode on that way, we have to bail out.
1207 
1208 	if (oldDirectory != newDirectory) {
1209 		ino_t parent = newDirectory->ID();
1210 		ino_t root = volume->RootNode()->ID();
1211 
1212 		while (true) {
1213 			if (parent == id)
1214 				return B_BAD_VALUE;
1215 			else if (parent == root || parent == oldDirectory->ID())
1216 				break;
1217 
1218 			Vnode vnode(volume, parent);
1219 			Inode* parentNode;
1220 			if (vnode.Get(&parentNode) != B_OK)
1221 				return B_ERROR;
1222 
1223 			parent = volume->ToVnode(parentNode->Parent());
1224 		}
1225 	}
1226 
1227 	// Everything okay? Then lets get to work...
1228 
1229 	// First, try to make sure there is nothing that will stop us in
1230 	// the target directory - since this is the only non-critical
1231 	// failure, we will test this case first
1232 	BPlusTree* newTree = tree;
1233 	if (newDirectory != oldDirectory) {
1234 		newTree = newDirectory->Tree();
1235 		if (newTree == NULL)
1236 			RETURN_ERROR(B_BAD_VALUE);
1237 	}
1238 
1239 	status = newTree->Insert(transaction, (const uint8*)newName,
1240 		strlen(newName), id);
1241 	if (status == B_NAME_IN_USE) {
1242 		// If there is already a file with that name, we have to remove
1243 		// it, as long it's not a directory with files in it
1244 		off_t clobber;
1245 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1246 				< B_OK)
1247 			return B_NAME_IN_USE;
1248 		if (clobber == id)
1249 			return B_BAD_VALUE;
1250 
1251 		Vnode vnode(volume, clobber);
1252 		Inode* other;
1253 		if (vnode.Get(&other) < B_OK)
1254 			return B_NAME_IN_USE;
1255 
1256 		// only allowed, if either both nodes are directories or neither is
1257 		if (inode->IsDirectory() != other->IsDirectory())
1258 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1259 
1260 		status = newDirectory->Remove(transaction, newName, NULL,
1261 			other->IsDirectory());
1262 		if (status < B_OK)
1263 			return status;
1264 
1265 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1266 
1267 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1268 			clobber);
1269 
1270 		status = newTree->Insert(transaction, (const uint8*)newName,
1271 			strlen(newName), id);
1272 	}
1273 	if (status != B_OK)
1274 		return status;
1275 
1276 	inode->WriteLockInTransaction(transaction);
1277 
1278 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1279 		newDirectory->ID(), newName);
1280 
1281 	// update the name only when they differ
1282 	if (strcmp(oldName, newName)) {
1283 		status = inode->SetName(transaction, newName);
1284 		if (status == B_OK) {
1285 			Index index(volume);
1286 			index.UpdateName(transaction, oldName, newName, inode);
1287 		}
1288 	}
1289 
1290 	if (status == B_OK) {
1291 		status = tree->Remove(transaction, (const uint8*)oldName,
1292 			strlen(oldName), id);
1293 		if (status == B_OK) {
1294 			inode->Parent() = newDirectory->BlockRun();
1295 
1296 			// if it's a directory, update the parent directory pointer
1297 			// in its tree if necessary
1298 			BPlusTree* movedTree = inode->Tree();
1299 			if (oldDirectory != newDirectory
1300 				&& inode->IsDirectory()
1301 				&& movedTree != NULL) {
1302 				status = movedTree->Replace(transaction, (const uint8*)"..",
1303 					2, newDirectory->ID());
1304 
1305 				if (status == B_OK) {
1306 					// update/add the cache entry for the parent
1307 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1308 				}
1309 			}
1310 
1311 			if (status == B_OK && newDirectory != oldDirectory)
1312 				status = oldDirectory->ContainerContentsChanged(transaction);
1313 			if (status == B_OK)
1314 				status = newDirectory->ContainerContentsChanged(transaction);
1315 
1316 			if (status == B_OK)
1317 				status = inode->WriteBack(transaction);
1318 
1319 			if (status == B_OK) {
1320 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1321 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1322 
1323 				status = transaction.Done();
1324 				if (status == B_OK) {
1325 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1326 						oldName, newDirectory->ID(), newName, id);
1327 					return B_OK;
1328 				}
1329 
1330 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1331 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1332 			}
1333 		}
1334 	}
1335 
1336 	return status;
1337 }
1338 
1339 
1340 static status_t
1341 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1342 {
1343 	FUNCTION();
1344 
1345 	Volume* volume = (Volume*)_volume->private_volume;
1346 	Inode* inode = (Inode*)_node->private_node;
1347 
1348 	// Opening a directory read-only is allowed, although you can't read
1349 	// any data from it.
1350 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1351 		return B_IS_A_DIRECTORY;
1352 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1353 		return B_NOT_A_DIRECTORY;
1354 
1355 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1356 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1357 	if (status != B_OK)
1358 		RETURN_ERROR(status);
1359 
1360 	file_cookie* cookie = new(std::nothrow) file_cookie;
1361 	if (cookie == NULL)
1362 		RETURN_ERROR(B_NO_MEMORY);
1363 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1364 
1365 	// initialize the cookie
1366 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1367 	cookie->last_size = inode->Size();
1368 	cookie->last_notification = system_time();
1369 
1370 	// Disable the file cache, if requested?
1371 	CObjectDeleter<void> fileCacheEnabler(file_cache_enable);
1372 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1373 		status = file_cache_disable(inode->FileCache());
1374 		if (status != B_OK)
1375 			return status;
1376 		fileCacheEnabler.SetTo(inode->FileCache());
1377 	}
1378 
1379 	// Should we truncate the file?
1380 	if ((openMode & O_TRUNC) != 0) {
1381 		if ((openMode & O_RWMASK) == O_RDONLY)
1382 			return B_NOT_ALLOWED;
1383 
1384 		Transaction transaction(volume, inode->BlockNumber());
1385 		inode->WriteLockInTransaction(transaction);
1386 
1387 		status_t status = inode->SetFileSize(transaction, 0);
1388 		if (status == B_OK)
1389 			status = inode->WriteBack(transaction);
1390 		if (status == B_OK)
1391 			status = transaction.Done();
1392 		if (status != B_OK)
1393 			return status;
1394 	}
1395 
1396 	fileCacheEnabler.Detach();
1397 	cookieDeleter.Detach();
1398 	*_cookie = cookie;
1399 	return B_OK;
1400 }
1401 
1402 
1403 static status_t
1404 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1405 	void* buffer, size_t* _length)
1406 {
1407 	//FUNCTION();
1408 	Inode* inode = (Inode*)_node->private_node;
1409 
1410 	if (!inode->HasUserAccessableStream()) {
1411 		*_length = 0;
1412 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1413 	}
1414 
1415 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1416 }
1417 
1418 
1419 static status_t
1420 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1421 	const void* buffer, size_t* _length)
1422 {
1423 	//FUNCTION();
1424 	Volume* volume = (Volume*)_volume->private_volume;
1425 	Inode* inode = (Inode*)_node->private_node;
1426 
1427 	if (volume->IsReadOnly())
1428 		return B_READ_ONLY_DEVICE;
1429 
1430 	if (!inode->HasUserAccessableStream()) {
1431 		*_length = 0;
1432 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1433 	}
1434 
1435 	file_cookie* cookie = (file_cookie*)_cookie;
1436 
1437 	if (cookie->open_mode & O_APPEND)
1438 		pos = inode->Size();
1439 
1440 	Transaction transaction;
1441 		// We are not starting the transaction here, since
1442 		// it might not be needed at all (the contents of
1443 		// regular files aren't logged)
1444 
1445 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1446 		_length);
1447 	if (status == B_OK)
1448 		status = transaction.Done();
1449 	if (status == B_OK) {
1450 		InodeReadLocker locker(inode);
1451 
1452 		// periodically notify if the file size has changed
1453 		// TODO: should we better test for a change in the last_modified time only?
1454 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1455 			&& system_time() > cookie->last_notification
1456 					+ INODE_NOTIFICATION_INTERVAL) {
1457 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1458 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1459 			cookie->last_size = inode->Size();
1460 			cookie->last_notification = system_time();
1461 		}
1462 	}
1463 
1464 	return status;
1465 }
1466 
1467 
1468 static status_t
1469 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1470 {
1471 	FUNCTION();
1472 	return B_OK;
1473 }
1474 
1475 
1476 static status_t
1477 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1478 {
1479 	FUNCTION();
1480 
1481 	file_cookie* cookie = (file_cookie*)_cookie;
1482 	Volume* volume = (Volume*)_volume->private_volume;
1483 	Inode* inode = (Inode*)_node->private_node;
1484 
1485 	Transaction transaction;
1486 	bool needsTrimming = false;
1487 
1488 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1489 		InodeReadLocker locker(inode);
1490 		needsTrimming = inode->NeedsTrimming();
1491 
1492 		if ((cookie->open_mode & O_RWMASK) != 0
1493 			&& !inode->IsDeleted()
1494 			&& (needsTrimming
1495 				|| inode->OldLastModified() != inode->LastModified()
1496 				|| (inode->InSizeIndex()
1497 					// TODO: this can prevent the size update notification
1498 					// for nodes not in the index!
1499 					&& inode->OldSize() != inode->Size()))) {
1500 			locker.Unlock();
1501 			transaction.Start(volume, inode->BlockNumber());
1502 		}
1503 	}
1504 
1505 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1506 
1507 	if (status == B_OK) {
1508 		inode->WriteLockInTransaction(transaction);
1509 
1510 		// trim the preallocated blocks and update the size,
1511 		// and last_modified indices if needed
1512 		bool changedSize = false, changedTime = false;
1513 		Index index(volume);
1514 
1515 		if (needsTrimming) {
1516 			status = inode->TrimPreallocation(transaction);
1517 			if (status < B_OK) {
1518 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1519 					", transaction %d: %s!\n", inode->ID(),
1520 					(int)transaction.ID(), strerror(status)));
1521 
1522 				// we still want this transaction to succeed
1523 				status = B_OK;
1524 			}
1525 		}
1526 		if (inode->OldSize() != inode->Size()) {
1527 			if (inode->InSizeIndex())
1528 				index.UpdateSize(transaction, inode);
1529 			changedSize = true;
1530 		}
1531 		if (inode->OldLastModified() != inode->LastModified()) {
1532 			if (inode->InLastModifiedIndex()) {
1533 				index.UpdateLastModified(transaction, inode,
1534 					inode->LastModified());
1535 			}
1536 			changedTime = true;
1537 
1538 			// updating the index doesn't write back the inode
1539 			inode->WriteBack(transaction);
1540 		}
1541 
1542 		if (changedSize || changedTime) {
1543 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1544 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1545 				| (changedSize ? B_STAT_SIZE : 0));
1546 		}
1547 	}
1548 	if (status == B_OK)
1549 		transaction.Done();
1550 
1551 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1552 		// "chkbfs" exited abnormally, so we have to stop it here...
1553 		FATAL(("check process was aborted!\n"));
1554 		volume->CheckVisitor()->StopChecking();
1555 		volume->DeleteCheckVisitor();
1556 	}
1557 
1558 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1559 		file_cache_enable(inode->FileCache());
1560 
1561 	delete cookie;
1562 	return B_OK;
1563 }
1564 
1565 
1566 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1567 	is not allowed.
1568 */
1569 static status_t
1570 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1571 {
1572 	//FUNCTION();
1573 
1574 	Inode* inode = (Inode*)_node->private_node;
1575 	status_t status = inode->CheckPermissions(accessMode);
1576 	if (status < B_OK)
1577 		RETURN_ERROR(status);
1578 
1579 	return B_OK;
1580 }
1581 
1582 
1583 static status_t
1584 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1585 	size_t* _bufferSize)
1586 {
1587 	FUNCTION();
1588 
1589 	Inode* inode = (Inode*)_node->private_node;
1590 
1591 	if (!inode->IsSymLink())
1592 		RETURN_ERROR(B_BAD_VALUE);
1593 
1594 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1595 		if ((uint64)inode->Size() < (uint64)*_bufferSize)
1596 			*_bufferSize = inode->Size();
1597 
1598 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1599 		if (status < B_OK)
1600 			RETURN_ERROR(status);
1601 
1602 		return B_OK;
1603 	}
1604 
1605 	size_t linkLen = strlen(inode->Node().short_symlink);
1606 	if (linkLen < *_bufferSize)
1607 		*_bufferSize = linkLen;
1608 
1609 	return user_memcpy(buffer, inode->Node().short_symlink, *_bufferSize);
1610 }
1611 
1612 
1613 //	#pragma mark - Directory functions
1614 
1615 
1616 static status_t
1617 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1618 	int mode)
1619 {
1620 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1621 
1622 	Volume* volume = (Volume*)_volume->private_volume;
1623 	Inode* directory = (Inode*)_directory->private_node;
1624 
1625 	if (volume->IsReadOnly())
1626 		return B_READ_ONLY_DEVICE;
1627 
1628 	if (!directory->IsDirectory())
1629 		RETURN_ERROR(B_BAD_TYPE);
1630 
1631 	status_t status = directory->CheckPermissions(W_OK);
1632 	if (status < B_OK)
1633 		RETURN_ERROR(status);
1634 
1635 	Transaction transaction(volume, directory->BlockNumber());
1636 
1637 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1638 	// need it anyway
1639 	off_t id;
1640 	status = Inode::Create(transaction, directory, name,
1641 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1642 	if (status == B_OK) {
1643 		put_vnode(volume->FSVolume(), id);
1644 
1645 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1646 
1647 		status = transaction.Done();
1648 		if (status == B_OK)
1649 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1650 		else
1651 			entry_cache_remove(volume->ID(), directory->ID(), name);
1652 	}
1653 
1654 	return status;
1655 }
1656 
1657 
1658 static status_t
1659 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1660 {
1661 	FUNCTION_START(("name = \"%s\"\n", name));
1662 
1663 	Volume* volume = (Volume*)_volume->private_volume;
1664 	Inode* directory = (Inode*)_directory->private_node;
1665 
1666 	Transaction transaction(volume, directory->BlockNumber());
1667 
1668 	off_t id;
1669 	status_t status = directory->Remove(transaction, name, &id, true);
1670 	if (status == B_OK) {
1671 		// Remove the cache entry for the directory and potentially also
1672 		// the parent entry still belonging to the directory
1673 		entry_cache_remove(volume->ID(), directory->ID(), name);
1674 		entry_cache_remove(volume->ID(), id, "..");
1675 
1676 		status = transaction.Done();
1677 		if (status == B_OK)
1678 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1679 		else {
1680 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1681 			entry_cache_add(volume->ID(), id, "..", id);
1682 		}
1683 	}
1684 
1685 	return status;
1686 }
1687 
1688 
1689 /*!	Opens a directory ready to be traversed.
1690 	bfs_open_dir() is also used by bfs_open_index_dir().
1691 */
1692 static status_t
1693 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1694 {
1695 	FUNCTION();
1696 
1697 	Inode* inode = (Inode*)_node->private_node;
1698 	status_t status = inode->CheckPermissions(R_OK);
1699 	if (status < B_OK)
1700 		RETURN_ERROR(status);
1701 
1702 	// we don't ask here for directories only, because the bfs_open_index_dir()
1703 	// function utilizes us (so we must be able to open indices as well)
1704 	if (!inode->IsContainer())
1705 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1706 
1707 	BPlusTree* tree = inode->Tree();
1708 	if (tree == NULL)
1709 		RETURN_ERROR(B_BAD_VALUE);
1710 
1711 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1712 	if (iterator == NULL)
1713 		RETURN_ERROR(B_NO_MEMORY);
1714 
1715 	*_cookie = iterator;
1716 	return B_OK;
1717 }
1718 
1719 
1720 static status_t
1721 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1722 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1723 {
1724 	FUNCTION();
1725 
1726 	TreeIterator* iterator = (TreeIterator*)_cookie;
1727 	Volume* volume = (Volume*)_volume->private_volume;
1728 
1729 	uint32 maxCount = *_num;
1730 	uint32 count = 0;
1731 
1732 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1733 		ino_t id;
1734 		uint16 length;
1735 		size_t nameBufferSize = bufferSize - sizeof(struct dirent) + 1;
1736 
1737 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1738 			nameBufferSize, &id);
1739 
1740 		if (status == B_ENTRY_NOT_FOUND)
1741 			break;
1742 
1743 		if (status == B_BUFFER_OVERFLOW) {
1744 			// the remaining name buffer length was too small
1745 			if (count == 0)
1746 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1747 			break;
1748 		}
1749 
1750 		if (status != B_OK)
1751 			RETURN_ERROR(status);
1752 
1753 		ASSERT(length < nameBufferSize);
1754 
1755 		dirent->d_dev = volume->ID();
1756 		dirent->d_ino = id;
1757 		dirent->d_reclen = sizeof(struct dirent) + length;
1758 
1759 		bufferSize -= dirent->d_reclen;
1760 		dirent = (struct dirent*)((uint8*)dirent + dirent->d_reclen);
1761 		count++;
1762 	}
1763 
1764 	*_num = count;
1765 	return B_OK;
1766 }
1767 
1768 
1769 /*!	Sets the TreeIterator back to the beginning of the directory. */
1770 static status_t
1771 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1772 {
1773 	FUNCTION();
1774 	TreeIterator* iterator = (TreeIterator*)_cookie;
1775 
1776 	return iterator->Rewind();
1777 }
1778 
1779 
1780 static status_t
1781 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1782 {
1783 	FUNCTION();
1784 	return B_OK;
1785 }
1786 
1787 
1788 static status_t
1789 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1790 {
1791 	delete (TreeIterator*)_cookie;
1792 	return B_OK;
1793 }
1794 
1795 
1796 //	#pragma mark - Attribute functions
1797 
1798 
1799 static status_t
1800 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1801 {
1802 	Inode* inode = (Inode*)_node->private_node;
1803 
1804 	FUNCTION();
1805 
1806 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1807 	if (iterator == NULL)
1808 		RETURN_ERROR(B_NO_MEMORY);
1809 
1810 	*_cookie = iterator;
1811 	return B_OK;
1812 }
1813 
1814 
1815 static status_t
1816 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1817 {
1818 	FUNCTION();
1819 	return B_OK;
1820 }
1821 
1822 
1823 static status_t
1824 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1825 {
1826 	FUNCTION();
1827 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1828 
1829 	delete iterator;
1830 	return B_OK;
1831 }
1832 
1833 
1834 static status_t
1835 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1836 {
1837 	FUNCTION();
1838 
1839 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1840 	RETURN_ERROR(iterator->Rewind());
1841 }
1842 
1843 
1844 static status_t
1845 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1846 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1847 {
1848 	FUNCTION();
1849 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1850 
1851 	uint32 type;
1852 	size_t length;
1853 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1854 		&dirent->d_ino);
1855 	if (status == B_ENTRY_NOT_FOUND) {
1856 		*_num = 0;
1857 		return B_OK;
1858 	} else if (status != B_OK) {
1859 		RETURN_ERROR(status);
1860 	}
1861 
1862 	Volume* volume = (Volume*)_volume->private_volume;
1863 
1864 	dirent->d_dev = volume->ID();
1865 	dirent->d_reclen = sizeof(struct dirent) + length;
1866 
1867 	*_num = 1;
1868 	return B_OK;
1869 }
1870 
1871 
1872 static status_t
1873 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1874 	uint32 type, int openMode, void** _cookie)
1875 {
1876 	FUNCTION();
1877 
1878 	Volume* volume = (Volume*)_volume->private_volume;
1879 	if (volume->IsReadOnly())
1880 		return B_READ_ONLY_DEVICE;
1881 
1882 	Inode* inode = (Inode*)_node->private_node;
1883 	Attribute attribute(inode);
1884 
1885 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1886 }
1887 
1888 
1889 static status_t
1890 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1891 	int openMode, void** _cookie)
1892 {
1893 	FUNCTION();
1894 
1895 	Inode* inode = (Inode*)_node->private_node;
1896 	Attribute attribute(inode);
1897 
1898 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1899 }
1900 
1901 
1902 static status_t
1903 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1904 {
1905 	return B_OK;
1906 }
1907 
1908 
1909 static status_t
1910 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1911 {
1912 	delete (attr_cookie*)cookie;
1913 	return B_OK;
1914 }
1915 
1916 
1917 static status_t
1918 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1919 	void* buffer, size_t* _length)
1920 {
1921 	FUNCTION();
1922 
1923 	attr_cookie* cookie = (attr_cookie*)_cookie;
1924 	Inode* inode = (Inode*)_file->private_node;
1925 
1926 	Attribute attribute(inode, cookie);
1927 
1928 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1929 }
1930 
1931 
1932 static status_t
1933 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1934 	off_t pos, const void* buffer, size_t* _length)
1935 {
1936 	FUNCTION();
1937 
1938 	attr_cookie* cookie = (attr_cookie*)_cookie;
1939 	Volume* volume = (Volume*)_volume->private_volume;
1940 	Inode* inode = (Inode*)_file->private_node;
1941 
1942 	Transaction transaction(volume, inode->BlockNumber());
1943 	Attribute attribute(inode, cookie);
1944 
1945 	bool created;
1946 	status_t status = attribute.Write(transaction, cookie, pos,
1947 		(const uint8*)buffer, _length, &created);
1948 	if (status == B_OK) {
1949 		status = transaction.Done();
1950 		if (status == B_OK) {
1951 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1952 				inode->ID(), cookie->name,
1953 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1954 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1955 				B_STAT_CHANGE_TIME);
1956 		}
1957 	}
1958 
1959 	return status;
1960 }
1961 
1962 
1963 static status_t
1964 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1965 	struct stat* stat)
1966 {
1967 	FUNCTION();
1968 
1969 	attr_cookie* cookie = (attr_cookie*)_cookie;
1970 	Inode* inode = (Inode*)_file->private_node;
1971 
1972 	Attribute attribute(inode, cookie);
1973 
1974 	return attribute.Stat(*stat);
1975 }
1976 
1977 
1978 static status_t
1979 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1980 	const struct stat* stat, int statMask)
1981 {
1982 	// TODO: Implement (at least setting the size)!
1983 	return EOPNOTSUPP;
1984 }
1985 
1986 
1987 static status_t
1988 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1989 	fs_vnode* toFile, const char* toName)
1990 {
1991 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1992 
1993 	// TODO: implement bfs_rename_attr()!
1994 	// There will probably be an API to move one attribute to another file,
1995 	// making that function much more complicated - oh joy ;-)
1996 
1997 	return EOPNOTSUPP;
1998 }
1999 
2000 
2001 static status_t
2002 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
2003 {
2004 	FUNCTION_START(("name = \"%s\"\n", name));
2005 
2006 	Volume* volume = (Volume*)_volume->private_volume;
2007 	Inode* inode = (Inode*)_node->private_node;
2008 
2009 	status_t status = inode->CheckPermissions(W_OK);
2010 	if (status != B_OK)
2011 		return status;
2012 
2013 	Transaction transaction(volume, inode->BlockNumber());
2014 
2015 	status = inode->RemoveAttribute(transaction, name);
2016 	if (status == B_OK)
2017 		status = transaction.Done();
2018 	if (status == B_OK) {
2019 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
2020 			name, B_ATTR_REMOVED);
2021 	}
2022 
2023 	return status;
2024 }
2025 
2026 
2027 //	#pragma mark - Special Nodes
2028 
2029 
2030 status_t
2031 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
2032 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
2033 	fs_vnode* _superVnode, ino_t* _nodeID)
2034 {
2035 	// no need to support entry-less nodes
2036 	if (name == NULL)
2037 		return B_UNSUPPORTED;
2038 
2039 	FUNCTION_START(("name = \"%s\", mode = %d, flags = 0x%lx, subVnode: %p\n",
2040 		name, mode, flags, subVnode));
2041 
2042 	Volume* volume = (Volume*)_volume->private_volume;
2043 	Inode* directory = (Inode*)_directory->private_node;
2044 
2045 	if (volume->IsReadOnly())
2046 		return B_READ_ONLY_DEVICE;
2047 
2048 	if (!directory->IsDirectory())
2049 		RETURN_ERROR(B_BAD_TYPE);
2050 
2051 	status_t status = directory->CheckPermissions(W_OK);
2052 	if (status < B_OK)
2053 		RETURN_ERROR(status);
2054 
2055 	Transaction transaction(volume, directory->BlockNumber());
2056 
2057 	off_t id;
2058 	Inode* inode;
2059 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2060 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2061 	if (status == B_OK) {
2062 		_superVnode->private_node = inode;
2063 		_superVnode->ops = &gBFSVnodeOps;
2064 		*_nodeID = id;
2065 
2066 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2067 
2068 		status = transaction.Done();
2069 		if (status == B_OK)
2070 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2071 		else
2072 			entry_cache_remove(volume->ID(), directory->ID(), name);
2073 	}
2074 
2075 	return status;
2076 }
2077 
2078 
2079 //	#pragma mark - Index functions
2080 
2081 
2082 static status_t
2083 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2084 {
2085 	FUNCTION();
2086 
2087 	Volume* volume = (Volume*)_volume->private_volume;
2088 
2089 	if (volume->IndicesNode() == NULL) {
2090 		// This volume does not have any indices
2091 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2092 	}
2093 
2094 	// Since the indices root node is just a directory, and we are storing
2095 	// a pointer to it in our Volume object, we can just use the directory
2096 	// traversal functions.
2097 	// In fact we're storing it in the Volume object for that reason.
2098 
2099 	fs_vnode indicesNode;
2100 	indicesNode.private_node = volume->IndicesNode();
2101 
2102 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2103 }
2104 
2105 
2106 static status_t
2107 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2108 {
2109 	FUNCTION();
2110 
2111 	Volume* volume = (Volume*)_volume->private_volume;
2112 
2113 	fs_vnode indicesNode;
2114 	indicesNode.private_node = volume->IndicesNode();
2115 
2116 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2117 }
2118 
2119 
2120 static status_t
2121 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2122 {
2123 	FUNCTION();
2124 
2125 	Volume* volume = (Volume*)_volume->private_volume;
2126 
2127 	fs_vnode indicesNode;
2128 	indicesNode.private_node = volume->IndicesNode();
2129 
2130 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2131 }
2132 
2133 
2134 static status_t
2135 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2136 {
2137 	FUNCTION();
2138 
2139 	Volume* volume = (Volume*)_volume->private_volume;
2140 
2141 	fs_vnode indicesNode;
2142 	indicesNode.private_node = volume->IndicesNode();
2143 
2144 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2145 }
2146 
2147 
2148 static status_t
2149 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2150 	size_t bufferSize, uint32* _num)
2151 {
2152 	FUNCTION();
2153 
2154 	Volume* volume = (Volume*)_volume->private_volume;
2155 
2156 	fs_vnode indicesNode;
2157 	indicesNode.private_node = volume->IndicesNode();
2158 
2159 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2160 		bufferSize, _num));
2161 }
2162 
2163 
2164 static status_t
2165 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2166 	uint32 flags)
2167 {
2168 	FUNCTION_START(("name = \"%s\", type = %ld, flags = %ld\n", name, type, flags));
2169 
2170 	Volume* volume = (Volume*)_volume->private_volume;
2171 
2172 	if (volume->IsReadOnly())
2173 		return B_READ_ONLY_DEVICE;
2174 
2175 	// only root users are allowed to create indices
2176 	if (geteuid() != 0)
2177 		return B_NOT_ALLOWED;
2178 
2179 	Transaction transaction(volume, volume->Indices());
2180 
2181 	Index index(volume);
2182 	status_t status = index.Create(transaction, name, type);
2183 
2184 	if (status == B_OK)
2185 		status = transaction.Done();
2186 
2187 	RETURN_ERROR(status);
2188 }
2189 
2190 
2191 static status_t
2192 bfs_remove_index(fs_volume* _volume, const char* name)
2193 {
2194 	FUNCTION();
2195 
2196 	Volume* volume = (Volume*)_volume->private_volume;
2197 
2198 	if (volume->IsReadOnly())
2199 		return B_READ_ONLY_DEVICE;
2200 
2201 	// only root users are allowed to remove indices
2202 	if (geteuid() != 0)
2203 		return B_NOT_ALLOWED;
2204 
2205 	Inode* indices = volume->IndicesNode();
2206 	if (indices == NULL)
2207 		return B_ENTRY_NOT_FOUND;
2208 
2209 	Transaction transaction(volume, volume->Indices());
2210 
2211 	status_t status = indices->Remove(transaction, name);
2212 	if (status == B_OK)
2213 		status = transaction.Done();
2214 
2215 	RETURN_ERROR(status);
2216 }
2217 
2218 
2219 static status_t
2220 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2221 {
2222 	FUNCTION_START(("name = %s\n", name));
2223 
2224 	Volume* volume = (Volume*)_volume->private_volume;
2225 
2226 	Index index(volume);
2227 	status_t status = index.SetTo(name);
2228 	if (status < B_OK)
2229 		RETURN_ERROR(status);
2230 
2231 	bfs_inode& node = index.Node()->Node();
2232 
2233 	stat->st_type = index.Type();
2234 	stat->st_mode = node.Mode();
2235 
2236 	stat->st_size = node.data.Size();
2237 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2238 
2239 	stat->st_nlink = 1;
2240 	stat->st_blksize = 65536;
2241 
2242 	stat->st_uid = node.UserID();
2243 	stat->st_gid = node.GroupID();
2244 
2245 	fill_stat_time(node, *stat);
2246 
2247 	return B_OK;
2248 }
2249 
2250 
2251 //	#pragma mark - Query functions
2252 
2253 
2254 static status_t
2255 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2256 	port_id port, uint32 token, void** _cookie)
2257 {
2258 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %lu, port_id = %ld, token = %ld)\n",
2259 		queryString, flags, port, token));
2260 
2261 	Volume* volume = (Volume*)_volume->private_volume;
2262 
2263 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2264 	if (expression == NULL)
2265 		RETURN_ERROR(B_NO_MEMORY);
2266 
2267 	if (expression->InitCheck() < B_OK) {
2268 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2269 			queryString, expression->Position()));
2270 
2271 		delete expression;
2272 		RETURN_ERROR(B_BAD_VALUE);
2273 	}
2274 
2275 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2276 	if (query == NULL) {
2277 		delete expression;
2278 		RETURN_ERROR(B_NO_MEMORY);
2279 	}
2280 
2281 	if (flags & B_LIVE_QUERY)
2282 		query->SetLiveMode(port, token);
2283 
2284 	*_cookie = (void*)query;
2285 
2286 	return B_OK;
2287 }
2288 
2289 
2290 static status_t
2291 bfs_close_query(fs_volume* _volume, void* cookie)
2292 {
2293 	FUNCTION();
2294 	return B_OK;
2295 }
2296 
2297 
2298 static status_t
2299 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2300 {
2301 	FUNCTION();
2302 
2303 	Query* query = (Query*)cookie;
2304 	Expression* expression = query->GetExpression();
2305 	delete query;
2306 	delete expression;
2307 
2308 	return B_OK;
2309 }
2310 
2311 
2312 static status_t
2313 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2314 	size_t bufferSize, uint32* _num)
2315 {
2316 	FUNCTION();
2317 	Query* query = (Query*)cookie;
2318 	status_t status = query->GetNextEntry(dirent, bufferSize);
2319 	if (status == B_OK)
2320 		*_num = 1;
2321 	else if (status == B_ENTRY_NOT_FOUND)
2322 		*_num = 0;
2323 	else
2324 		return status;
2325 
2326 	return B_OK;
2327 }
2328 
2329 
2330 static status_t
2331 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2332 {
2333 	FUNCTION();
2334 
2335 	Query* query = (Query*)cookie;
2336 	return query->Rewind();
2337 }
2338 
2339 
2340 //	#pragma mark -
2341 
2342 
2343 static uint32
2344 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2345 {
2346 	// TODO: We should at least check the partition size.
2347 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2348 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2349 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2350 }
2351 
2352 
2353 static status_t
2354 bfs_initialize(int fd, partition_id partitionID, const char* name,
2355 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2356 {
2357 	// check name
2358 	status_t status = check_volume_name(name);
2359 	if (status != B_OK)
2360 		return status;
2361 
2362 	// parse parameters
2363 	initialize_parameters parameters;
2364 	status = parse_initialize_parameters(parameterString, parameters);
2365 	if (status != B_OK)
2366 		return status;
2367 
2368 	update_disk_device_job_progress(job, 0);
2369 
2370 	// initialize the volume
2371 	Volume volume(NULL);
2372 	status = volume.Initialize(fd, name, parameters.blockSize,
2373 		parameters.flags);
2374 	if (status < B_OK) {
2375 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2376 		return status;
2377 	}
2378 
2379 	// rescan partition
2380 	status = scan_partition(partitionID);
2381 	if (status != B_OK)
2382 		return status;
2383 
2384 	update_disk_device_job_progress(job, 1);
2385 
2386 	// print some info, if desired
2387 	if (parameters.verbose) {
2388 		disk_super_block super = volume.SuperBlock();
2389 
2390 		INFORM(("Disk was initialized successfully.\n"));
2391 		INFORM(("\tname: \"%s\"\n", super.name));
2392 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2393 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2394 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2395 		INFORM(("\tnum allocation groups: %d\n",
2396 			(int)super.AllocationGroups()));
2397 		INFORM(("\tallocation group size: %ld blocks\n",
2398 			1L << super.AllocationGroupShift()));
2399 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2400 	}
2401 
2402 	return B_OK;
2403 }
2404 
2405 
2406 static status_t
2407 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2408 	uint32 blockSize, disk_job_id job)
2409 {
2410 	if (blockSize == 0)
2411 		return B_BAD_VALUE;
2412 
2413 	update_disk_device_job_progress(job, 0.0);
2414 
2415 	// just overwrite the superblock
2416 	disk_super_block superBlock;
2417 	memset(&superBlock, 0, sizeof(superBlock));
2418 
2419 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2420 		return errno;
2421 
2422 	update_disk_device_job_progress(job, 1.0);
2423 
2424 	return B_OK;
2425 }
2426 
2427 
2428 //	#pragma mark -
2429 
2430 
2431 static status_t
2432 bfs_std_ops(int32 op, ...)
2433 {
2434 	switch (op) {
2435 		case B_MODULE_INIT:
2436 #ifdef BFS_DEBUGGER_COMMANDS
2437 			add_debugger_commands();
2438 #endif
2439 			return B_OK;
2440 		case B_MODULE_UNINIT:
2441 #ifdef BFS_DEBUGGER_COMMANDS
2442 			remove_debugger_commands();
2443 #endif
2444 			return B_OK;
2445 
2446 		default:
2447 			return B_ERROR;
2448 	}
2449 }
2450 
// Table of the volume-level hooks implemented in this file.
// This is a positional initializer: the entries must stay in the member
// order of fs_volume_ops (declared outside this file), so do not reorder
// them.
fs_volume_ops gBFSVolumeOps = {
	&bfs_unmount,
	&bfs_read_fs_stat,
	&bfs_write_fs_stat,
	&bfs_sync,
	&bfs_get_vnode,

	/* index directory & index operations */
	&bfs_open_index_dir,
	&bfs_close_index_dir,
	&bfs_free_index_dir_cookie,
	&bfs_read_index_dir,
	&bfs_rewind_index_dir,

	&bfs_create_index,
	&bfs_remove_index,
	&bfs_stat_index,

	/* query operations */
	&bfs_open_query,
	&bfs_close_query,
	&bfs_free_query_cookie,
	&bfs_read_query,
	&bfs_rewind_query,
};
2476 
// Table of the per-node hooks implemented in this file.
// This is a positional initializer: the entries must stay in the member
// order of fs_vnode_ops (declared outside this file), so do not reorder
// them. Hooks that are not implemented are set to NULL, with the name of
// the hook given in the trailing comment.
fs_vnode_ops gBFSVnodeOps = {
	/* vnode operations */
	&bfs_lookup,
	&bfs_get_vnode_name,
	&bfs_put_vnode,
	&bfs_remove_vnode,

	/* VM file access */
	&bfs_can_page,
	&bfs_read_pages,
	&bfs_write_pages,

	&bfs_io,
	NULL,	// cancel_io()

	&bfs_get_file_map,

	&bfs_ioctl,
	&bfs_set_flags,
	NULL,	// fs_select
	NULL,	// fs_deselect
	&bfs_fsync,

	&bfs_read_link,
	&bfs_create_symlink,

	&bfs_link,
	&bfs_unlink,
	&bfs_rename,

	&bfs_access,
	&bfs_read_stat,
	&bfs_write_stat,
	NULL,	// fs_preallocate

	/* file operations */
	&bfs_create,
	&bfs_open,
	&bfs_close,
	&bfs_free_cookie,
	&bfs_read,
	&bfs_write,

	/* directory operations */
	&bfs_create_dir,
	&bfs_remove_dir,
	&bfs_open_dir,
	&bfs_close_dir,
	&bfs_free_dir_cookie,
	&bfs_read_dir,
	&bfs_rewind_dir,

	/* attribute directory operations */
	&bfs_open_attr_dir,
	&bfs_close_attr_dir,
	&bfs_free_attr_dir_cookie,
	&bfs_read_attr_dir,
	&bfs_rewind_attr_dir,

	/* attribute operations */
	&bfs_create_attr,
	&bfs_open_attr,
	&bfs_close_attr,
	&bfs_free_attr_cookie,
	&bfs_read_attr,
	&bfs_write_attr,

	&bfs_read_attr_stat,
	&bfs_write_attr_stat,
	&bfs_rename_attr,
	&bfs_remove_attr,

	/* special nodes */
	&bfs_create_special_node
};
2552 
// Module description of the BFS file system.
// This is a positional initializer: the entries must stay in the member
// order of file_system_module_info (declared outside this file).
// The module name and the short/pretty names get BFS_ENDIAN_SUFFIX
// respectively BFS_ENDIAN_PRETTY_SUFFIX appended, which are empty strings
// when BFS_LITTLE_ENDIAN_ONLY is defined.
static file_system_module_info sBeFileSystem = {
	{
		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
		0,
		bfs_std_ops,
	},

	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name

	// DDM flags
	// (only the flags that are not commented out are advertised; they are
	// the same three flags bfs_get_supported_operations() returns)
	0
//	| B_DISK_SYSTEM_SUPPORTS_CHECKING
//	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
//	| B_DISK_SYSTEM_SUPPORTS_RESIZING
//	| B_DISK_SYSTEM_SUPPORTS_MOVING
//	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
//	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
//	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
//	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
//	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
	| B_DISK_SYSTEM_SUPPORTS_WRITING
	,

	// scanning
	bfs_identify_partition,
	bfs_scan_partition,
	bfs_free_identify_partition_cookie,
	NULL,	// free_partition_content_cookie()

	&bfs_mount,

	/* capability querying operations */
	&bfs_get_supported_operations,

	NULL,	// validate_resize
	NULL,	// validate_move
	NULL,	// validate_set_content_name
	NULL,	// validate_set_content_parameters
	NULL,	// validate_initialize,

	/* shadow partition modification */
	NULL,	// shadow_changed

	/* writing */
	NULL,	// defragment
	NULL,	// repair
	NULL,	// resize
	NULL,	// move
	NULL,	// set_content_name
	NULL,	// set_content_parameters
	bfs_initialize,
	bfs_uninitialize
};
2614 
// NULL-terminated list of the modules this file provides; currently only
// the BFS file system module itself.
module_info* modules[] = {
	(module_info*)&sBeFileSystem,
	NULL,
};
2619