xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision be012e21222c4d8d70082d12353acb163dc60ba8)
1 /*
2  * Copyright 2001-2013, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Debug.h"
11 #include "Volume.h"
12 #include "Inode.h"
13 #include "Index.h"
14 #include "BPlusTree.h"
15 #include "Query.h"
16 #include "Attribute.h"
17 #include "bfs_control.h"
18 #include "bfs_disk_system.h"
19 
20 // TODO: temporary solution as long as there is no public I/O requests API
21 #ifndef BFS_SHELL
22 #	include <io_requests.h>
23 #	include <util/fs_trim_support.h>
24 #endif
25 
26 
// I/O size in bytes that is reported to callers via stat::st_blksize
// (fill_stat_buffer()) and fs_info::io_size (bfs_read_fs_stat()).
#define BFS_IO_SIZE	65536
28 
29 
// Cookie allocated by bfs_identify_partition() and read back by
// bfs_scan_partition(); it carries a copy of the super block that was
// obtained while identifying the partition.
struct identify_cookie {
	disk_super_block super_block;
};
33 
34 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
35 
36 
37 static void
38 fill_stat_time(const bfs_inode& node, struct stat& stat)
39 {
40 	bigtime_t now = real_time_clock_usecs();
41 	stat.st_atim.tv_sec = now / 1000000LL;
42 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
43 
44 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
45 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
46 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
47 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
48 
49 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
50 	bigtime_t changeTime = node.StatusChangeTime();
51 	if (changeTime < node.LastModifiedTime())
52 		stat.st_ctim = stat.st_mtim;
53 	else {
54 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
55 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
56 	}
57 }
58 
59 
60 void
61 fill_stat_buffer(Inode* inode, struct stat& stat)
62 {
63 	const bfs_inode& node = inode->Node();
64 
65 	stat.st_dev = inode->GetVolume()->ID();
66 	stat.st_ino = inode->ID();
67 	stat.st_nlink = 1;
68 	stat.st_blksize = BFS_IO_SIZE;
69 
70 	stat.st_uid = node.UserID();
71 	stat.st_gid = node.GroupID();
72 	stat.st_mode = node.Mode();
73 	stat.st_type = node.Type();
74 
75 	fill_stat_time(node, stat);
76 
77 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
78 		// symlinks report the size of the link here
79 		stat.st_size = strlen(node.short_symlink);
80 	} else
81 		stat.st_size = inode->Size();
82 
83 	stat.st_blocks = inode->AllocatedSize() / 512;
84 }
85 
86 
87 //!	bfs_io() callback hook
88 static status_t
89 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
90 	size_t size, struct file_io_vec* vecs, size_t* _count)
91 {
92 	Inode* inode = (Inode*)cookie;
93 
94 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
95 		inode->GetVolume()->BlockSize());
96 }
97 
98 
99 //!	bfs_io() callback hook
100 static status_t
101 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
102 	bool partialTransfer, size_t bytesTransferred)
103 {
104 	Inode* inode = (Inode*)cookie;
105 	rw_lock_read_unlock(&inode->Lock());
106 	return B_OK;
107 }
108 
109 
110 //	#pragma mark - Scanning
111 
112 
113 static float
114 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
115 {
116 	disk_super_block superBlock;
117 	status_t status = Volume::Identify(fd, &superBlock);
118 	if (status != B_OK)
119 		return -1;
120 
121 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
122 	if (cookie == NULL)
123 		return -1;
124 
125 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
126 
127 	*_cookie = cookie;
128 	return 0.8f;
129 }
130 
131 
132 static status_t
133 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
134 {
135 	identify_cookie* cookie = (identify_cookie*)_cookie;
136 
137 	partition->status = B_PARTITION_VALID;
138 	partition->flags |= B_PARTITION_FILE_SYSTEM;
139 	partition->content_size = cookie->super_block.NumBlocks()
140 		* cookie->super_block.BlockSize();
141 	partition->block_size = cookie->super_block.BlockSize();
142 	partition->content_name = strdup(cookie->super_block.name);
143 	if (partition->content_name == NULL)
144 		return B_NO_MEMORY;
145 
146 	return B_OK;
147 }
148 
149 
150 static void
151 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
152 {
153 	identify_cookie* cookie = (identify_cookie*)_cookie;
154 	delete cookie;
155 }
156 
157 
158 //	#pragma mark -
159 
160 
161 static status_t
162 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
163 	const char* args, ino_t* _rootID)
164 {
165 	FUNCTION();
166 
167 	Volume* volume = new(std::nothrow) Volume(_volume);
168 	if (volume == NULL)
169 		return B_NO_MEMORY;
170 
171 	status_t status = volume->Mount(device, flags);
172 	if (status != B_OK) {
173 		delete volume;
174 		RETURN_ERROR(status);
175 	}
176 
177 	_volume->private_volume = volume;
178 	_volume->ops = &gBFSVolumeOps;
179 	*_rootID = volume->ToVnode(volume->Root());
180 
181 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
182 		volume->Name(), *_rootID, device));
183 	return B_OK;
184 }
185 
186 
187 static status_t
188 bfs_unmount(fs_volume* _volume)
189 {
190 	FUNCTION();
191 	Volume* volume = (Volume*)_volume->private_volume;
192 
193 	status_t status = volume->Unmount();
194 	delete volume;
195 
196 	RETURN_ERROR(status);
197 }
198 
199 
200 static status_t
201 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
202 {
203 	FUNCTION();
204 
205 	Volume* volume = (Volume*)_volume->private_volume;
206 	MutexLocker locker(volume->Lock());
207 
208 	// File system flags.
209 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
210 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
211 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0);
212 
213 	info->io_size = BFS_IO_SIZE;
214 		// whatever is appropriate here?
215 
216 	info->block_size = volume->BlockSize();
217 	info->total_blocks = volume->NumBlocks();
218 	info->free_blocks = volume->FreeBlocks();
219 
220 	// Volume name
221 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
222 
223 	// File system name
224 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
225 
226 	return B_OK;
227 }
228 
229 
230 static status_t
231 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
232 {
233 	FUNCTION_START(("mask = %ld\n", mask));
234 
235 	Volume* volume = (Volume*)_volume->private_volume;
236 	if (volume->IsReadOnly())
237 		return B_READ_ONLY_DEVICE;
238 
239 	MutexLocker locker(volume->Lock());
240 
241 	status_t status = B_BAD_VALUE;
242 
243 	if (mask & FS_WRITE_FSINFO_NAME) {
244 		disk_super_block& superBlock = volume->SuperBlock();
245 
246 		strncpy(superBlock.name, info->volume_name,
247 			sizeof(superBlock.name) - 1);
248 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
249 
250 		status = volume->WriteSuperBlock();
251 	}
252 	return status;
253 }
254 
255 
256 static status_t
257 bfs_sync(fs_volume* _volume)
258 {
259 	FUNCTION();
260 
261 	Volume* volume = (Volume*)_volume->private_volume;
262 	return volume->Sync();
263 }
264 
265 
266 //	#pragma mark -
267 
268 
269 /*!	Reads in the node from disk and creates an inode object from it.
270 */
271 static status_t
272 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
273 	uint32* _flags, bool reenter)
274 {
275 	//FUNCTION_START(("ino_t = %Ld\n", id));
276 	Volume* volume = (Volume*)_volume->private_volume;
277 
278 	// first inode may be after the log area, we don't go through
279 	// the hassle and try to load an earlier block from disk
280 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
281 		|| id > volume->NumBlocks()) {
282 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
283 		return B_ERROR;
284 	}
285 
286 	CachedBlock cached(volume, id);
287 	bfs_inode* node = (bfs_inode*)cached.Block();
288 	if (node == NULL) {
289 		FATAL(("could not read inode: %" B_PRIdINO "\n", id));
290 		return B_IO_ERROR;
291 	}
292 
293 	status_t status = node->InitCheck(volume);
294 	if (status != B_OK) {
295 		if ((node->Flags() & INODE_DELETED) != 0) {
296 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
297 		} else {
298 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
299 				strerror(status)));
300 		}
301 		return status;
302 	}
303 
304 	Inode* inode = new(std::nothrow) Inode(volume, id);
305 	if (inode == NULL)
306 		return B_NO_MEMORY;
307 
308 	status = inode->InitCheck(false);
309 	if (status != B_OK)
310 		delete inode;
311 
312 	if (status == B_OK) {
313 		_node->private_node = inode;
314 		_node->ops = &gBFSVnodeOps;
315 		*_type = inode->Mode();
316 		*_flags = 0;
317 	}
318 
319 	return status;
320 }
321 
322 
323 static status_t
324 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
325 {
326 	Volume* volume = (Volume*)_volume->private_volume;
327 	Inode* inode = (Inode*)_node->private_node;
328 
329 	// since a directory's size can be changed without having it opened,
330 	// we need to take care about their preallocated blocks here
331 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
332 		&& inode->NeedsTrimming()) {
333 		Transaction transaction(volume, inode->BlockNumber());
334 
335 		if (inode->TrimPreallocation(transaction) == B_OK)
336 			transaction.Done();
337 		else if (transaction.HasParent()) {
338 			// TODO: for now, we don't let sub-transactions fail
339 			transaction.Done();
340 		}
341 	}
342 
343 	delete inode;
344 	return B_OK;
345 }
346 
347 
348 static status_t
349 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
350 {
351 	FUNCTION();
352 
353 	Volume* volume = (Volume*)_volume->private_volume;
354 	Inode* inode = (Inode*)_node->private_node;
355 
356 	// If the inode isn't in use anymore, we were called before
357 	// bfs_unlink() returns - in this case, we can just use the
358 	// transaction which has already deleted the inode.
359 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
360 
361 	// The file system check functionality uses this flag to prevent the space
362 	// used up by the inode from being freed - this flag is set only in
363 	// situations where this does not cause any harm as the block bitmap will
364 	// get fixed anyway in this case).
365 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
366 		delete inode;
367 		return B_OK;
368 	}
369 
370 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
371 
372 	status_t status = inode->Free(transaction);
373 	if (status == B_OK) {
374 		status = transaction.Done();
375 	} else if (transaction.HasParent()) {
376 		// TODO: for now, we don't let sub-transactions fail
377 		status = transaction.Done();
378 	}
379 
380 	volume->RemovedInodes().Remove(inode);
381 
382 	// TODO: the VFS currently does not allow this to fail
383 	delete inode;
384 
385 	return status;
386 }
387 
388 
389 static bool
390 bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
391 {
392 	// TODO: we're obviously not even asked...
393 	return false;
394 }
395 
396 
397 static status_t
398 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
399 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
400 {
401 	Volume* volume = (Volume*)_volume->private_volume;
402 	Inode* inode = (Inode*)_node->private_node;
403 
404 	if (inode->FileCache() == NULL)
405 		RETURN_ERROR(B_BAD_VALUE);
406 
407 	InodeReadLocker _(inode);
408 
409 	uint32 vecIndex = 0;
410 	size_t vecOffset = 0;
411 	size_t bytesLeft = *_numBytes;
412 	status_t status;
413 
414 	while (true) {
415 		file_io_vec fileVecs[8];
416 		size_t fileVecCount = 8;
417 
418 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
419 			&fileVecCount, 0);
420 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
421 			break;
422 
423 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
424 
425 		size_t bytes = bytesLeft;
426 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
427 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
428 		if (status != B_OK || !bufferOverflow)
429 			break;
430 
431 		pos += bytes;
432 		bytesLeft -= bytes;
433 	}
434 
435 	return status;
436 }
437 
438 
439 static status_t
440 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
441 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
442 {
443 	Volume* volume = (Volume*)_volume->private_volume;
444 	Inode* inode = (Inode*)_node->private_node;
445 
446 	if (volume->IsReadOnly())
447 		return B_READ_ONLY_DEVICE;
448 
449 	if (inode->FileCache() == NULL)
450 		RETURN_ERROR(B_BAD_VALUE);
451 
452 	InodeReadLocker _(inode);
453 
454 	uint32 vecIndex = 0;
455 	size_t vecOffset = 0;
456 	size_t bytesLeft = *_numBytes;
457 	status_t status;
458 
459 	while (true) {
460 		file_io_vec fileVecs[8];
461 		size_t fileVecCount = 8;
462 
463 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
464 			&fileVecCount, 0);
465 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
466 			break;
467 
468 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
469 
470 		size_t bytes = bytesLeft;
471 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
472 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
473 		if (status != B_OK || !bufferOverflow)
474 			break;
475 
476 		pos += bytes;
477 		bytesLeft -= bytes;
478 	}
479 
480 	return status;
481 }
482 
483 
484 static status_t
485 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
486 {
487 	Volume* volume = (Volume*)_volume->private_volume;
488 	Inode* inode = (Inode*)_node->private_node;
489 
490 #ifndef BFS_SHELL
491 	if (io_request_is_write(request) && volume->IsReadOnly()) {
492 		notify_io_request(request, B_READ_ONLY_DEVICE);
493 		return B_READ_ONLY_DEVICE;
494 	}
495 #endif
496 
497 	if (inode->FileCache() == NULL) {
498 #ifndef BFS_SHELL
499 		notify_io_request(request, B_BAD_VALUE);
500 #endif
501 		RETURN_ERROR(B_BAD_VALUE);
502 	}
503 
504 	// We lock the node here and will unlock it in the "finished" hook.
505 	rw_lock_read_lock(&inode->Lock());
506 
507 	return do_iterative_fd_io(volume->Device(), request,
508 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
509 }
510 
511 
512 static status_t
513 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
514 	struct file_io_vec* vecs, size_t* _count)
515 {
516 	Volume* volume = (Volume*)_volume->private_volume;
517 	Inode* inode = (Inode*)_node->private_node;
518 
519 	int32 blockShift = volume->BlockShift();
520 	uint32 index = 0, max = *_count;
521 	block_run run;
522 	off_t fileOffset;
523 
524 	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));
525 
526 	while (true) {
527 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
528 		if (status != B_OK)
529 			return status;
530 
531 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
532 		vecs[index].length = ((uint32)run.Length() << blockShift)
533 			- offset + fileOffset;
534 
535 		// are we already done?
536 		if ((uint64)size <= (uint64)vecs[index].length
537 			|| (uint64)offset + (uint64)vecs[index].length
538 				>= (uint64)inode->Size()) {
539 			if ((uint64)offset + (uint64)vecs[index].length
540 					> (uint64)inode->Size()) {
541 				// make sure the extent ends with the last official file
542 				// block (without taking any preallocations into account)
543 				vecs[index].length = round_up(inode->Size() - offset,
544 					volume->BlockSize());
545 			}
546 			*_count = index + 1;
547 			return B_OK;
548 		}
549 
550 		offset += vecs[index].length;
551 		size -= vecs[index].length;
552 		index++;
553 
554 		if (index >= max) {
555 			// we're out of file_io_vecs; let's bail out
556 			*_count = index;
557 			return B_BUFFER_OVERFLOW;
558 		}
559 	}
560 
561 	// can never get here
562 	return B_ERROR;
563 }
564 
565 
566 //	#pragma mark -
567 
568 
569 static status_t
570 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
571 	ino_t* _vnodeID)
572 {
573 	Volume* volume = (Volume*)_volume->private_volume;
574 	Inode* directory = (Inode*)_directory->private_node;
575 
576 	InodeReadLocker locker(directory);
577 
578 	// check access permissions
579 	status_t status = directory->CheckPermissions(X_OK);
580 	if (status != B_OK)
581 		RETURN_ERROR(status);
582 
583 	BPlusTree* tree = directory->Tree();
584 	if (tree == NULL)
585 		RETURN_ERROR(B_BAD_VALUE);
586 
587 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
588 	if (status != B_OK) {
589 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
590 		return status;
591 	}
592 
593 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
594 
595 	locker.Unlock();
596 
597 	Inode* inode;
598 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
599 	if (status != B_OK) {
600 		REPORT_ERROR(status);
601 		return B_ENTRY_NOT_FOUND;
602 	}
603 
604 	return B_OK;
605 }
606 
607 
608 static status_t
609 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
610 	size_t bufferSize)
611 {
612 	Inode* inode = (Inode*)_node->private_node;
613 
614 	return inode->GetName(buffer, bufferSize);
615 }
616 
617 
618 static status_t
619 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
620 	void* buffer, size_t bufferLength)
621 {
622 	FUNCTION_START(("node = %p, cmd = %lu, buf = %p, len = %ld\n", _node, cmd,
623 		buffer, bufferLength));
624 
625 	Volume* volume = (Volume*)_volume->private_volume;
626 
627 	switch (cmd) {
628 #ifndef BFS_SHELL
629 		case B_TRIM_DEVICE:
630 		{
631 			fs_trim_data* trimData;
632 			MemoryDeleter deleter;
633 			status_t status = get_trim_data_from_user(buffer, bufferLength,
634 				deleter, trimData);
635 			if (status != B_OK)
636 				return status;
637 
638 			trimData->trimmed_size = 0;
639 
640 			for (uint32 i = 0; i < trimData->range_count; i++) {
641 				uint64 trimmedSize = 0;
642 				status_t status = volume->Allocator().Trim(
643 					trimData->ranges[i].offset, trimData->ranges[i].size,
644 					trimmedSize);
645 				if (status != B_OK)
646 					return status;
647 
648 				trimData->trimmed_size += trimmedSize;
649 			}
650 
651 			return copy_trim_data_to_user(buffer, trimData);
652 		}
653 #endif
654 
655 		case BFS_IOCTL_VERSION:
656 		{
657 			uint32 version = 0x10000;
658 			return user_memcpy(buffer, &version, sizeof(uint32));
659 		}
660 		case BFS_IOCTL_START_CHECKING:
661 		{
662 			// start checking
663 			BlockAllocator& allocator = volume->Allocator();
664 			check_control control;
665 			if (user_memcpy(&control, buffer, sizeof(check_control)) != B_OK)
666 				return B_BAD_ADDRESS;
667 
668 			status_t status = allocator.StartChecking(&control);
669 			if (status == B_OK) {
670 				file_cookie* cookie = (file_cookie*)_cookie;
671 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
672 			}
673 
674 			return status;
675 		}
676 		case BFS_IOCTL_STOP_CHECKING:
677 		{
678 			// stop checking
679 			BlockAllocator& allocator = volume->Allocator();
680 			check_control control;
681 
682 			status_t status = allocator.StopChecking(&control);
683 			if (status == B_OK) {
684 				file_cookie* cookie = (file_cookie*)_cookie;
685 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
686 			}
687 			if (status == B_OK)
688 				status = user_memcpy(buffer, &control, sizeof(check_control));
689 
690 			return status;
691 		}
692 		case BFS_IOCTL_CHECK_NEXT_NODE:
693 		{
694 			// check next
695 			BlockAllocator& allocator = volume->Allocator();
696 			check_control control;
697 
698 			status_t status = allocator.CheckNextNode(&control);
699 			if (status == B_OK)
700 				status = user_memcpy(buffer, &control, sizeof(check_control));
701 
702 			return status;
703 		}
704 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
705 		{
706 			// let's makebootable (or anyone else) update the boot block
707 			// while BFS is mounted
708 			update_boot_block update;
709 			if (bufferLength != sizeof(update_boot_block))
710 				return B_BAD_VALUE;
711 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
712 				return B_BAD_ADDRESS;
713 			if (update.offset < offsetof(disk_super_block, pad_to_block)
714 				|| update.length + update.offset > 512)
715 				return B_BAD_VALUE;
716 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
717 					update.data, update.length) != B_OK)
718 				return B_BAD_ADDRESS;
719 
720 			return volume->WriteSuperBlock();
721 		}
722 
723 #ifdef DEBUG_FRAGMENTER
724 		case 56741:
725 		{
726 			BlockAllocator& allocator = volume->Allocator();
727 			allocator.Fragment();
728 			return B_OK;
729 		}
730 #endif
731 
732 #ifdef DEBUG
733 		case 56742:
734 		{
735 			// allocate all free blocks and zero them out
736 			// (a test for the BlockAllocator)!
737 			BlockAllocator& allocator = volume->Allocator();
738 			Transaction transaction(volume, 0);
739 			CachedBlock cached(volume);
740 			block_run run;
741 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
742 					== B_OK) {
743 				PRINT(("write block_run(%ld, %d, %d)\n", run.allocation_group,
744 					run.start, run.length));
745 				for (int32 i = 0;i < run.length;i++) {
746 					uint8* block = cached.SetToWritable(transaction, run);
747 					if (block != NULL)
748 						memset(block, 0, volume->BlockSize());
749 				}
750 			}
751 			return B_OK;
752 		}
753 #endif
754 	}
755 	return B_DEV_INVALID_IOCTL;
756 }
757 
758 
759 /*!	Sets the open-mode flags for the open file cookie - only
760 	supports O_APPEND currently, but that should be sufficient
761 	for a file system.
762 */
763 static status_t
764 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
765 {
766 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
767 
768 	file_cookie* cookie = (file_cookie*)_cookie;
769 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
770 
771 	return B_OK;
772 }
773 
774 
775 static status_t
776 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
777 {
778 	FUNCTION();
779 
780 	Inode* inode = (Inode*)_node->private_node;
781 	return inode->Sync();
782 }
783 
784 
785 static status_t
786 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
787 {
788 	FUNCTION();
789 
790 	Inode* inode = (Inode*)_node->private_node;
791 	fill_stat_buffer(inode, *stat);
792 	return B_OK;
793 }
794 
795 
796 static status_t
797 bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
798 	uint32 mask)
799 {
800 	FUNCTION();
801 
802 	Volume* volume = (Volume*)_volume->private_volume;
803 	Inode* inode = (Inode*)_node->private_node;
804 
805 	if (volume->IsReadOnly())
806 		return B_READ_ONLY_DEVICE;
807 
808 	// TODO: we should definitely check a bit more if the new stats are
809 	//	valid - or even better, the VFS should check this before calling us
810 
811 	bfs_inode& node = inode->Node();
812 	bool updateTime = false;
813 	uid_t uid = geteuid();
814 
815 	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
816 	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;
817 
818 	Transaction transaction(volume, inode->BlockNumber());
819 	inode->WriteLockInTransaction(transaction);
820 
821 	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
822 		// Since B_STAT_SIZE is the only thing that can fail directly, we
823 		// do it first, so that the inode state will still be consistent
824 		// with the on-disk version
825 		if (inode->IsDirectory())
826 			return B_IS_A_DIRECTORY;
827 		if (!inode->IsFile())
828 			return B_BAD_VALUE;
829 		if (!hasWriteAccess)
830 			RETURN_ERROR(B_NOT_ALLOWED);
831 
832 		off_t oldSize = inode->Size();
833 
834 		status_t status = inode->SetFileSize(transaction, stat->st_size);
835 		if (status != B_OK)
836 			return status;
837 
838 		// fill the new blocks (if any) with zeros
839 		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
840 			// We must not keep the inode locked during a write operation,
841 			// or else we might deadlock.
842 			rw_lock_write_unlock(&inode->Lock());
843 			inode->FillGapWithZeros(oldSize, inode->Size());
844 			rw_lock_write_lock(&inode->Lock());
845 		}
846 
847 		if (!inode->IsDeleted()) {
848 			Index index(volume);
849 			index.UpdateSize(transaction, inode);
850 
851 			updateTime = true;
852 		}
853 	}
854 
855 	if ((mask & B_STAT_UID) != 0) {
856 		// only root should be allowed
857 		if (uid != 0)
858 			RETURN_ERROR(B_NOT_ALLOWED);
859 		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
860 		updateTime = true;
861 	}
862 
863 	if ((mask & B_STAT_GID) != 0) {
864 		// only the user or root can do that
865 		if (!isOwnerOrRoot)
866 			RETURN_ERROR(B_NOT_ALLOWED);
867 		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
868 		updateTime = true;
869 	}
870 
871 	if ((mask & B_STAT_MODE) != 0) {
872 		// only the user or root can do that
873 		if (!isOwnerOrRoot)
874 			RETURN_ERROR(B_NOT_ALLOWED);
875 		PRINT(("original mode = %ld, stat->st_mode = %d\n", node.Mode(),
876 			stat->st_mode));
877 		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
878 			| (stat->st_mode & S_IUMSK));
879 		updateTime = true;
880 	}
881 
882 	if ((mask & B_STAT_CREATION_TIME) != 0) {
883 		// the user or root can do that or any user with write access
884 		if (!isOwnerOrRoot && !hasWriteAccess)
885 			RETURN_ERROR(B_NOT_ALLOWED);
886 		node.create_time
887 			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
888 	}
889 
890 	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
891 		// the user or root can do that or any user with write access
892 		if (!isOwnerOrRoot && !hasWriteAccess)
893 			RETURN_ERROR(B_NOT_ALLOWED);
894 		if (!inode->InLastModifiedIndex()) {
895 			// directory modification times are not part of the index
896 			node.last_modified_time
897 				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
898 		} else if (!inode->IsDeleted()) {
899 			// Index::UpdateLastModified() will set the new time in the inode
900 			Index index(volume);
901 			index.UpdateLastModified(transaction, inode,
902 				bfs_inode::ToInode(stat->st_mtim));
903 		}
904 	}
905 
906 	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
907 		// the user or root can do that or any user with write access
908 		if (!isOwnerOrRoot && !hasWriteAccess)
909 			RETURN_ERROR(B_NOT_ALLOWED);
910 		bigtime_t newTime;
911 		if ((mask & B_STAT_CHANGE_TIME) == 0)
912 			newTime = bfs_inode::ToInode(real_time_clock_usecs());
913 		else
914 			newTime = bfs_inode::ToInode(stat->st_ctim);
915 
916 		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
917 	}
918 
919 	status_t status = inode->WriteBack(transaction);
920 	if (status == B_OK)
921 		status = transaction.Done();
922 	if (status == B_OK)
923 		notify_stat_changed(volume->ID(), inode->ID(), mask);
924 
925 	return status;
926 }
927 
928 
929 status_t
930 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
931 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
932 {
933 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
934 		openMode));
935 
936 	Volume* volume = (Volume*)_volume->private_volume;
937 	Inode* directory = (Inode*)_directory->private_node;
938 
939 	if (volume->IsReadOnly())
940 		return B_READ_ONLY_DEVICE;
941 
942 	if (!directory->IsDirectory())
943 		RETURN_ERROR(B_BAD_TYPE);
944 
945 	// We are creating the cookie at this point, so that we don't have
946 	// to remove the inode if we don't have enough free memory later...
947 	file_cookie* cookie = new(std::nothrow) file_cookie;
948 	if (cookie == NULL)
949 		RETURN_ERROR(B_NO_MEMORY);
950 
951 	// initialize the cookie
952 	cookie->open_mode = openMode;
953 	cookie->last_size = 0;
954 	cookie->last_notification = system_time();
955 
956 	Transaction transaction(volume, directory->BlockNumber());
957 
958 	Inode* inode;
959 	bool created;
960 	status_t status = Inode::Create(transaction, directory, name,
961 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
962 
963 	// Disable the file cache, if requested?
964 	if (status == B_OK && (openMode & O_NOCACHE) != 0
965 		&& inode->FileCache() != NULL) {
966 		status = file_cache_disable(inode->FileCache());
967 	}
968 
969 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
970 
971 	if (status == B_OK)
972 		status = transaction.Done();
973 
974 	if (status == B_OK) {
975 		// register the cookie
976 		*_cookie = cookie;
977 
978 		if (created) {
979 			notify_entry_created(volume->ID(), directory->ID(), name,
980 				*_vnodeID);
981 		}
982 	} else {
983 		entry_cache_remove(volume->ID(), directory->ID(), name);
984 		delete cookie;
985 	}
986 
987 	return status;
988 }
989 
990 
991 static status_t
992 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
993 	const char* path, int mode)
994 {
995 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
996 
997 	Volume* volume = (Volume*)_volume->private_volume;
998 	Inode* directory = (Inode*)_directory->private_node;
999 
1000 	if (volume->IsReadOnly())
1001 		return B_READ_ONLY_DEVICE;
1002 
1003 	if (!directory->IsDirectory())
1004 		RETURN_ERROR(B_BAD_TYPE);
1005 
1006 	status_t status = directory->CheckPermissions(W_OK);
1007 	if (status < B_OK)
1008 		RETURN_ERROR(status);
1009 
1010 	Transaction transaction(volume, directory->BlockNumber());
1011 
1012 	Inode* link;
1013 	off_t id;
1014 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1015 		0, 0, NULL, &id, &link);
1016 	if (status < B_OK)
1017 		RETURN_ERROR(status);
1018 
1019 	size_t length = strlen(path);
1020 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1021 		strcpy(link->Node().short_symlink, path);
1022 	} else {
1023 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1024 			| INODE_LOGGED);
1025 
1026 		// links usually don't have a file cache attached - but we now need one
1027 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1028 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1029 
1030 		// The following call will have to write the inode back, so
1031 		// we don't have to do that here...
1032 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1033 	}
1034 
1035 	if (status == B_OK)
1036 		status = link->WriteBack(transaction);
1037 
1038 	// Inode::Create() left the inode locked in memory, and also doesn't
1039 	// publish links
1040 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1041 	put_vnode(volume->FSVolume(), id);
1042 
1043 	if (status == B_OK) {
1044 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1045 
1046 		status = transaction.Done();
1047 		if (status == B_OK)
1048 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1049 		else
1050 			entry_cache_remove(volume->ID(), directory->ID(), name);
1051 	}
1052 
1053 	return status;
1054 }
1055 
1056 
1057 status_t
1058 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1059 {
1060 	FUNCTION_START(("name = \"%s\"\n", name));
1061 
1062 	// This one won't be implemented in a binary compatible BFS
1063 	return B_UNSUPPORTED;
1064 }
1065 
1066 
1067 status_t
1068 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1069 {
1070 	FUNCTION_START(("name = \"%s\"\n", name));
1071 
1072 	if (!strcmp(name, "..") || !strcmp(name, "."))
1073 		return B_NOT_ALLOWED;
1074 
1075 	Volume* volume = (Volume*)_volume->private_volume;
1076 	Inode* directory = (Inode*)_directory->private_node;
1077 
1078 	status_t status = directory->CheckPermissions(W_OK);
1079 	if (status < B_OK)
1080 		return status;
1081 
1082 	Transaction transaction(volume, directory->BlockNumber());
1083 
1084 	off_t id;
1085 	status = directory->Remove(transaction, name, &id);
1086 	if (status == B_OK) {
1087 		entry_cache_remove(volume->ID(), directory->ID(), name);
1088 
1089 		status = transaction.Done();
1090 		if (status == B_OK)
1091 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1092 		else
1093 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1094 	}
1095 	return status;
1096 }
1097 
1098 
1099 status_t
1100 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1101 	fs_vnode* _newDir, const char* newName)
1102 {
1103 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1104 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1105 
1106 	Volume* volume = (Volume*)_volume->private_volume;
1107 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1108 	Inode* newDirectory = (Inode*)_newDir->private_node;
1109 
1110 	// are we already done?
1111 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1112 		return B_OK;
1113 
1114 	Transaction transaction(volume, oldDirectory->BlockNumber());
1115 
1116 	oldDirectory->WriteLockInTransaction(transaction);
1117 	if (oldDirectory != newDirectory)
1118 		newDirectory->WriteLockInTransaction(transaction);
1119 
1120 	// are we allowed to do what we've been told?
1121 	status_t status = oldDirectory->CheckPermissions(W_OK);
1122 	if (status == B_OK)
1123 		status = newDirectory->CheckPermissions(W_OK);
1124 	if (status != B_OK)
1125 		return status;
1126 
1127 	// Get the directory's tree, and a pointer to the inode which should be
1128 	// changed
1129 	BPlusTree* tree = oldDirectory->Tree();
1130 	if (tree == NULL)
1131 		RETURN_ERROR(B_BAD_VALUE);
1132 
1133 	off_t id;
1134 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1135 	if (status != B_OK)
1136 		RETURN_ERROR(status);
1137 
1138 	Vnode vnode(volume, id);
1139 	Inode* inode;
1140 	if (vnode.Get(&inode) != B_OK)
1141 		return B_IO_ERROR;
1142 
1143 	// Don't move a directory into one of its children - we soar up
1144 	// from the newDirectory to either the root node or the old
1145 	// directory, whichever comes first.
1146 	// If we meet our inode on that way, we have to bail out.
1147 
1148 	if (oldDirectory != newDirectory) {
1149 		ino_t parent = newDirectory->ID();
1150 		ino_t root = volume->RootNode()->ID();
1151 
1152 		while (true) {
1153 			if (parent == id)
1154 				return B_BAD_VALUE;
1155 			else if (parent == root || parent == oldDirectory->ID())
1156 				break;
1157 
1158 			Vnode vnode(volume, parent);
1159 			Inode* parentNode;
1160 			if (vnode.Get(&parentNode) != B_OK)
1161 				return B_ERROR;
1162 
1163 			parent = volume->ToVnode(parentNode->Parent());
1164 		}
1165 	}
1166 
1167 	// Everything okay? Then lets get to work...
1168 
1169 	// First, try to make sure there is nothing that will stop us in
1170 	// the target directory - since this is the only non-critical
1171 	// failure, we will test this case first
1172 	BPlusTree* newTree = tree;
1173 	if (newDirectory != oldDirectory) {
1174 		newTree = newDirectory->Tree();
1175 		if (newTree == NULL)
1176 			RETURN_ERROR(B_BAD_VALUE);
1177 	}
1178 
1179 	status = newTree->Insert(transaction, (const uint8*)newName,
1180 		strlen(newName), id);
1181 	if (status == B_NAME_IN_USE) {
1182 		// If there is already a file with that name, we have to remove
1183 		// it, as long it's not a directory with files in it
1184 		off_t clobber;
1185 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1186 				< B_OK)
1187 			return B_NAME_IN_USE;
1188 		if (clobber == id)
1189 			return B_BAD_VALUE;
1190 
1191 		Vnode vnode(volume, clobber);
1192 		Inode* other;
1193 		if (vnode.Get(&other) < B_OK)
1194 			return B_NAME_IN_USE;
1195 
1196 		// only allowed, if either both nodes are directories or neither is
1197 		if (inode->IsDirectory() != other->IsDirectory())
1198 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1199 
1200 		status = newDirectory->Remove(transaction, newName, NULL,
1201 			other->IsDirectory());
1202 		if (status < B_OK)
1203 			return status;
1204 
1205 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1206 
1207 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1208 			clobber);
1209 
1210 		status = newTree->Insert(transaction, (const uint8*)newName,
1211 			strlen(newName), id);
1212 	}
1213 	if (status != B_OK)
1214 		return status;
1215 
1216 	inode->WriteLockInTransaction(transaction);
1217 
1218 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1219 		newDirectory->ID(), newName);
1220 
1221 	// update the name only when they differ
1222 	if (strcmp(oldName, newName)) {
1223 		status = inode->SetName(transaction, newName);
1224 		if (status == B_OK) {
1225 			Index index(volume);
1226 			index.UpdateName(transaction, oldName, newName, inode);
1227 		}
1228 	}
1229 
1230 	if (status == B_OK) {
1231 		status = tree->Remove(transaction, (const uint8*)oldName,
1232 			strlen(oldName), id);
1233 		if (status == B_OK) {
1234 			inode->Parent() = newDirectory->BlockRun();
1235 
1236 			// if it's a directory, update the parent directory pointer
1237 			// in its tree if necessary
1238 			BPlusTree* movedTree = inode->Tree();
1239 			if (oldDirectory != newDirectory
1240 				&& inode->IsDirectory()
1241 				&& movedTree != NULL) {
1242 				status = movedTree->Replace(transaction, (const uint8*)"..",
1243 					2, newDirectory->ID());
1244 
1245 				if (status == B_OK) {
1246 					// update/add the cache entry for the parent
1247 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1248 				}
1249 			}
1250 
1251 			if (status == B_OK && newDirectory != oldDirectory)
1252 				status = oldDirectory->ContainerContentsChanged(transaction);
1253 			if (status == B_OK)
1254 				status = newDirectory->ContainerContentsChanged(transaction);
1255 
1256 			if (status == B_OK)
1257 				status = inode->WriteBack(transaction);
1258 
1259 			if (status == B_OK) {
1260 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1261 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1262 
1263 				status = transaction.Done();
1264 				if (status == B_OK) {
1265 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1266 						oldName, newDirectory->ID(), newName, id);
1267 					return B_OK;
1268 				}
1269 
1270 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1271 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1272 			}
1273 		}
1274 	}
1275 
1276 	return status;
1277 }
1278 
1279 
1280 static status_t
1281 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1282 {
1283 	FUNCTION();
1284 
1285 	Volume* volume = (Volume*)_volume->private_volume;
1286 	Inode* inode = (Inode*)_node->private_node;
1287 
1288 	// Opening a directory read-only is allowed, although you can't read
1289 	// any data from it.
1290 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1291 		return B_IS_A_DIRECTORY;
1292 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1293 		return B_NOT_A_DIRECTORY;
1294 
1295 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1296 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1297 	if (status != B_OK)
1298 		RETURN_ERROR(status);
1299 
1300 	file_cookie* cookie = new(std::nothrow) file_cookie;
1301 	if (cookie == NULL)
1302 		RETURN_ERROR(B_NO_MEMORY);
1303 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1304 
1305 	// initialize the cookie
1306 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1307 	cookie->last_size = inode->Size();
1308 	cookie->last_notification = system_time();
1309 
1310 	// Disable the file cache, if requested?
1311 	CObjectDeleter<void> fileCacheEnabler(file_cache_enable);
1312 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1313 		status = file_cache_disable(inode->FileCache());
1314 		if (status != B_OK)
1315 			return status;
1316 		fileCacheEnabler.SetTo(inode->FileCache());
1317 	}
1318 
1319 	// Should we truncate the file?
1320 	if ((openMode & O_TRUNC) != 0) {
1321 		if ((openMode & O_RWMASK) == O_RDONLY)
1322 			return B_NOT_ALLOWED;
1323 
1324 		Transaction transaction(volume, inode->BlockNumber());
1325 		inode->WriteLockInTransaction(transaction);
1326 
1327 		status_t status = inode->SetFileSize(transaction, 0);
1328 		if (status == B_OK)
1329 			status = inode->WriteBack(transaction);
1330 		if (status == B_OK)
1331 			status = transaction.Done();
1332 		if (status != B_OK)
1333 			return status;
1334 	}
1335 
1336 	fileCacheEnabler.Detach();
1337 	cookieDeleter.Detach();
1338 	*_cookie = cookie;
1339 	return B_OK;
1340 }
1341 
1342 
1343 static status_t
1344 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1345 	void* buffer, size_t* _length)
1346 {
1347 	//FUNCTION();
1348 	Inode* inode = (Inode*)_node->private_node;
1349 
1350 	if (!inode->HasUserAccessableStream()) {
1351 		*_length = 0;
1352 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1353 	}
1354 
1355 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1356 }
1357 
1358 
1359 static status_t
1360 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1361 	const void* buffer, size_t* _length)
1362 {
1363 	//FUNCTION();
1364 	Volume* volume = (Volume*)_volume->private_volume;
1365 	Inode* inode = (Inode*)_node->private_node;
1366 
1367 	if (volume->IsReadOnly())
1368 		return B_READ_ONLY_DEVICE;
1369 
1370 	if (!inode->HasUserAccessableStream()) {
1371 		*_length = 0;
1372 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1373 	}
1374 
1375 	file_cookie* cookie = (file_cookie*)_cookie;
1376 
1377 	if (cookie->open_mode & O_APPEND)
1378 		pos = inode->Size();
1379 
1380 	Transaction transaction;
1381 		// We are not starting the transaction here, since
1382 		// it might not be needed at all (the contents of
1383 		// regular files aren't logged)
1384 
1385 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1386 		_length);
1387 	if (status == B_OK)
1388 		status = transaction.Done();
1389 	if (status == B_OK) {
1390 		InodeReadLocker locker(inode);
1391 
1392 		// periodically notify if the file size has changed
1393 		// TODO: should we better test for a change in the last_modified time only?
1394 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1395 			&& system_time() > cookie->last_notification
1396 					+ INODE_NOTIFICATION_INTERVAL) {
1397 			notify_stat_changed(volume->ID(), inode->ID(),
1398 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1399 			cookie->last_size = inode->Size();
1400 			cookie->last_notification = system_time();
1401 		}
1402 	}
1403 
1404 	return status;
1405 }
1406 
1407 
1408 static status_t
1409 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1410 {
1411 	FUNCTION();
1412 	return B_OK;
1413 }
1414 
1415 
1416 static status_t
1417 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1418 {
1419 	FUNCTION();
1420 
1421 	file_cookie* cookie = (file_cookie*)_cookie;
1422 	Volume* volume = (Volume*)_volume->private_volume;
1423 	Inode* inode = (Inode*)_node->private_node;
1424 
1425 	Transaction transaction;
1426 	bool needsTrimming = false;
1427 
1428 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1429 		InodeReadLocker locker(inode);
1430 		needsTrimming = inode->NeedsTrimming();
1431 
1432 		if ((cookie->open_mode & O_RWMASK) != 0
1433 			&& !inode->IsDeleted()
1434 			&& (needsTrimming
1435 				|| inode->OldLastModified() != inode->LastModified()
1436 				|| (inode->InSizeIndex()
1437 					// TODO: this can prevent the size update notification
1438 					// for nodes not in the index!
1439 					&& inode->OldSize() != inode->Size()))) {
1440 			locker.Unlock();
1441 			transaction.Start(volume, inode->BlockNumber());
1442 		}
1443 	}
1444 
1445 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1446 
1447 	if (status == B_OK) {
1448 		inode->WriteLockInTransaction(transaction);
1449 
1450 		// trim the preallocated blocks and update the size,
1451 		// and last_modified indices if needed
1452 		bool changedSize = false, changedTime = false;
1453 		Index index(volume);
1454 
1455 		if (needsTrimming) {
1456 			status = inode->TrimPreallocation(transaction);
1457 			if (status < B_OK) {
1458 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1459 					", transaction %d: %s!\n", inode->ID(),
1460 					(int)transaction.ID(), strerror(status)));
1461 
1462 				// we still want this transaction to succeed
1463 				status = B_OK;
1464 			}
1465 		}
1466 		if (inode->OldSize() != inode->Size()) {
1467 			if (inode->InSizeIndex())
1468 				index.UpdateSize(transaction, inode);
1469 			changedSize = true;
1470 		}
1471 		if (inode->OldLastModified() != inode->LastModified()) {
1472 			if (inode->InLastModifiedIndex()) {
1473 				index.UpdateLastModified(transaction, inode,
1474 					inode->LastModified());
1475 			}
1476 			changedTime = true;
1477 
1478 			// updating the index doesn't write back the inode
1479 			inode->WriteBack(transaction);
1480 		}
1481 
1482 		if (changedSize || changedTime) {
1483 			notify_stat_changed(volume->ID(), inode->ID(),
1484 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1485 				| (changedSize ? B_STAT_SIZE : 0));
1486 		}
1487 	}
1488 	if (status == B_OK)
1489 		transaction.Done();
1490 
1491 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1492 		// "chkbfs" exited abnormally, so we have to stop it here...
1493 		FATAL(("check process was aborted!\n"));
1494 		volume->Allocator().StopChecking(NULL);
1495 	}
1496 
1497 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1498 		file_cache_enable(inode->FileCache());
1499 
1500 	delete cookie;
1501 	return B_OK;
1502 }
1503 
1504 
1505 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1506 	is not allowed.
1507 */
1508 static status_t
1509 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1510 {
1511 	//FUNCTION();
1512 
1513 	Inode* inode = (Inode*)_node->private_node;
1514 	status_t status = inode->CheckPermissions(accessMode);
1515 	if (status < B_OK)
1516 		RETURN_ERROR(status);
1517 
1518 	return B_OK;
1519 }
1520 
1521 
1522 static status_t
1523 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1524 	size_t* _bufferSize)
1525 {
1526 	FUNCTION();
1527 
1528 	Inode* inode = (Inode*)_node->private_node;
1529 
1530 	if (!inode->IsSymLink())
1531 		RETURN_ERROR(B_BAD_VALUE);
1532 
1533 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1534 		if ((uint64)inode->Size() < (uint64)*_bufferSize)
1535 			*_bufferSize = inode->Size();
1536 
1537 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1538 		if (status < B_OK)
1539 			RETURN_ERROR(status);
1540 
1541 		return B_OK;
1542 	}
1543 
1544 	size_t linkLen = strlen(inode->Node().short_symlink);
1545 	if (linkLen < *_bufferSize)
1546 		*_bufferSize = linkLen;
1547 
1548 	return user_memcpy(buffer, inode->Node().short_symlink, *_bufferSize);
1549 }
1550 
1551 
1552 //	#pragma mark - Directory functions
1553 
1554 
1555 static status_t
1556 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1557 	int mode)
1558 {
1559 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1560 
1561 	Volume* volume = (Volume*)_volume->private_volume;
1562 	Inode* directory = (Inode*)_directory->private_node;
1563 
1564 	if (volume->IsReadOnly())
1565 		return B_READ_ONLY_DEVICE;
1566 
1567 	if (!directory->IsDirectory())
1568 		RETURN_ERROR(B_BAD_TYPE);
1569 
1570 	status_t status = directory->CheckPermissions(W_OK);
1571 	if (status < B_OK)
1572 		RETURN_ERROR(status);
1573 
1574 	Transaction transaction(volume, directory->BlockNumber());
1575 
1576 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1577 	// need it anyway
1578 	off_t id;
1579 	status = Inode::Create(transaction, directory, name,
1580 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1581 	if (status == B_OK) {
1582 		put_vnode(volume->FSVolume(), id);
1583 
1584 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1585 
1586 		status = transaction.Done();
1587 		if (status == B_OK)
1588 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1589 		else
1590 			entry_cache_remove(volume->ID(), directory->ID(), name);
1591 	}
1592 
1593 	return status;
1594 }
1595 
1596 
1597 static status_t
1598 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1599 {
1600 	FUNCTION_START(("name = \"%s\"\n", name));
1601 
1602 	Volume* volume = (Volume*)_volume->private_volume;
1603 	Inode* directory = (Inode*)_directory->private_node;
1604 
1605 	Transaction transaction(volume, directory->BlockNumber());
1606 
1607 	off_t id;
1608 	status_t status = directory->Remove(transaction, name, &id, true);
1609 	if (status == B_OK) {
1610 		// Remove the cache entry for the directory and potentially also
1611 		// the parent entry still belonging to the directory
1612 		entry_cache_remove(volume->ID(), directory->ID(), name);
1613 		entry_cache_remove(volume->ID(), id, "..");
1614 
1615 		status = transaction.Done();
1616 		if (status == B_OK)
1617 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1618 		else {
1619 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1620 			entry_cache_add(volume->ID(), id, "..", id);
1621 		}
1622 	}
1623 
1624 	return status;
1625 }
1626 
1627 
1628 /*!	Opens a directory ready to be traversed.
1629 	bfs_open_dir() is also used by bfs_open_index_dir().
1630 */
1631 static status_t
1632 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1633 {
1634 	FUNCTION();
1635 
1636 	Inode* inode = (Inode*)_node->private_node;
1637 	status_t status = inode->CheckPermissions(R_OK);
1638 	if (status < B_OK)
1639 		RETURN_ERROR(status);
1640 
1641 	// we don't ask here for directories only, because the bfs_open_index_dir()
1642 	// function utilizes us (so we must be able to open indices as well)
1643 	if (!inode->IsContainer())
1644 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1645 
1646 	BPlusTree* tree = inode->Tree();
1647 	if (tree == NULL)
1648 		RETURN_ERROR(B_BAD_VALUE);
1649 
1650 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1651 	if (iterator == NULL)
1652 		RETURN_ERROR(B_NO_MEMORY);
1653 
1654 	*_cookie = iterator;
1655 	return B_OK;
1656 }
1657 
1658 
1659 static status_t
1660 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1661 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1662 {
1663 	FUNCTION();
1664 
1665 	TreeIterator* iterator = (TreeIterator*)_cookie;
1666 	Volume* volume = (Volume*)_volume->private_volume;
1667 
1668 	uint32 maxCount = *_num;
1669 	uint32 count = 0;
1670 
1671 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1672 		ino_t id;
1673 		uint16 length;
1674 		size_t nameBufferSize = bufferSize - sizeof(struct dirent) + 1;
1675 
1676 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1677 			nameBufferSize, &id);
1678 
1679 		if (status == B_ENTRY_NOT_FOUND)
1680 			break;
1681 
1682 		if (status == B_BUFFER_OVERFLOW) {
1683 			// the remaining name buffer length was too small
1684 			if (count == 0)
1685 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1686 			break;
1687 		}
1688 
1689 		if (status != B_OK)
1690 			RETURN_ERROR(status);
1691 
1692 		ASSERT(length < nameBufferSize);
1693 
1694 		dirent->d_dev = volume->ID();
1695 		dirent->d_ino = id;
1696 		dirent->d_reclen = sizeof(struct dirent) + length;
1697 
1698 		bufferSize -= dirent->d_reclen;
1699 		dirent = (struct dirent*)((uint8*)dirent + dirent->d_reclen);
1700 		count++;
1701 	}
1702 
1703 	*_num = count;
1704 	return B_OK;
1705 }
1706 
1707 
1708 /*!	Sets the TreeIterator back to the beginning of the directory. */
1709 static status_t
1710 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1711 {
1712 	FUNCTION();
1713 	TreeIterator* iterator = (TreeIterator*)_cookie;
1714 
1715 	return iterator->Rewind();
1716 }
1717 
1718 
1719 static status_t
1720 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1721 {
1722 	FUNCTION();
1723 	return B_OK;
1724 }
1725 
1726 
1727 static status_t
1728 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1729 {
1730 	delete (TreeIterator*)_cookie;
1731 	return B_OK;
1732 }
1733 
1734 
1735 //	#pragma mark - Attribute functions
1736 
1737 
1738 static status_t
1739 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1740 {
1741 	Inode* inode = (Inode*)_node->private_node;
1742 
1743 	FUNCTION();
1744 
1745 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1746 	if (iterator == NULL)
1747 		RETURN_ERROR(B_NO_MEMORY);
1748 
1749 	*_cookie = iterator;
1750 	return B_OK;
1751 }
1752 
1753 
1754 static status_t
1755 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1756 {
1757 	FUNCTION();
1758 	return B_OK;
1759 }
1760 
1761 
1762 static status_t
1763 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1764 {
1765 	FUNCTION();
1766 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1767 
1768 	delete iterator;
1769 	return B_OK;
1770 }
1771 
1772 
1773 static status_t
1774 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1775 {
1776 	FUNCTION();
1777 
1778 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1779 	RETURN_ERROR(iterator->Rewind());
1780 }
1781 
1782 
1783 static status_t
1784 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1785 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1786 {
1787 	FUNCTION();
1788 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1789 
1790 	uint32 type;
1791 	size_t length;
1792 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1793 		&dirent->d_ino);
1794 	if (status == B_ENTRY_NOT_FOUND) {
1795 		*_num = 0;
1796 		return B_OK;
1797 	} else if (status != B_OK) {
1798 		RETURN_ERROR(status);
1799 	}
1800 
1801 	Volume* volume = (Volume*)_volume->private_volume;
1802 
1803 	dirent->d_dev = volume->ID();
1804 	dirent->d_reclen = sizeof(struct dirent) + length;
1805 
1806 	*_num = 1;
1807 	return B_OK;
1808 }
1809 
1810 
1811 static status_t
1812 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1813 	uint32 type, int openMode, void** _cookie)
1814 {
1815 	FUNCTION();
1816 
1817 	Volume* volume = (Volume*)_volume->private_volume;
1818 	if (volume->IsReadOnly())
1819 		return B_READ_ONLY_DEVICE;
1820 
1821 	Inode* inode = (Inode*)_node->private_node;
1822 	Attribute attribute(inode);
1823 
1824 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1825 }
1826 
1827 
1828 static status_t
1829 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1830 	int openMode, void** _cookie)
1831 {
1832 	FUNCTION();
1833 
1834 	Inode* inode = (Inode*)_node->private_node;
1835 	Attribute attribute(inode);
1836 
1837 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1838 }
1839 
1840 
1841 static status_t
1842 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1843 {
1844 	return B_OK;
1845 }
1846 
1847 
1848 static status_t
1849 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1850 {
1851 	delete (attr_cookie*)cookie;
1852 	return B_OK;
1853 }
1854 
1855 
1856 static status_t
1857 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1858 	void* buffer, size_t* _length)
1859 {
1860 	FUNCTION();
1861 
1862 	attr_cookie* cookie = (attr_cookie*)_cookie;
1863 	Inode* inode = (Inode*)_file->private_node;
1864 
1865 	Attribute attribute(inode, cookie);
1866 
1867 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1868 }
1869 
1870 
1871 static status_t
1872 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1873 	off_t pos, const void* buffer, size_t* _length)
1874 {
1875 	FUNCTION();
1876 
1877 	attr_cookie* cookie = (attr_cookie*)_cookie;
1878 	Volume* volume = (Volume*)_volume->private_volume;
1879 	Inode* inode = (Inode*)_file->private_node;
1880 
1881 	Transaction transaction(volume, inode->BlockNumber());
1882 	Attribute attribute(inode, cookie);
1883 
1884 	bool created;
1885 	status_t status = attribute.Write(transaction, cookie, pos,
1886 		(const uint8*)buffer, _length, &created);
1887 	if (status == B_OK) {
1888 		status = transaction.Done();
1889 		if (status == B_OK) {
1890 			notify_attribute_changed(volume->ID(), inode->ID(), cookie->name,
1891 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1892 			notify_stat_changed(volume->ID(), inode->ID(), B_STAT_CHANGE_TIME);
1893 		}
1894 	}
1895 
1896 	return status;
1897 }
1898 
1899 
1900 static status_t
1901 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1902 	struct stat* stat)
1903 {
1904 	FUNCTION();
1905 
1906 	attr_cookie* cookie = (attr_cookie*)_cookie;
1907 	Inode* inode = (Inode*)_file->private_node;
1908 
1909 	Attribute attribute(inode, cookie);
1910 
1911 	return attribute.Stat(*stat);
1912 }
1913 
1914 
1915 static status_t
1916 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1917 	const struct stat* stat, int statMask)
1918 {
1919 	// TODO: Implement (at least setting the size)!
1920 	return EOPNOTSUPP;
1921 }
1922 
1923 
1924 static status_t
1925 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1926 	fs_vnode* toFile, const char* toName)
1927 {
1928 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1929 
1930 	// TODO: implement bfs_rename_attr()!
1931 	// There will probably be an API to move one attribute to another file,
1932 	// making that function much more complicated - oh joy ;-)
1933 
1934 	return EOPNOTSUPP;
1935 }
1936 
1937 
1938 static status_t
1939 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
1940 {
1941 	FUNCTION_START(("name = \"%s\"\n", name));
1942 
1943 	Volume* volume = (Volume*)_volume->private_volume;
1944 	Inode* inode = (Inode*)_node->private_node;
1945 
1946 	status_t status = inode->CheckPermissions(W_OK);
1947 	if (status != B_OK)
1948 		return status;
1949 
1950 	Transaction transaction(volume, inode->BlockNumber());
1951 
1952 	status = inode->RemoveAttribute(transaction, name);
1953 	if (status == B_OK)
1954 		status = transaction.Done();
1955 	if (status == B_OK) {
1956 		notify_attribute_changed(volume->ID(), inode->ID(), name,
1957 			B_ATTR_REMOVED);
1958 	}
1959 
1960 	return status;
1961 }
1962 
1963 
1964 //	#pragma mark - Special Nodes
1965 
1966 
1967 status_t
1968 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
1969 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
1970 	fs_vnode* _superVnode, ino_t* _nodeID)
1971 {
1972 	// no need to support entry-less nodes
1973 	if (name == NULL)
1974 		return B_UNSUPPORTED;
1975 
1976 	FUNCTION_START(("name = \"%s\", mode = %d, flags = 0x%lx, subVnode: %p\n",
1977 		name, mode, flags, subVnode));
1978 
1979 	Volume* volume = (Volume*)_volume->private_volume;
1980 	Inode* directory = (Inode*)_directory->private_node;
1981 
1982 	if (volume->IsReadOnly())
1983 		return B_READ_ONLY_DEVICE;
1984 
1985 	if (!directory->IsDirectory())
1986 		RETURN_ERROR(B_BAD_TYPE);
1987 
1988 	status_t status = directory->CheckPermissions(W_OK);
1989 	if (status < B_OK)
1990 		RETURN_ERROR(status);
1991 
1992 	Transaction transaction(volume, directory->BlockNumber());
1993 
1994 	off_t id;
1995 	Inode* inode;
1996 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
1997 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
1998 	if (status == B_OK) {
1999 		_superVnode->private_node = inode;
2000 		_superVnode->ops = &gBFSVnodeOps;
2001 		*_nodeID = id;
2002 
2003 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2004 
2005 		status = transaction.Done();
2006 		if (status == B_OK)
2007 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2008 		else
2009 			entry_cache_remove(volume->ID(), directory->ID(), name);
2010 	}
2011 
2012 	return status;
2013 }
2014 
2015 
2016 //	#pragma mark - Index functions
2017 
2018 
2019 static status_t
2020 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2021 {
2022 	FUNCTION();
2023 
2024 	Volume* volume = (Volume*)_volume->private_volume;
2025 
2026 	if (volume->IndicesNode() == NULL) {
2027 		// This volume does not have any indices
2028 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2029 	}
2030 
2031 	// Since the indices root node is just a directory, and we are storing
2032 	// a pointer to it in our Volume object, we can just use the directory
2033 	// traversal functions.
2034 	// In fact we're storing it in the Volume object for that reason.
2035 
2036 	fs_vnode indicesNode;
2037 	indicesNode.private_node = volume->IndicesNode();
2038 
2039 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2040 }
2041 
2042 
2043 static status_t
2044 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2045 {
2046 	FUNCTION();
2047 
2048 	Volume* volume = (Volume*)_volume->private_volume;
2049 
2050 	fs_vnode indicesNode;
2051 	indicesNode.private_node = volume->IndicesNode();
2052 
2053 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2054 }
2055 
2056 
2057 static status_t
2058 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2059 {
2060 	FUNCTION();
2061 
2062 	Volume* volume = (Volume*)_volume->private_volume;
2063 
2064 	fs_vnode indicesNode;
2065 	indicesNode.private_node = volume->IndicesNode();
2066 
2067 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2068 }
2069 
2070 
2071 static status_t
2072 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2073 {
2074 	FUNCTION();
2075 
2076 	Volume* volume = (Volume*)_volume->private_volume;
2077 
2078 	fs_vnode indicesNode;
2079 	indicesNode.private_node = volume->IndicesNode();
2080 
2081 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2082 }
2083 
2084 
2085 static status_t
2086 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2087 	size_t bufferSize, uint32* _num)
2088 {
2089 	FUNCTION();
2090 
2091 	Volume* volume = (Volume*)_volume->private_volume;
2092 
2093 	fs_vnode indicesNode;
2094 	indicesNode.private_node = volume->IndicesNode();
2095 
2096 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2097 		bufferSize, _num));
2098 }
2099 
2100 
2101 static status_t
2102 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2103 	uint32 flags)
2104 {
2105 	FUNCTION_START(("name = \"%s\", type = %ld, flags = %ld\n", name, type, flags));
2106 
2107 	Volume* volume = (Volume*)_volume->private_volume;
2108 
2109 	if (volume->IsReadOnly())
2110 		return B_READ_ONLY_DEVICE;
2111 
2112 	// only root users are allowed to create indices
2113 	if (geteuid() != 0)
2114 		return B_NOT_ALLOWED;
2115 
2116 	Transaction transaction(volume, volume->Indices());
2117 
2118 	Index index(volume);
2119 	status_t status = index.Create(transaction, name, type);
2120 
2121 	if (status == B_OK)
2122 		status = transaction.Done();
2123 
2124 	RETURN_ERROR(status);
2125 }
2126 
2127 
2128 static status_t
2129 bfs_remove_index(fs_volume* _volume, const char* name)
2130 {
2131 	FUNCTION();
2132 
2133 	Volume* volume = (Volume*)_volume->private_volume;
2134 
2135 	if (volume->IsReadOnly())
2136 		return B_READ_ONLY_DEVICE;
2137 
2138 	// only root users are allowed to remove indices
2139 	if (geteuid() != 0)
2140 		return B_NOT_ALLOWED;
2141 
2142 	Inode* indices = volume->IndicesNode();
2143 	if (indices == NULL)
2144 		return B_ENTRY_NOT_FOUND;
2145 
2146 	Transaction transaction(volume, volume->Indices());
2147 
2148 	status_t status = indices->Remove(transaction, name);
2149 	if (status == B_OK)
2150 		status = transaction.Done();
2151 
2152 	RETURN_ERROR(status);
2153 }
2154 
2155 
2156 static status_t
2157 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2158 {
2159 	FUNCTION_START(("name = %s\n", name));
2160 
2161 	Volume* volume = (Volume*)_volume->private_volume;
2162 
2163 	Index index(volume);
2164 	status_t status = index.SetTo(name);
2165 	if (status < B_OK)
2166 		RETURN_ERROR(status);
2167 
2168 	bfs_inode& node = index.Node()->Node();
2169 
2170 	stat->st_type = index.Type();
2171 	stat->st_mode = node.Mode();
2172 
2173 	stat->st_size = node.data.Size();
2174 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2175 
2176 	stat->st_nlink = 1;
2177 	stat->st_blksize = 65536;
2178 
2179 	stat->st_uid = node.UserID();
2180 	stat->st_gid = node.GroupID();
2181 
2182 	fill_stat_time(node, *stat);
2183 
2184 	return B_OK;
2185 }
2186 
2187 
2188 //	#pragma mark - Query functions
2189 
2190 
2191 static status_t
2192 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2193 	port_id port, uint32 token, void** _cookie)
2194 {
2195 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %lu, port_id = %ld, token = %ld)\n",
2196 		queryString, flags, port, token));
2197 
2198 	Volume* volume = (Volume*)_volume->private_volume;
2199 
2200 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2201 	if (expression == NULL)
2202 		RETURN_ERROR(B_NO_MEMORY);
2203 
2204 	if (expression->InitCheck() < B_OK) {
2205 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2206 			queryString, expression->Position()));
2207 
2208 		delete expression;
2209 		RETURN_ERROR(B_BAD_VALUE);
2210 	}
2211 
2212 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2213 	if (query == NULL) {
2214 		delete expression;
2215 		RETURN_ERROR(B_NO_MEMORY);
2216 	}
2217 
2218 	if (flags & B_LIVE_QUERY)
2219 		query->SetLiveMode(port, token);
2220 
2221 	*_cookie = (void*)query;
2222 
2223 	return B_OK;
2224 }
2225 
2226 
2227 static status_t
2228 bfs_close_query(fs_volume* _volume, void* cookie)
2229 {
2230 	FUNCTION();
2231 	return B_OK;
2232 }
2233 
2234 
2235 static status_t
2236 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2237 {
2238 	FUNCTION();
2239 
2240 	Query* query = (Query*)cookie;
2241 	Expression* expression = query->GetExpression();
2242 	delete query;
2243 	delete expression;
2244 
2245 	return B_OK;
2246 }
2247 
2248 
2249 static status_t
2250 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2251 	size_t bufferSize, uint32* _num)
2252 {
2253 	FUNCTION();
2254 	Query* query = (Query*)cookie;
2255 	status_t status = query->GetNextEntry(dirent, bufferSize);
2256 	if (status == B_OK)
2257 		*_num = 1;
2258 	else if (status == B_ENTRY_NOT_FOUND)
2259 		*_num = 0;
2260 	else
2261 		return status;
2262 
2263 	return B_OK;
2264 }
2265 
2266 
2267 static status_t
2268 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2269 {
2270 	FUNCTION();
2271 
2272 	Query* query = (Query*)cookie;
2273 	return query->Rewind();
2274 }
2275 
2276 
2277 //	#pragma mark -
2278 
2279 
2280 static uint32
2281 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2282 {
2283 	// TODO: We should at least check the partition size.
2284 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2285 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2286 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2287 }
2288 
2289 
2290 static status_t
2291 bfs_initialize(int fd, partition_id partitionID, const char* name,
2292 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2293 {
2294 	// check name
2295 	status_t status = check_volume_name(name);
2296 	if (status != B_OK)
2297 		return status;
2298 
2299 	// parse parameters
2300 	initialize_parameters parameters;
2301 	status = parse_initialize_parameters(parameterString, parameters);
2302 	if (status != B_OK)
2303 		return status;
2304 
2305 	update_disk_device_job_progress(job, 0);
2306 
2307 	// initialize the volume
2308 	Volume volume(NULL);
2309 	status = volume.Initialize(fd, name, parameters.blockSize,
2310 		parameters.flags);
2311 	if (status < B_OK) {
2312 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2313 		return status;
2314 	}
2315 
2316 	// rescan partition
2317 	status = scan_partition(partitionID);
2318 	if (status != B_OK)
2319 		return status;
2320 
2321 	update_disk_device_job_progress(job, 1);
2322 
2323 	// print some info, if desired
2324 	if (parameters.verbose) {
2325 		disk_super_block super = volume.SuperBlock();
2326 
2327 		INFORM(("Disk was initialized successfully.\n"));
2328 		INFORM(("\tname: \"%s\"\n", super.name));
2329 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2330 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2331 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2332 		INFORM(("\tnum allocation groups: %d\n",
2333 			(int)super.AllocationGroups()));
2334 		INFORM(("\tallocation group size: %ld blocks\n",
2335 			1L << super.AllocationGroupShift()));
2336 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2337 	}
2338 
2339 	return B_OK;
2340 }
2341 
2342 
2343 static status_t
2344 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2345 	uint32 blockSize, disk_job_id job)
2346 {
2347 	if (blockSize == 0)
2348 		return B_BAD_VALUE;
2349 
2350 	update_disk_device_job_progress(job, 0.0);
2351 
2352 	// just overwrite the superblock
2353 	disk_super_block superBlock;
2354 	memset(&superBlock, 0, sizeof(superBlock));
2355 
2356 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2357 		return errno;
2358 
2359 	update_disk_device_job_progress(job, 1.0);
2360 
2361 	return B_OK;
2362 }
2363 
2364 
2365 //	#pragma mark -
2366 
2367 
2368 static status_t
2369 bfs_std_ops(int32 op, ...)
2370 {
2371 	switch (op) {
2372 		case B_MODULE_INIT:
2373 #ifdef BFS_DEBUGGER_COMMANDS
2374 			add_debugger_commands();
2375 #endif
2376 			return B_OK;
2377 		case B_MODULE_UNINIT:
2378 #ifdef BFS_DEBUGGER_COMMANDS
2379 			remove_debugger_commands();
2380 #endif
2381 			return B_OK;
2382 
2383 		default:
2384 			return B_ERROR;
2385 	}
2386 }
2387 
2388 fs_volume_ops gBFSVolumeOps = {
2389 	&bfs_unmount,
2390 	&bfs_read_fs_stat,
2391 	&bfs_write_fs_stat,
2392 	&bfs_sync,
2393 	&bfs_get_vnode,
2394 
2395 	/* index directory & index operations */
2396 	&bfs_open_index_dir,
2397 	&bfs_close_index_dir,
2398 	&bfs_free_index_dir_cookie,
2399 	&bfs_read_index_dir,
2400 	&bfs_rewind_index_dir,
2401 
2402 	&bfs_create_index,
2403 	&bfs_remove_index,
2404 	&bfs_stat_index,
2405 
2406 	/* query operations */
2407 	&bfs_open_query,
2408 	&bfs_close_query,
2409 	&bfs_free_query_cookie,
2410 	&bfs_read_query,
2411 	&bfs_rewind_query,
2412 };
2413 
2414 fs_vnode_ops gBFSVnodeOps = {
2415 	/* vnode operations */
2416 	&bfs_lookup,
2417 	&bfs_get_vnode_name,
2418 	&bfs_put_vnode,
2419 	&bfs_remove_vnode,
2420 
2421 	/* VM file access */
2422 	&bfs_can_page,
2423 	&bfs_read_pages,
2424 	&bfs_write_pages,
2425 
2426 	&bfs_io,
2427 	NULL,	// cancel_io()
2428 
2429 	&bfs_get_file_map,
2430 
2431 	&bfs_ioctl,
2432 	&bfs_set_flags,
2433 	NULL,	// fs_select
2434 	NULL,	// fs_deselect
2435 	&bfs_fsync,
2436 
2437 	&bfs_read_link,
2438 	&bfs_create_symlink,
2439 
2440 	&bfs_link,
2441 	&bfs_unlink,
2442 	&bfs_rename,
2443 
2444 	&bfs_access,
2445 	&bfs_read_stat,
2446 	&bfs_write_stat,
2447 	NULL,	// fs_preallocate
2448 
2449 	/* file operations */
2450 	&bfs_create,
2451 	&bfs_open,
2452 	&bfs_close,
2453 	&bfs_free_cookie,
2454 	&bfs_read,
2455 	&bfs_write,
2456 
2457 	/* directory operations */
2458 	&bfs_create_dir,
2459 	&bfs_remove_dir,
2460 	&bfs_open_dir,
2461 	&bfs_close_dir,
2462 	&bfs_free_dir_cookie,
2463 	&bfs_read_dir,
2464 	&bfs_rewind_dir,
2465 
2466 	/* attribute directory operations */
2467 	&bfs_open_attr_dir,
2468 	&bfs_close_attr_dir,
2469 	&bfs_free_attr_dir_cookie,
2470 	&bfs_read_attr_dir,
2471 	&bfs_rewind_attr_dir,
2472 
2473 	/* attribute operations */
2474 	&bfs_create_attr,
2475 	&bfs_open_attr,
2476 	&bfs_close_attr,
2477 	&bfs_free_attr_cookie,
2478 	&bfs_read_attr,
2479 	&bfs_write_attr,
2480 
2481 	&bfs_read_attr_stat,
2482 	&bfs_write_attr_stat,
2483 	&bfs_rename_attr,
2484 	&bfs_remove_attr,
2485 
2486 	/* special nodes */
2487 	&bfs_create_special_node
2488 };
2489 
2490 static file_system_module_info sBeFileSystem = {
2491 	{
2492 		"file_systems/bfs" B_CURRENT_FS_API_VERSION,
2493 		0,
2494 		bfs_std_ops,
2495 	},
2496 
2497 	"bfs",						// short_name
2498 	"Be File System",			// pretty_name
2499 
2500 	// DDM flags
2501 	0
2502 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2503 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2504 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2505 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2506 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2507 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2508 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2509 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2510 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2511 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2512 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2513 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2514 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2515 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2516 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2517 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2518 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2519 	,
2520 
2521 	// scanning
2522 	bfs_identify_partition,
2523 	bfs_scan_partition,
2524 	bfs_free_identify_partition_cookie,
2525 	NULL,	// free_partition_content_cookie()
2526 
2527 	&bfs_mount,
2528 
2529 	/* capability querying operations */
2530 	&bfs_get_supported_operations,
2531 
2532 	NULL,	// validate_resize
2533 	NULL,	// validate_move
2534 	NULL,	// validate_set_content_name
2535 	NULL,	// validate_set_content_parameters
2536 	NULL,	// validate_initialize,
2537 
2538 	/* shadow partition modification */
2539 	NULL,	// shadow_changed
2540 
2541 	/* writing */
2542 	NULL,	// defragment
2543 	NULL,	// repair
2544 	NULL,	// resize
2545 	NULL,	// move
2546 	NULL,	// set_content_name
2547 	NULL,	// set_content_parameters
2548 	bfs_initialize,
2549 	bfs_uninitialize
2550 };
2551 
2552 module_info* modules[] = {
2553 	(module_info*)&sBeFileSystem,
2554 	NULL,
2555 };
2556