xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision 40e20c10768c3dcfc54074b8886e3e91455332e1)
1 /*
2  * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Debug.h"
11 #include "Volume.h"
12 #include "Inode.h"
13 #include "Index.h"
14 #include "BPlusTree.h"
15 #include "Query.h"
16 #include "Attribute.h"
17 #include "bfs_control.h"
18 #include "bfs_disk_system.h"
19 
20 // TODO: temporary solution as long as there is no public I/O requests API
21 #ifndef BFS_SHELL
22 #	include <io_requests.h>
23 #endif
24 
25 #define BFS_IO_SIZE	65536
26 
27 
//!	Cookie created by bfs_identify_partition() and consumed by
//	bfs_scan_partition(); holds a copy of the identified super block.
struct identify_cookie {
	disk_super_block super_block;
};
31 
32 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
33 
34 
35 static void
36 fill_stat_time(const bfs_inode& node, struct stat& stat)
37 {
38 	bigtime_t now = real_time_clock_usecs();
39 	stat.st_atim.tv_sec = now / 1000000LL;
40 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
41 
42 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
43 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
44 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
45 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
46 
47 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
48 	bigtime_t changeTime = node.StatusChangeTime();
49 	if (changeTime < node.LastModifiedTime())
50 		stat.st_ctim = stat.st_mtim;
51 	else {
52 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
53 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
54 	}
55 }
56 
57 
58 void
59 fill_stat_buffer(Inode* inode, struct stat& stat)
60 {
61 	const bfs_inode& node = inode->Node();
62 
63 	stat.st_dev = inode->GetVolume()->ID();
64 	stat.st_ino = inode->ID();
65 	stat.st_nlink = 1;
66 	stat.st_blksize = BFS_IO_SIZE;
67 
68 	stat.st_uid = node.UserID();
69 	stat.st_gid = node.GroupID();
70 	stat.st_mode = node.Mode();
71 	stat.st_type = node.Type();
72 
73 	fill_stat_time(node, stat);
74 
75 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
76 		// symlinks report the size of the link here
77 		stat.st_size = strlen(node.short_symlink);
78 	} else
79 		stat.st_size = inode->Size();
80 
81 	stat.st_blocks = inode->AllocatedSize() / 512;
82 }
83 
84 
85 //!	bfs_io() callback hook
86 static status_t
87 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
88 	size_t size, struct file_io_vec* vecs, size_t* _count)
89 {
90 	Inode* inode = (Inode*)cookie;
91 
92 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
93 		inode->GetVolume()->BlockSize());
94 }
95 
96 
97 //!	bfs_io() callback hook
98 static status_t
99 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
100 	bool partialTransfer, size_t bytesTransferred)
101 {
102 	Inode* inode = (Inode*)cookie;
103 	rw_lock_read_unlock(&inode->Lock());
104 	return B_OK;
105 }
106 
107 
108 //	#pragma mark - Scanning
109 
110 
111 static float
112 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
113 {
114 	disk_super_block superBlock;
115 	status_t status = Volume::Identify(fd, &superBlock);
116 	if (status != B_OK)
117 		return -1;
118 
119 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
120 	if (cookie == NULL)
121 		return -1;
122 
123 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
124 
125 	*_cookie = cookie;
126 	return 0.8f;
127 }
128 
129 
130 static status_t
131 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
132 {
133 	identify_cookie* cookie = (identify_cookie*)_cookie;
134 
135 	partition->status = B_PARTITION_VALID;
136 	partition->flags |= B_PARTITION_FILE_SYSTEM;
137 	partition->content_size = cookie->super_block.NumBlocks()
138 		* cookie->super_block.BlockSize();
139 	partition->block_size = cookie->super_block.BlockSize();
140 	partition->content_name = strdup(cookie->super_block.name);
141 	if (partition->content_name == NULL)
142 		return B_NO_MEMORY;
143 
144 	return B_OK;
145 }
146 
147 
148 static void
149 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
150 {
151 	identify_cookie* cookie = (identify_cookie*)_cookie;
152 	delete cookie;
153 }
154 
155 
156 //	#pragma mark -
157 
158 
159 static status_t
160 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
161 	const char* args, ino_t* _rootID)
162 {
163 	FUNCTION();
164 
165 	Volume* volume = new(std::nothrow) Volume(_volume);
166 	if (volume == NULL)
167 		return B_NO_MEMORY;
168 
169 	status_t status = volume->Mount(device, flags);
170 	if (status != B_OK) {
171 		delete volume;
172 		RETURN_ERROR(status);
173 	}
174 
175 	_volume->private_volume = volume;
176 	_volume->ops = &gBFSVolumeOps;
177 	*_rootID = volume->ToVnode(volume->Root());
178 
179 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
180 		volume->Name(), *_rootID, device));
181 	return B_OK;
182 }
183 
184 
185 static status_t
186 bfs_unmount(fs_volume* _volume)
187 {
188 	FUNCTION();
189 	Volume* volume = (Volume*)_volume->private_volume;
190 
191 	status_t status = volume->Unmount();
192 	delete volume;
193 
194 	RETURN_ERROR(status);
195 }
196 
197 
198 static status_t
199 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
200 {
201 	FUNCTION();
202 
203 	Volume* volume = (Volume*)_volume->private_volume;
204 	MutexLocker locker(volume->Lock());
205 
206 	// File system flags.
207 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
208 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
209 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0);
210 
211 	info->io_size = BFS_IO_SIZE;
212 		// whatever is appropriate here?
213 
214 	info->block_size = volume->BlockSize();
215 	info->total_blocks = volume->NumBlocks();
216 	info->free_blocks = volume->FreeBlocks();
217 
218 	// Volume name
219 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
220 
221 	// File system name
222 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
223 
224 	return B_OK;
225 }
226 
227 
228 static status_t
229 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
230 {
231 	FUNCTION_START(("mask = %ld\n", mask));
232 
233 	Volume* volume = (Volume*)_volume->private_volume;
234 	if (volume->IsReadOnly())
235 		return B_READ_ONLY_DEVICE;
236 
237 	MutexLocker locker(volume->Lock());
238 
239 	status_t status = B_BAD_VALUE;
240 
241 	if (mask & FS_WRITE_FSINFO_NAME) {
242 		disk_super_block& superBlock = volume->SuperBlock();
243 
244 		strncpy(superBlock.name, info->volume_name,
245 			sizeof(superBlock.name) - 1);
246 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
247 
248 		status = volume->WriteSuperBlock();
249 	}
250 	return status;
251 }
252 
253 
254 static status_t
255 bfs_sync(fs_volume* _volume)
256 {
257 	FUNCTION();
258 
259 	Volume* volume = (Volume*)_volume->private_volume;
260 	return volume->Sync();
261 }
262 
263 
264 //	#pragma mark -
265 
266 
267 /*!	Reads in the node from disk and creates an inode object from it.
268 */
269 static status_t
270 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
271 	uint32* _flags, bool reenter)
272 {
273 	//FUNCTION_START(("ino_t = %Ld\n", id));
274 	Volume* volume = (Volume*)_volume->private_volume;
275 
276 	// first inode may be after the log area, we don't go through
277 	// the hassle and try to load an earlier block from disk
278 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
279 		|| id > volume->NumBlocks()) {
280 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
281 		return B_ERROR;
282 	}
283 
284 	CachedBlock cached(volume, id);
285 	bfs_inode* node = (bfs_inode*)cached.Block();
286 	if (node == NULL) {
287 		FATAL(("could not read inode: %" B_PRIdINO "\n", id));
288 		return B_IO_ERROR;
289 	}
290 
291 	status_t status = node->InitCheck(volume);
292 	if (status != B_OK) {
293 		if ((node->Flags() & INODE_DELETED) != 0) {
294 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
295 		} else {
296 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
297 				strerror(status)));
298 		}
299 		return status;
300 	}
301 
302 	Inode* inode = new(std::nothrow) Inode(volume, id);
303 	if (inode == NULL)
304 		return B_NO_MEMORY;
305 
306 	status = inode->InitCheck(false);
307 	if (status != B_OK)
308 		delete inode;
309 
310 	if (status == B_OK) {
311 		_node->private_node = inode;
312 		_node->ops = &gBFSVnodeOps;
313 		*_type = inode->Mode();
314 		*_flags = 0;
315 	}
316 
317 	return status;
318 }
319 
320 
321 static status_t
322 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
323 {
324 	Volume* volume = (Volume*)_volume->private_volume;
325 	Inode* inode = (Inode*)_node->private_node;
326 
327 	// since a directory's size can be changed without having it opened,
328 	// we need to take care about their preallocated blocks here
329 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
330 		&& inode->NeedsTrimming()) {
331 		Transaction transaction(volume, inode->BlockNumber());
332 
333 		if (inode->TrimPreallocation(transaction) == B_OK)
334 			transaction.Done();
335 		else if (transaction.HasParent()) {
336 			// TODO: for now, we don't let sub-transactions fail
337 			transaction.Done();
338 		}
339 	}
340 
341 	delete inode;
342 	return B_OK;
343 }
344 
345 
346 static status_t
347 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
348 {
349 	FUNCTION();
350 
351 	Volume* volume = (Volume*)_volume->private_volume;
352 	Inode* inode = (Inode*)_node->private_node;
353 
354 	// If the inode isn't in use anymore, we were called before
355 	// bfs_unlink() returns - in this case, we can just use the
356 	// transaction which has already deleted the inode.
357 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
358 
359 	// The file system check functionality uses this flag to prevent the space
360 	// used up by the inode from being freed - this flag is set only in
361 	// situations where this does not cause any harm as the block bitmap will
362 	// get fixed anyway in this case).
363 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
364 		delete inode;
365 		return B_OK;
366 	}
367 
368 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
369 
370 	status_t status = inode->Free(transaction);
371 	if (status == B_OK) {
372 		status = transaction.Done();
373 	} else if (transaction.HasParent()) {
374 		// TODO: for now, we don't let sub-transactions fail
375 		status = transaction.Done();
376 	}
377 
378 	volume->RemovedInodes().Remove(inode);
379 
380 	// TODO: the VFS currently does not allow this to fail
381 	delete inode;
382 
383 	return status;
384 }
385 
386 
//!	Paging hook; unconditionally answers \c false, all arguments are ignored.
static bool
bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
{
	// TODO: we're obviously not even asked...
	return false;
}
393 
394 
395 static status_t
396 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
397 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
398 {
399 	Volume* volume = (Volume*)_volume->private_volume;
400 	Inode* inode = (Inode*)_node->private_node;
401 
402 	if (inode->FileCache() == NULL)
403 		RETURN_ERROR(B_BAD_VALUE);
404 
405 	InodeReadLocker _(inode);
406 
407 	uint32 vecIndex = 0;
408 	size_t vecOffset = 0;
409 	size_t bytesLeft = *_numBytes;
410 	status_t status;
411 
412 	while (true) {
413 		file_io_vec fileVecs[8];
414 		size_t fileVecCount = 8;
415 
416 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
417 			&fileVecCount, 0);
418 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
419 			break;
420 
421 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
422 
423 		size_t bytes = bytesLeft;
424 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
425 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
426 		if (status != B_OK || !bufferOverflow)
427 			break;
428 
429 		pos += bytes;
430 		bytesLeft -= bytes;
431 	}
432 
433 	return status;
434 }
435 
436 
437 static status_t
438 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
439 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
440 {
441 	Volume* volume = (Volume*)_volume->private_volume;
442 	Inode* inode = (Inode*)_node->private_node;
443 
444 	if (volume->IsReadOnly())
445 		return B_READ_ONLY_DEVICE;
446 
447 	if (inode->FileCache() == NULL)
448 		RETURN_ERROR(B_BAD_VALUE);
449 
450 	InodeReadLocker _(inode);
451 
452 	uint32 vecIndex = 0;
453 	size_t vecOffset = 0;
454 	size_t bytesLeft = *_numBytes;
455 	status_t status;
456 
457 	while (true) {
458 		file_io_vec fileVecs[8];
459 		size_t fileVecCount = 8;
460 
461 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
462 			&fileVecCount, 0);
463 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
464 			break;
465 
466 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
467 
468 		size_t bytes = bytesLeft;
469 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
470 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
471 		if (status != B_OK || !bufferOverflow)
472 			break;
473 
474 		pos += bytes;
475 		bytesLeft -= bytes;
476 	}
477 
478 	return status;
479 }
480 
481 
482 static status_t
483 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
484 {
485 	Volume* volume = (Volume*)_volume->private_volume;
486 	Inode* inode = (Inode*)_node->private_node;
487 
488 #ifndef BFS_SHELL
489 	if (io_request_is_write(request) && volume->IsReadOnly()) {
490 		notify_io_request(request, B_READ_ONLY_DEVICE);
491 		return B_READ_ONLY_DEVICE;
492 	}
493 #endif
494 
495 	if (inode->FileCache() == NULL) {
496 #ifndef BFS_SHELL
497 		notify_io_request(request, B_BAD_VALUE);
498 #endif
499 		RETURN_ERROR(B_BAD_VALUE);
500 	}
501 
502 	// We lock the node here and will unlock it in the "finished" hook.
503 	rw_lock_read_lock(&inode->Lock());
504 
505 	return do_iterative_fd_io(volume->Device(), request,
506 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
507 }
508 
509 
/*!	Translates the file range starting at \a offset with \a size bytes into
	on-disk extents, one block run per entry of \a vecs.
	On entry, \a _count holds the capacity of \a vecs; on return, it holds
	the number of entries that were filled in.
	Returns B_OK once the range (or the end of the file) is covered,
	B_BUFFER_OVERFLOW if \a vecs ran out of entries before that, or the
	error returned by Inode::FindBlockRun().
*/
static status_t
bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
	struct file_io_vec* vecs, size_t* _count)
{
	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	int32 blockShift = volume->BlockShift();
	uint32 index = 0, max = *_count;
	block_run run;
	off_t fileOffset;

	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));

	while (true) {
		// find the run containing "offset"; fileOffset is filled in by
		// FindBlockRun() (presumably the file position at which the run
		// starts - confirm against Inode::FindBlockRun())
		status_t status = inode->FindBlockRun(offset, run, fileOffset);
		if (status != B_OK)
			return status;

		// the extent starts (offset - fileOffset) bytes into the run, and
		// extends to the end of the run
		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
		vecs[index].length = (run.Length() << blockShift) - offset + fileOffset;

		// are we already done?
		if ((uint64)size <= (uint64)vecs[index].length
			|| (uint64)offset + (uint64)vecs[index].length
				>= (uint64)inode->Size()) {
			if ((uint64)offset + (uint64)vecs[index].length
					> (uint64)inode->Size()) {
				// make sure the extent ends with the last official file
				// block (without taking any preallocations into account)
				vecs[index].length = round_up(inode->Size() - offset,
					volume->BlockSize());
			}
			*_count = index + 1;
			return B_OK;
		}

		// continue with the part of the range that follows this run
		offset += vecs[index].length;
		size -= vecs[index].length;
		index++;

		if (index >= max) {
			// we're out of file_io_vecs; let's bail out
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}
	}

	// can never get here
	return B_ERROR;
}
561 
562 
563 //	#pragma mark -
564 
565 
566 static status_t
567 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
568 	ino_t* _vnodeID)
569 {
570 	Volume* volume = (Volume*)_volume->private_volume;
571 	Inode* directory = (Inode*)_directory->private_node;
572 
573 	InodeReadLocker locker(directory);
574 
575 	// check access permissions
576 	status_t status = directory->CheckPermissions(X_OK);
577 	if (status != B_OK)
578 		RETURN_ERROR(status);
579 
580 	BPlusTree* tree = directory->Tree();
581 	if (tree == NULL)
582 		RETURN_ERROR(B_BAD_VALUE);
583 
584 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
585 	if (status != B_OK) {
586 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
587 		return status;
588 	}
589 
590 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
591 
592 	locker.Unlock();
593 
594 	Inode* inode;
595 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
596 	if (status != B_OK) {
597 		REPORT_ERROR(status);
598 		return B_ENTRY_NOT_FOUND;
599 	}
600 
601 	return B_OK;
602 }
603 
604 
605 static status_t
606 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
607 	size_t bufferSize)
608 {
609 	Inode* inode = (Inode*)_node->private_node;
610 
611 	return inode->GetName(buffer, bufferSize);
612 }
613 
614 
615 static status_t
616 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
617 	void* buffer, size_t bufferLength)
618 {
619 	FUNCTION_START(("node = %p, cmd = %lu, buf = %p, len = %ld\n", _node, cmd,
620 		buffer, bufferLength));
621 
622 	Volume* volume = (Volume*)_volume->private_volume;
623 
624 	switch (cmd) {
625 		case BFS_IOCTL_VERSION:
626 		{
627 			uint32 version = 0x10000;
628 			return user_memcpy(buffer, &version, sizeof(uint32));
629 		}
630 		case BFS_IOCTL_START_CHECKING:
631 		{
632 			// start checking
633 			BlockAllocator& allocator = volume->Allocator();
634 			check_control control;
635 			if (user_memcpy(&control, buffer, sizeof(check_control)) != B_OK)
636 				return B_BAD_ADDRESS;
637 
638 			status_t status = allocator.StartChecking(&control);
639 			if (status == B_OK) {
640 				file_cookie* cookie = (file_cookie*)_cookie;
641 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
642 			}
643 
644 			return status;
645 		}
646 		case BFS_IOCTL_STOP_CHECKING:
647 		{
648 			// stop checking
649 			BlockAllocator& allocator = volume->Allocator();
650 			check_control control;
651 
652 			status_t status = allocator.StopChecking(&control);
653 			if (status == B_OK) {
654 				file_cookie* cookie = (file_cookie*)_cookie;
655 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
656 			}
657 			if (status == B_OK)
658 				status = user_memcpy(buffer, &control, sizeof(check_control));
659 
660 			return status;
661 		}
662 		case BFS_IOCTL_CHECK_NEXT_NODE:
663 		{
664 			// check next
665 			BlockAllocator& allocator = volume->Allocator();
666 			check_control control;
667 
668 			status_t status = allocator.CheckNextNode(&control);
669 			if (status == B_OK)
670 				status = user_memcpy(buffer, &control, sizeof(check_control));
671 
672 			return status;
673 		}
674 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
675 		{
676 			// let's makebootable (or anyone else) update the boot block
677 			// while BFS is mounted
678 			update_boot_block update;
679 			if (bufferLength != sizeof(update_boot_block))
680 				return B_BAD_VALUE;
681 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
682 				return B_BAD_ADDRESS;
683 			if (update.offset < offsetof(disk_super_block, pad_to_block)
684 				|| update.length + update.offset > 512)
685 				return B_BAD_VALUE;
686 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
687 					update.data, update.length) != B_OK)
688 				return B_BAD_ADDRESS;
689 
690 			return volume->WriteSuperBlock();
691 		}
692 
693 #ifdef DEBUG_FRAGMENTER
694 		case 56741:
695 		{
696 			BlockAllocator& allocator = volume->Allocator();
697 			allocator.Fragment();
698 			return B_OK;
699 		}
700 #endif
701 
702 #ifdef DEBUG
703 		case 56742:
704 		{
705 			// allocate all free blocks and zero them out
706 			// (a test for the BlockAllocator)!
707 			BlockAllocator& allocator = volume->Allocator();
708 			Transaction transaction(volume, 0);
709 			CachedBlock cached(volume);
710 			block_run run;
711 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
712 					== B_OK) {
713 				PRINT(("write block_run(%ld, %d, %d)\n", run.allocation_group,
714 					run.start, run.length));
715 				for (int32 i = 0;i < run.length;i++) {
716 					uint8* block = cached.SetToWritable(transaction, run);
717 					if (block != NULL)
718 						memset(block, 0, volume->BlockSize());
719 				}
720 			}
721 			return B_OK;
722 		}
723 #endif
724 	}
725 	return B_DEV_INVALID_IOCTL;
726 }
727 
728 
729 /*!	Sets the open-mode flags for the open file cookie - only
730 	supports O_APPEND currently, but that should be sufficient
731 	for a file system.
732 */
733 static status_t
734 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
735 {
736 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
737 
738 	file_cookie* cookie = (file_cookie*)_cookie;
739 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
740 
741 	return B_OK;
742 }
743 
744 
745 static status_t
746 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
747 {
748 	FUNCTION();
749 
750 	Inode* inode = (Inode*)_node->private_node;
751 	return inode->Sync();
752 }
753 
754 
755 static status_t
756 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
757 {
758 	FUNCTION();
759 
760 	Inode* inode = (Inode*)_node->private_node;
761 	fill_stat_buffer(inode, *stat);
762 	return B_OK;
763 }
764 
765 
/*!	Applies the fields of \a stat that are selected by \a mask to the inode.
	The size is changed first, followed by owner, group, mode, and the
	creation, modification, and status change times; each section performs
	its own permission check. If successful, the inode is written back, the
	transaction is completed, and notify_stat_changed() is called.

	Returns B_READ_ONLY_DEVICE on read-only volumes, B_IS_A_DIRECTORY or
	B_BAD_VALUE when trying to resize something that is not a file,
	B_NOT_ALLOWED if a permission check fails, or the error of the failed
	operation.

	NOTE(review): the sections modify the in-memory bfs_inode directly, and
	a later section may still return B_NOT_ALLOWED; nothing visible here
	reverts the earlier in-memory changes in that case - confirm this is
	handled elsewhere.
*/
static status_t
bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
	uint32 mask)
{
	FUNCTION();

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	// TODO: we should definitely check a bit more if the new stats are
	//	valid - or even better, the VFS should check this before calling us

	bfs_inode& node = inode->Node();
	bool updateTime = false;
		// set whenever a change should also update the status change time
	uid_t uid = geteuid();

	// both are evaluated once, before the inode gets locked
	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;

	Transaction transaction(volume, inode->BlockNumber());
	inode->WriteLockInTransaction(transaction);

	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
		// Since B_STAT_SIZE is the only thing that can fail directly, we
		// do it first, so that the inode state will still be consistent
		// with the on-disk version
		if (inode->IsDirectory())
			return B_IS_A_DIRECTORY;
		if (!inode->IsFile())
			return B_BAD_VALUE;
		if (!hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		off_t oldSize = inode->Size();

		status_t status = inode->SetFileSize(transaction, stat->st_size);
		if (status != B_OK)
			return status;

		// fill the new blocks (if any) with zeros
		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
			// We must not keep the inode locked during a write operation,
			// or else we might deadlock.
			rw_lock_write_unlock(&inode->Lock());
			inode->FillGapWithZeros(oldSize, inode->Size());
			rw_lock_write_lock(&inode->Lock());
		}

		if (!inode->IsDeleted()) {
			// keep the size index in sync with the new size
			Index index(volume);
			index.UpdateSize(transaction, inode);

			updateTime = true;
		}
	}

	if ((mask & B_STAT_UID) != 0) {
		// only root should be allowed
		if (uid != 0)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
		updateTime = true;
	}

	if ((mask & B_STAT_GID) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
		updateTime = true;
	}

	if ((mask & B_STAT_MODE) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		PRINT(("original mode = %ld, stat->st_mode = %d\n", node.Mode(),
			stat->st_mode));
		// only the bits covered by S_IUMSK are replaced, all others are kept
		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
			| (stat->st_mode & S_IUMSK));
		updateTime = true;
	}

	if ((mask & B_STAT_CREATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.create_time
			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
	}

	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		if (!inode->InLastModifiedIndex()) {
			// directory modification times are not part of the index
			node.last_modified_time
				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
		} else if (!inode->IsDeleted()) {
			// Index::UpdateLastModified() will set the new time in the inode
			Index index(volume);
			index.UpdateLastModified(transaction, inode,
				bfs_inode::ToInode(stat->st_mtim));
		}
	}

	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		// an explicitly requested change time wins over the current time
		bigtime_t newTime;
		if ((mask & B_STAT_CHANGE_TIME) == 0)
			newTime = bfs_inode::ToInode(real_time_clock_usecs());
		else
			newTime = bfs_inode::ToInode(stat->st_ctim);

		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
	}

	// listeners are only notified if the changes were written successfully
	status_t status = inode->WriteBack(transaction);
	if (status == B_OK)
		status = transaction.Done();
	if (status == B_OK)
		notify_stat_changed(volume->ID(), inode->ID(), mask);

	return status;
}
897 
898 
899 status_t
900 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
901 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
902 {
903 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
904 		openMode));
905 
906 	Volume* volume = (Volume*)_volume->private_volume;
907 	Inode* directory = (Inode*)_directory->private_node;
908 
909 	if (volume->IsReadOnly())
910 		return B_READ_ONLY_DEVICE;
911 
912 	if (!directory->IsDirectory())
913 		RETURN_ERROR(B_BAD_TYPE);
914 
915 	// We are creating the cookie at this point, so that we don't have
916 	// to remove the inode if we don't have enough free memory later...
917 	file_cookie* cookie = new(std::nothrow) file_cookie;
918 	if (cookie == NULL)
919 		RETURN_ERROR(B_NO_MEMORY);
920 
921 	// initialize the cookie
922 	cookie->open_mode = openMode;
923 	cookie->last_size = 0;
924 	cookie->last_notification = system_time();
925 
926 	Transaction transaction(volume, directory->BlockNumber());
927 
928 	Inode* inode;
929 	bool created;
930 	status_t status = Inode::Create(transaction, directory, name,
931 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
932 
933 	// Disable the file cache, if requested?
934 	if (status == B_OK && (openMode & O_NOCACHE) != 0
935 		&& inode->FileCache() != NULL) {
936 		status = file_cache_disable(inode->FileCache());
937 	}
938 
939 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
940 
941 	if (status == B_OK)
942 		status = transaction.Done();
943 
944 	if (status == B_OK) {
945 		// register the cookie
946 		*_cookie = cookie;
947 
948 		if (created) {
949 			notify_entry_created(volume->ID(), directory->ID(), name,
950 				*_vnodeID);
951 		}
952 	} else {
953 		entry_cache_remove(volume->ID(), directory->ID(), name);
954 		delete cookie;
955 	}
956 
957 	return status;
958 }
959 
960 
961 static status_t
962 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
963 	const char* path, int mode)
964 {
965 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
966 
967 	Volume* volume = (Volume*)_volume->private_volume;
968 	Inode* directory = (Inode*)_directory->private_node;
969 
970 	if (volume->IsReadOnly())
971 		return B_READ_ONLY_DEVICE;
972 
973 	if (!directory->IsDirectory())
974 		RETURN_ERROR(B_BAD_TYPE);
975 
976 	status_t status = directory->CheckPermissions(W_OK);
977 	if (status < B_OK)
978 		RETURN_ERROR(status);
979 
980 	Transaction transaction(volume, directory->BlockNumber());
981 
982 	Inode* link;
983 	off_t id;
984 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
985 		0, 0, NULL, &id, &link);
986 	if (status < B_OK)
987 		RETURN_ERROR(status);
988 
989 	size_t length = strlen(path);
990 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
991 		strcpy(link->Node().short_symlink, path);
992 	} else {
993 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
994 			| INODE_LOGGED);
995 
996 		// links usually don't have a file cache attached - but we now need one
997 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
998 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
999 
1000 		// The following call will have to write the inode back, so
1001 		// we don't have to do that here...
1002 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1003 	}
1004 
1005 	if (status == B_OK)
1006 		status = link->WriteBack(transaction);
1007 
1008 	// Inode::Create() left the inode locked in memory, and also doesn't
1009 	// publish links
1010 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1011 	put_vnode(volume->FSVolume(), id);
1012 
1013 	if (status == B_OK) {
1014 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1015 
1016 		status = transaction.Done();
1017 		if (status == B_OK)
1018 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1019 		else
1020 			entry_cache_remove(volume->ID(), directory->ID(), name);
1021 	}
1022 
1023 	return status;
1024 }
1025 
1026 
//!	Hard links are not supported; always returns B_UNSUPPORTED.
status_t
bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
{
	FUNCTION_START(("name = \"%s\"\n", name));

	// This one won't be implemented in a binary compatible BFS
	return B_UNSUPPORTED;
}
1035 
1036 
1037 status_t
1038 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1039 {
1040 	FUNCTION_START(("name = \"%s\"\n", name));
1041 
1042 	if (!strcmp(name, "..") || !strcmp(name, "."))
1043 		return B_NOT_ALLOWED;
1044 
1045 	Volume* volume = (Volume*)_volume->private_volume;
1046 	Inode* directory = (Inode*)_directory->private_node;
1047 
1048 	status_t status = directory->CheckPermissions(W_OK);
1049 	if (status < B_OK)
1050 		return status;
1051 
1052 	Transaction transaction(volume, directory->BlockNumber());
1053 
1054 	off_t id;
1055 	status = directory->Remove(transaction, name, &id);
1056 	if (status == B_OK) {
1057 		entry_cache_remove(volume->ID(), directory->ID(), name);
1058 
1059 		status = transaction.Done();
1060 		if (status == B_OK)
1061 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1062 		else
1063 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1064 	}
1065 	return status;
1066 }
1067 
1068 
1069 status_t
1070 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1071 	fs_vnode* _newDir, const char* newName)
1072 {
1073 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1074 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1075 
1076 	Volume* volume = (Volume*)_volume->private_volume;
1077 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1078 	Inode* newDirectory = (Inode*)_newDir->private_node;
1079 
1080 	// are we already done?
1081 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1082 		return B_OK;
1083 
1084 	Transaction transaction(volume, oldDirectory->BlockNumber());
1085 
1086 	oldDirectory->WriteLockInTransaction(transaction);
1087 	if (oldDirectory != newDirectory)
1088 		newDirectory->WriteLockInTransaction(transaction);
1089 
1090 	// are we allowed to do what we've been told?
1091 	status_t status = oldDirectory->CheckPermissions(W_OK);
1092 	if (status == B_OK)
1093 		status = newDirectory->CheckPermissions(W_OK);
1094 	if (status != B_OK)
1095 		return status;
1096 
1097 	// Get the directory's tree, and a pointer to the inode which should be
1098 	// changed
1099 	BPlusTree* tree = oldDirectory->Tree();
1100 	if (tree == NULL)
1101 		RETURN_ERROR(B_BAD_VALUE);
1102 
1103 	off_t id;
1104 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1105 	if (status != B_OK)
1106 		RETURN_ERROR(status);
1107 
1108 	Vnode vnode(volume, id);
1109 	Inode* inode;
1110 	if (vnode.Get(&inode) != B_OK)
1111 		return B_IO_ERROR;
1112 
1113 	// Don't move a directory into one of its children - we soar up
1114 	// from the newDirectory to either the root node or the old
1115 	// directory, whichever comes first.
1116 	// If we meet our inode on that way, we have to bail out.
1117 
1118 	if (oldDirectory != newDirectory) {
1119 		ino_t parent = newDirectory->ID();
1120 		ino_t root = volume->RootNode()->ID();
1121 
1122 		while (true) {
1123 			if (parent == id)
1124 				return B_BAD_VALUE;
1125 			else if (parent == root || parent == oldDirectory->ID())
1126 				break;
1127 
1128 			Vnode vnode(volume, parent);
1129 			Inode* parentNode;
1130 			if (vnode.Get(&parentNode) != B_OK)
1131 				return B_ERROR;
1132 
1133 			parent = volume->ToVnode(parentNode->Parent());
1134 		}
1135 	}
1136 
1137 	// Everything okay? Then lets get to work...
1138 
1139 	// First, try to make sure there is nothing that will stop us in
1140 	// the target directory - since this is the only non-critical
1141 	// failure, we will test this case first
1142 	BPlusTree* newTree = tree;
1143 	if (newDirectory != oldDirectory) {
1144 		newTree = newDirectory->Tree();
1145 		if (newTree == NULL)
1146 			RETURN_ERROR(B_BAD_VALUE);
1147 	}
1148 
1149 	status = newTree->Insert(transaction, (const uint8*)newName,
1150 		strlen(newName), id);
1151 	if (status == B_NAME_IN_USE) {
1152 		// If there is already a file with that name, we have to remove
1153 		// it, as long it's not a directory with files in it
1154 		off_t clobber;
1155 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1156 				< B_OK)
1157 			return B_NAME_IN_USE;
1158 		if (clobber == id)
1159 			return B_BAD_VALUE;
1160 
1161 		Vnode vnode(volume, clobber);
1162 		Inode* other;
1163 		if (vnode.Get(&other) < B_OK)
1164 			return B_NAME_IN_USE;
1165 
1166 		// only allowed, if either both nodes are directories or neither is
1167 		if (inode->IsDirectory() != other->IsDirectory())
1168 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1169 
1170 		status = newDirectory->Remove(transaction, newName, NULL,
1171 			other->IsDirectory());
1172 		if (status < B_OK)
1173 			return status;
1174 
1175 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1176 
1177 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1178 			clobber);
1179 
1180 		status = newTree->Insert(transaction, (const uint8*)newName,
1181 			strlen(newName), id);
1182 	}
1183 	if (status != B_OK)
1184 		return status;
1185 
1186 	inode->WriteLockInTransaction(transaction);
1187 
1188 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1189 		newDirectory->ID(), newName);
1190 
1191 	// update the name only when they differ
1192 	if (strcmp(oldName, newName)) {
1193 		status = inode->SetName(transaction, newName);
1194 		if (status == B_OK) {
1195 			Index index(volume);
1196 			index.UpdateName(transaction, oldName, newName, inode);
1197 		}
1198 	}
1199 
1200 	if (status == B_OK) {
1201 		status = tree->Remove(transaction, (const uint8*)oldName,
1202 			strlen(oldName), id);
1203 		if (status == B_OK) {
1204 			inode->Parent() = newDirectory->BlockRun();
1205 
1206 			// if it's a directory, update the parent directory pointer
1207 			// in its tree if necessary
1208 			BPlusTree* movedTree = inode->Tree();
1209 			if (oldDirectory != newDirectory
1210 				&& inode->IsDirectory()
1211 				&& movedTree != NULL) {
1212 				status = movedTree->Replace(transaction, (const uint8*)"..",
1213 					2, newDirectory->ID());
1214 
1215 				if (status == B_OK) {
1216 					// update/add the cache entry for the parent
1217 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1218 				}
1219 			}
1220 
1221 			if (status == B_OK && newDirectory != oldDirectory)
1222 				status = oldDirectory->ContainerContentsChanged(transaction);
1223 			if (status == B_OK)
1224 				status = newDirectory->ContainerContentsChanged(transaction);
1225 
1226 			if (status == B_OK)
1227 				status = inode->WriteBack(transaction);
1228 
1229 			if (status == B_OK) {
1230 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1231 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1232 
1233 				status = transaction.Done();
1234 				if (status == B_OK) {
1235 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1236 						oldName, newDirectory->ID(), newName, id);
1237 					return B_OK;
1238 				}
1239 
1240 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1241 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1242 			}
1243 		}
1244 	}
1245 
1246 	return status;
1247 }
1248 
1249 
1250 static status_t
1251 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1252 {
1253 	FUNCTION();
1254 
1255 	Volume* volume = (Volume*)_volume->private_volume;
1256 	Inode* inode = (Inode*)_node->private_node;
1257 
1258 	// Opening a directory read-only is allowed, although you can't read
1259 	// any data from it.
1260 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1261 		return B_IS_A_DIRECTORY;
1262 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1263 		return B_NOT_A_DIRECTORY;
1264 
1265 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1266 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1267 	if (status != B_OK)
1268 		RETURN_ERROR(status);
1269 
1270 	file_cookie* cookie = new(std::nothrow) file_cookie;
1271 	if (cookie == NULL)
1272 		RETURN_ERROR(B_NO_MEMORY);
1273 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1274 
1275 	// initialize the cookie
1276 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1277 	cookie->last_size = inode->Size();
1278 	cookie->last_notification = system_time();
1279 
1280 	// Disable the file cache, if requested?
1281 	CObjectDeleter<void> fileCacheEnabler(file_cache_enable);
1282 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1283 		status = file_cache_disable(inode->FileCache());
1284 		if (status != B_OK)
1285 			return status;
1286 		fileCacheEnabler.SetTo(inode->FileCache());
1287 	}
1288 
1289 	// Should we truncate the file?
1290 	if ((openMode & O_TRUNC) != 0) {
1291 		if ((openMode & O_RWMASK) == O_RDONLY)
1292 			return B_NOT_ALLOWED;
1293 
1294 		Transaction transaction(volume, inode->BlockNumber());
1295 		inode->WriteLockInTransaction(transaction);
1296 
1297 		status_t status = inode->SetFileSize(transaction, 0);
1298 		if (status == B_OK)
1299 			status = inode->WriteBack(transaction);
1300 		if (status == B_OK)
1301 			status = transaction.Done();
1302 		if (status != B_OK)
1303 			return status;
1304 	}
1305 
1306 	fileCacheEnabler.Detach();
1307 	cookieDeleter.Detach();
1308 	*_cookie = cookie;
1309 	return B_OK;
1310 }
1311 
1312 
1313 static status_t
1314 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1315 	void* buffer, size_t* _length)
1316 {
1317 	//FUNCTION();
1318 	Inode* inode = (Inode*)_node->private_node;
1319 
1320 	if (!inode->HasUserAccessableStream()) {
1321 		*_length = 0;
1322 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1323 	}
1324 
1325 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1326 }
1327 
1328 
1329 static status_t
1330 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1331 	const void* buffer, size_t* _length)
1332 {
1333 	//FUNCTION();
1334 	Volume* volume = (Volume*)_volume->private_volume;
1335 	Inode* inode = (Inode*)_node->private_node;
1336 
1337 	if (volume->IsReadOnly())
1338 		return B_READ_ONLY_DEVICE;
1339 
1340 	if (!inode->HasUserAccessableStream()) {
1341 		*_length = 0;
1342 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1343 	}
1344 
1345 	file_cookie* cookie = (file_cookie*)_cookie;
1346 
1347 	if (cookie->open_mode & O_APPEND)
1348 		pos = inode->Size();
1349 
1350 	Transaction transaction;
1351 		// We are not starting the transaction here, since
1352 		// it might not be needed at all (the contents of
1353 		// regular files aren't logged)
1354 
1355 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1356 		_length);
1357 	if (status == B_OK)
1358 		status = transaction.Done();
1359 	if (status == B_OK) {
1360 		InodeReadLocker locker(inode);
1361 
1362 		// periodically notify if the file size has changed
1363 		// TODO: should we better test for a change in the last_modified time only?
1364 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1365 			&& system_time() > cookie->last_notification
1366 					+ INODE_NOTIFICATION_INTERVAL) {
1367 			notify_stat_changed(volume->ID(), inode->ID(),
1368 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1369 			cookie->last_size = inode->Size();
1370 			cookie->last_notification = system_time();
1371 		}
1372 	}
1373 
1374 	return status;
1375 }
1376 
1377 
1378 static status_t
1379 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1380 {
1381 	FUNCTION();
1382 	return B_OK;
1383 }
1384 
1385 
1386 static status_t
1387 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1388 {
1389 	FUNCTION();
1390 
1391 	file_cookie* cookie = (file_cookie*)_cookie;
1392 	Volume* volume = (Volume*)_volume->private_volume;
1393 	Inode* inode = (Inode*)_node->private_node;
1394 
1395 	Transaction transaction;
1396 	bool needsTrimming = false;
1397 
1398 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1399 		InodeReadLocker locker(inode);
1400 		needsTrimming = inode->NeedsTrimming();
1401 
1402 		if ((cookie->open_mode & O_RWMASK) != 0
1403 			&& !inode->IsDeleted()
1404 			&& (needsTrimming
1405 				|| inode->OldLastModified() != inode->LastModified()
1406 				|| (inode->InSizeIndex()
1407 					// TODO: this can prevent the size update notification
1408 					// for nodes not in the index!
1409 					&& inode->OldSize() != inode->Size()))) {
1410 			locker.Unlock();
1411 			transaction.Start(volume, inode->BlockNumber());
1412 		}
1413 	}
1414 
1415 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1416 
1417 	if (status == B_OK) {
1418 		inode->WriteLockInTransaction(transaction);
1419 
1420 		// trim the preallocated blocks and update the size,
1421 		// and last_modified indices if needed
1422 		bool changedSize = false, changedTime = false;
1423 		Index index(volume);
1424 
1425 		if (needsTrimming) {
1426 			status = inode->TrimPreallocation(transaction);
1427 			if (status < B_OK) {
1428 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1429 					", transaction %d: %s!\n", inode->ID(),
1430 					(int)transaction.ID(), strerror(status)));
1431 
1432 				// we still want this transaction to succeed
1433 				status = B_OK;
1434 			}
1435 		}
1436 		if (inode->OldSize() != inode->Size()) {
1437 			if (inode->InSizeIndex())
1438 				index.UpdateSize(transaction, inode);
1439 			changedSize = true;
1440 		}
1441 		if (inode->OldLastModified() != inode->LastModified()) {
1442 			if (inode->InLastModifiedIndex()) {
1443 				index.UpdateLastModified(transaction, inode,
1444 					inode->LastModified());
1445 			}
1446 			changedTime = true;
1447 
1448 			// updating the index doesn't write back the inode
1449 			inode->WriteBack(transaction);
1450 		}
1451 
1452 		if (changedSize || changedTime) {
1453 			notify_stat_changed(volume->ID(), inode->ID(),
1454 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1455 				| (changedSize ? B_STAT_SIZE : 0));
1456 		}
1457 	}
1458 	if (status == B_OK)
1459 		transaction.Done();
1460 
1461 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1462 		// "chkbfs" exited abnormally, so we have to stop it here...
1463 		FATAL(("check process was aborted!\n"));
1464 		volume->Allocator().StopChecking(NULL);
1465 	}
1466 
1467 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1468 		file_cache_enable(inode->FileCache());
1469 
1470 	delete cookie;
1471 	return B_OK;
1472 }
1473 
1474 
1475 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1476 	is not allowed.
1477 */
1478 static status_t
1479 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1480 {
1481 	//FUNCTION();
1482 
1483 	Inode* inode = (Inode*)_node->private_node;
1484 	status_t status = inode->CheckPermissions(accessMode);
1485 	if (status < B_OK)
1486 		RETURN_ERROR(status);
1487 
1488 	return B_OK;
1489 }
1490 
1491 
1492 static status_t
1493 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1494 	size_t* _bufferSize)
1495 {
1496 	FUNCTION();
1497 
1498 	Inode* inode = (Inode*)_node->private_node;
1499 
1500 	if (!inode->IsSymLink())
1501 		RETURN_ERROR(B_BAD_VALUE);
1502 
1503 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1504 		if ((uint64)inode->Size() < (uint64)*_bufferSize)
1505 			*_bufferSize = inode->Size();
1506 
1507 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1508 		if (status < B_OK)
1509 			RETURN_ERROR(status);
1510 
1511 		return B_OK;
1512 	}
1513 
1514 	size_t linkLen = strlen(inode->Node().short_symlink);
1515 	if (linkLen < *_bufferSize)
1516 		*_bufferSize = linkLen;
1517 
1518 	return user_memcpy(buffer, inode->Node().short_symlink, *_bufferSize);
1519 }
1520 
1521 
1522 //	#pragma mark - Directory functions
1523 
1524 
1525 static status_t
1526 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1527 	int mode)
1528 {
1529 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1530 
1531 	Volume* volume = (Volume*)_volume->private_volume;
1532 	Inode* directory = (Inode*)_directory->private_node;
1533 
1534 	if (volume->IsReadOnly())
1535 		return B_READ_ONLY_DEVICE;
1536 
1537 	if (!directory->IsDirectory())
1538 		RETURN_ERROR(B_BAD_TYPE);
1539 
1540 	status_t status = directory->CheckPermissions(W_OK);
1541 	if (status < B_OK)
1542 		RETURN_ERROR(status);
1543 
1544 	Transaction transaction(volume, directory->BlockNumber());
1545 
1546 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1547 	// need it anyway
1548 	off_t id;
1549 	status = Inode::Create(transaction, directory, name,
1550 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1551 	if (status == B_OK) {
1552 		put_vnode(volume->FSVolume(), id);
1553 
1554 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1555 
1556 		status = transaction.Done();
1557 		if (status == B_OK)
1558 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1559 		else
1560 			entry_cache_remove(volume->ID(), directory->ID(), name);
1561 	}
1562 
1563 	return status;
1564 }
1565 
1566 
1567 static status_t
1568 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1569 {
1570 	FUNCTION_START(("name = \"%s\"\n", name));
1571 
1572 	Volume* volume = (Volume*)_volume->private_volume;
1573 	Inode* directory = (Inode*)_directory->private_node;
1574 
1575 	Transaction transaction(volume, directory->BlockNumber());
1576 
1577 	off_t id;
1578 	status_t status = directory->Remove(transaction, name, &id, true);
1579 	if (status == B_OK) {
1580 		// Remove the cache entry for the directory and potentially also
1581 		// the parent entry still belonging to the directory
1582 		entry_cache_remove(volume->ID(), directory->ID(), name);
1583 		entry_cache_remove(volume->ID(), id, "..");
1584 
1585 		status = transaction.Done();
1586 		if (status == B_OK)
1587 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1588 		else {
1589 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1590 			entry_cache_add(volume->ID(), id, "..", id);
1591 		}
1592 	}
1593 
1594 	return status;
1595 }
1596 
1597 
1598 /*!	Opens a directory ready to be traversed.
1599 	bfs_open_dir() is also used by bfs_open_index_dir().
1600 */
1601 static status_t
1602 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1603 {
1604 	FUNCTION();
1605 
1606 	Inode* inode = (Inode*)_node->private_node;
1607 	status_t status = inode->CheckPermissions(R_OK);
1608 	if (status < B_OK)
1609 		RETURN_ERROR(status);
1610 
1611 	// we don't ask here for directories only, because the bfs_open_index_dir()
1612 	// function utilizes us (so we must be able to open indices as well)
1613 	if (!inode->IsContainer())
1614 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1615 
1616 	BPlusTree* tree = inode->Tree();
1617 	if (tree == NULL)
1618 		RETURN_ERROR(B_BAD_VALUE);
1619 
1620 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1621 	if (iterator == NULL)
1622 		RETURN_ERROR(B_NO_MEMORY);
1623 
1624 	*_cookie = iterator;
1625 	return B_OK;
1626 }
1627 
1628 
1629 static status_t
1630 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1631 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1632 {
1633 	FUNCTION();
1634 
1635 	TreeIterator* iterator = (TreeIterator*)_cookie;
1636 
1637 	uint16 length;
1638 	ino_t id;
1639 	status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1640 		bufferSize, &id);
1641 	if (status == B_ENTRY_NOT_FOUND) {
1642 		*_num = 0;
1643 		return B_OK;
1644 	} else if (status != B_OK)
1645 		RETURN_ERROR(status);
1646 
1647 	Volume* volume = (Volume*)_volume->private_volume;
1648 
1649 	dirent->d_dev = volume->ID();
1650 	dirent->d_ino = id;
1651 
1652 	dirent->d_reclen = sizeof(struct dirent) + length;
1653 
1654 	*_num = 1;
1655 	return B_OK;
1656 }
1657 
1658 
1659 /*!	Sets the TreeIterator back to the beginning of the directory. */
1660 static status_t
1661 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1662 {
1663 	FUNCTION();
1664 	TreeIterator* iterator = (TreeIterator*)_cookie;
1665 
1666 	return iterator->Rewind();
1667 }
1668 
1669 
1670 static status_t
1671 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1672 {
1673 	FUNCTION();
1674 	return B_OK;
1675 }
1676 
1677 
1678 static status_t
1679 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1680 {
1681 	delete (TreeIterator*)_cookie;
1682 	return B_OK;
1683 }
1684 
1685 
1686 //	#pragma mark - Attribute functions
1687 
1688 
1689 static status_t
1690 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1691 {
1692 	Inode* inode = (Inode*)_node->private_node;
1693 
1694 	FUNCTION();
1695 
1696 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1697 	if (iterator == NULL)
1698 		RETURN_ERROR(B_NO_MEMORY);
1699 
1700 	*_cookie = iterator;
1701 	return B_OK;
1702 }
1703 
1704 
1705 static status_t
1706 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1707 {
1708 	FUNCTION();
1709 	return B_OK;
1710 }
1711 
1712 
1713 static status_t
1714 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1715 {
1716 	FUNCTION();
1717 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1718 
1719 	delete iterator;
1720 	return B_OK;
1721 }
1722 
1723 
1724 static status_t
1725 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1726 {
1727 	FUNCTION();
1728 
1729 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1730 	RETURN_ERROR(iterator->Rewind());
1731 }
1732 
1733 
1734 static status_t
1735 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1736 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1737 {
1738 	FUNCTION();
1739 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1740 
1741 	uint32 type;
1742 	size_t length;
1743 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1744 		&dirent->d_ino);
1745 	if (status == B_ENTRY_NOT_FOUND) {
1746 		*_num = 0;
1747 		return B_OK;
1748 	} else if (status != B_OK) {
1749 		RETURN_ERROR(status);
1750 	}
1751 
1752 	Volume* volume = (Volume*)_volume->private_volume;
1753 
1754 	dirent->d_dev = volume->ID();
1755 	dirent->d_reclen = sizeof(struct dirent) + length;
1756 
1757 	*_num = 1;
1758 	return B_OK;
1759 }
1760 
1761 
1762 static status_t
1763 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1764 	uint32 type, int openMode, void** _cookie)
1765 {
1766 	FUNCTION();
1767 
1768 	Volume* volume = (Volume*)_volume->private_volume;
1769 	if (volume->IsReadOnly())
1770 		return B_READ_ONLY_DEVICE;
1771 
1772 	Inode* inode = (Inode*)_node->private_node;
1773 	Attribute attribute(inode);
1774 
1775 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1776 }
1777 
1778 
1779 static status_t
1780 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1781 	int openMode, void** _cookie)
1782 {
1783 	FUNCTION();
1784 
1785 	Inode* inode = (Inode*)_node->private_node;
1786 	Attribute attribute(inode);
1787 
1788 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1789 }
1790 
1791 
1792 static status_t
1793 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1794 {
1795 	return B_OK;
1796 }
1797 
1798 
1799 static status_t
1800 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1801 {
1802 	delete (attr_cookie*)cookie;
1803 	return B_OK;
1804 }
1805 
1806 
1807 static status_t
1808 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1809 	void* buffer, size_t* _length)
1810 {
1811 	FUNCTION();
1812 
1813 	attr_cookie* cookie = (attr_cookie*)_cookie;
1814 	Inode* inode = (Inode*)_file->private_node;
1815 
1816 	Attribute attribute(inode, cookie);
1817 
1818 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1819 }
1820 
1821 
1822 static status_t
1823 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1824 	off_t pos, const void* buffer, size_t* _length)
1825 {
1826 	FUNCTION();
1827 
1828 	attr_cookie* cookie = (attr_cookie*)_cookie;
1829 	Volume* volume = (Volume*)_volume->private_volume;
1830 	Inode* inode = (Inode*)_file->private_node;
1831 
1832 	Transaction transaction(volume, inode->BlockNumber());
1833 	Attribute attribute(inode, cookie);
1834 
1835 	bool created;
1836 	status_t status = attribute.Write(transaction, cookie, pos,
1837 		(const uint8*)buffer, _length, &created);
1838 	if (status == B_OK) {
1839 		status = transaction.Done();
1840 		if (status == B_OK) {
1841 			notify_attribute_changed(volume->ID(), inode->ID(), cookie->name,
1842 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1843 			notify_stat_changed(volume->ID(), inode->ID(), B_STAT_CHANGE_TIME);
1844 		}
1845 	}
1846 
1847 	return status;
1848 }
1849 
1850 
1851 static status_t
1852 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1853 	struct stat* stat)
1854 {
1855 	FUNCTION();
1856 
1857 	attr_cookie* cookie = (attr_cookie*)_cookie;
1858 	Inode* inode = (Inode*)_file->private_node;
1859 
1860 	Attribute attribute(inode, cookie);
1861 
1862 	return attribute.Stat(*stat);
1863 }
1864 
1865 
1866 static status_t
1867 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1868 	const struct stat* stat, int statMask)
1869 {
1870 	// TODO: Implement (at least setting the size)!
1871 	return EOPNOTSUPP;
1872 }
1873 
1874 
1875 static status_t
1876 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1877 	fs_vnode* toFile, const char* toName)
1878 {
1879 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1880 
1881 	// TODO: implement bfs_rename_attr()!
1882 	// There will probably be an API to move one attribute to another file,
1883 	// making that function much more complicated - oh joy ;-)
1884 
1885 	return EOPNOTSUPP;
1886 }
1887 
1888 
1889 static status_t
1890 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
1891 {
1892 	FUNCTION_START(("name = \"%s\"\n", name));
1893 
1894 	Volume* volume = (Volume*)_volume->private_volume;
1895 	Inode* inode = (Inode*)_node->private_node;
1896 
1897 	status_t status = inode->CheckPermissions(W_OK);
1898 	if (status != B_OK)
1899 		return status;
1900 
1901 	Transaction transaction(volume, inode->BlockNumber());
1902 
1903 	status = inode->RemoveAttribute(transaction, name);
1904 	if (status == B_OK)
1905 		status = transaction.Done();
1906 	if (status == B_OK) {
1907 		notify_attribute_changed(volume->ID(), inode->ID(), name,
1908 			B_ATTR_REMOVED);
1909 	}
1910 
1911 	return status;
1912 }
1913 
1914 
1915 //	#pragma mark - Special Nodes
1916 
1917 
1918 status_t
1919 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
1920 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
1921 	fs_vnode* _superVnode, ino_t* _nodeID)
1922 {
1923 	// no need to support entry-less nodes
1924 	if (name == NULL)
1925 		return B_UNSUPPORTED;
1926 
1927 	FUNCTION_START(("name = \"%s\", mode = %d, flags = 0x%lx, subVnode: %p\n",
1928 		name, mode, flags, subVnode));
1929 
1930 	Volume* volume = (Volume*)_volume->private_volume;
1931 	Inode* directory = (Inode*)_directory->private_node;
1932 
1933 	if (volume->IsReadOnly())
1934 		return B_READ_ONLY_DEVICE;
1935 
1936 	if (!directory->IsDirectory())
1937 		RETURN_ERROR(B_BAD_TYPE);
1938 
1939 	status_t status = directory->CheckPermissions(W_OK);
1940 	if (status < B_OK)
1941 		RETURN_ERROR(status);
1942 
1943 	Transaction transaction(volume, directory->BlockNumber());
1944 
1945 	off_t id;
1946 	Inode* inode;
1947 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
1948 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
1949 	if (status == B_OK) {
1950 		_superVnode->private_node = inode;
1951 		_superVnode->ops = &gBFSVnodeOps;
1952 		*_nodeID = id;
1953 
1954 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1955 
1956 		status = transaction.Done();
1957 		if (status == B_OK)
1958 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1959 		else
1960 			entry_cache_remove(volume->ID(), directory->ID(), name);
1961 	}
1962 
1963 	return status;
1964 }
1965 
1966 
1967 //	#pragma mark - Index functions
1968 
1969 
1970 static status_t
1971 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
1972 {
1973 	FUNCTION();
1974 
1975 	Volume* volume = (Volume*)_volume->private_volume;
1976 
1977 	if (volume->IndicesNode() == NULL) {
1978 		// This volume does not have any indices
1979 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
1980 	}
1981 
1982 	// Since the indices root node is just a directory, and we are storing
1983 	// a pointer to it in our Volume object, we can just use the directory
1984 	// traversal functions.
1985 	// In fact we're storing it in the Volume object for that reason.
1986 
1987 	fs_vnode indicesNode;
1988 	indicesNode.private_node = volume->IndicesNode();
1989 
1990 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
1991 }
1992 
1993 
1994 static status_t
1995 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
1996 {
1997 	FUNCTION();
1998 
1999 	Volume* volume = (Volume*)_volume->private_volume;
2000 
2001 	fs_vnode indicesNode;
2002 	indicesNode.private_node = volume->IndicesNode();
2003 
2004 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2005 }
2006 
2007 
2008 static status_t
2009 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2010 {
2011 	FUNCTION();
2012 
2013 	Volume* volume = (Volume*)_volume->private_volume;
2014 
2015 	fs_vnode indicesNode;
2016 	indicesNode.private_node = volume->IndicesNode();
2017 
2018 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2019 }
2020 
2021 
2022 static status_t
2023 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2024 {
2025 	FUNCTION();
2026 
2027 	Volume* volume = (Volume*)_volume->private_volume;
2028 
2029 	fs_vnode indicesNode;
2030 	indicesNode.private_node = volume->IndicesNode();
2031 
2032 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2033 }
2034 
2035 
2036 static status_t
2037 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2038 	size_t bufferSize, uint32* _num)
2039 {
2040 	FUNCTION();
2041 
2042 	Volume* volume = (Volume*)_volume->private_volume;
2043 
2044 	fs_vnode indicesNode;
2045 	indicesNode.private_node = volume->IndicesNode();
2046 
2047 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2048 		bufferSize, _num));
2049 }
2050 
2051 
2052 static status_t
2053 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2054 	uint32 flags)
2055 {
2056 	FUNCTION_START(("name = \"%s\", type = %ld, flags = %ld\n", name, type, flags));
2057 
2058 	Volume* volume = (Volume*)_volume->private_volume;
2059 
2060 	if (volume->IsReadOnly())
2061 		return B_READ_ONLY_DEVICE;
2062 
2063 	// only root users are allowed to create indices
2064 	if (geteuid() != 0)
2065 		return B_NOT_ALLOWED;
2066 
2067 	Transaction transaction(volume, volume->Indices());
2068 
2069 	Index index(volume);
2070 	status_t status = index.Create(transaction, name, type);
2071 
2072 	if (status == B_OK)
2073 		status = transaction.Done();
2074 
2075 	RETURN_ERROR(status);
2076 }
2077 
2078 
2079 static status_t
2080 bfs_remove_index(fs_volume* _volume, const char* name)
2081 {
2082 	FUNCTION();
2083 
2084 	Volume* volume = (Volume*)_volume->private_volume;
2085 
2086 	if (volume->IsReadOnly())
2087 		return B_READ_ONLY_DEVICE;
2088 
2089 	// only root users are allowed to remove indices
2090 	if (geteuid() != 0)
2091 		return B_NOT_ALLOWED;
2092 
2093 	Inode* indices = volume->IndicesNode();
2094 	if (indices == NULL)
2095 		return B_ENTRY_NOT_FOUND;
2096 
2097 	Transaction transaction(volume, volume->Indices());
2098 
2099 	status_t status = indices->Remove(transaction, name);
2100 	if (status == B_OK)
2101 		status = transaction.Done();
2102 
2103 	RETURN_ERROR(status);
2104 }
2105 
2106 
2107 static status_t
2108 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2109 {
2110 	FUNCTION_START(("name = %s\n", name));
2111 
2112 	Volume* volume = (Volume*)_volume->private_volume;
2113 
2114 	Index index(volume);
2115 	status_t status = index.SetTo(name);
2116 	if (status < B_OK)
2117 		RETURN_ERROR(status);
2118 
2119 	bfs_inode& node = index.Node()->Node();
2120 
2121 	stat->st_type = index.Type();
2122 	stat->st_mode = node.Mode();
2123 
2124 	stat->st_size = node.data.Size();
2125 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2126 
2127 	stat->st_nlink = 1;
2128 	stat->st_blksize = 65536;
2129 
2130 	stat->st_uid = node.UserID();
2131 	stat->st_gid = node.GroupID();
2132 
2133 	fill_stat_time(node, *stat);
2134 
2135 	return B_OK;
2136 }
2137 
2138 
2139 //	#pragma mark - Query functions
2140 
2141 
2142 static status_t
2143 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2144 	port_id port, uint32 token, void** _cookie)
2145 {
2146 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %lu, port_id = %ld, token = %ld)\n",
2147 		queryString, flags, port, token));
2148 
2149 	Volume* volume = (Volume*)_volume->private_volume;
2150 
2151 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2152 	if (expression == NULL)
2153 		RETURN_ERROR(B_NO_MEMORY);
2154 
2155 	if (expression->InitCheck() < B_OK) {
2156 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2157 			queryString, expression->Position()));
2158 
2159 		delete expression;
2160 		RETURN_ERROR(B_BAD_VALUE);
2161 	}
2162 
2163 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2164 	if (query == NULL) {
2165 		delete expression;
2166 		RETURN_ERROR(B_NO_MEMORY);
2167 	}
2168 
2169 	if (flags & B_LIVE_QUERY)
2170 		query->SetLiveMode(port, token);
2171 
2172 	*_cookie = (void*)query;
2173 
2174 	return B_OK;
2175 }
2176 
2177 
2178 static status_t
2179 bfs_close_query(fs_volume* _volume, void* cookie)
2180 {
2181 	FUNCTION();
2182 	return B_OK;
2183 }
2184 
2185 
2186 static status_t
2187 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2188 {
2189 	FUNCTION();
2190 
2191 	Query* query = (Query*)cookie;
2192 	Expression* expression = query->GetExpression();
2193 	delete query;
2194 	delete expression;
2195 
2196 	return B_OK;
2197 }
2198 
2199 
2200 static status_t
2201 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2202 	size_t bufferSize, uint32* _num)
2203 {
2204 	FUNCTION();
2205 	Query* query = (Query*)cookie;
2206 	status_t status = query->GetNextEntry(dirent, bufferSize);
2207 	if (status == B_OK)
2208 		*_num = 1;
2209 	else if (status == B_ENTRY_NOT_FOUND)
2210 		*_num = 0;
2211 	else
2212 		return status;
2213 
2214 	return B_OK;
2215 }
2216 
2217 
2218 static status_t
2219 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2220 {
2221 	FUNCTION();
2222 
2223 	Query* query = (Query*)cookie;
2224 	return query->Rewind();
2225 }
2226 
2227 
2228 //	#pragma mark -
2229 
2230 
2231 static uint32
2232 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2233 {
2234 	// TODO: We should at least check the partition size.
2235 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2236 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2237 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2238 }
2239 
2240 
2241 static status_t
2242 bfs_initialize(int fd, partition_id partitionID, const char* name,
2243 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2244 {
2245 	// check name
2246 	status_t status = check_volume_name(name);
2247 	if (status != B_OK)
2248 		return status;
2249 
2250 	// parse parameters
2251 	initialize_parameters parameters;
2252 	status = parse_initialize_parameters(parameterString, parameters);
2253 	if (status != B_OK)
2254 		return status;
2255 
2256 	update_disk_device_job_progress(job, 0);
2257 
2258 	// initialize the volume
2259 	Volume volume(NULL);
2260 	status = volume.Initialize(fd, name, parameters.blockSize,
2261 		parameters.flags);
2262 	if (status < B_OK) {
2263 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2264 		return status;
2265 	}
2266 
2267 	// rescan partition
2268 	status = scan_partition(partitionID);
2269 	if (status != B_OK)
2270 		return status;
2271 
2272 	update_disk_device_job_progress(job, 1);
2273 
2274 	// print some info, if desired
2275 	if (parameters.verbose) {
2276 		disk_super_block super = volume.SuperBlock();
2277 
2278 		INFORM(("Disk was initialized successfully.\n"));
2279 		INFORM(("\tname: \"%s\"\n", super.name));
2280 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2281 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2282 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2283 		INFORM(("\tnum allocation groups: %d\n",
2284 			(int)super.AllocationGroups()));
2285 		INFORM(("\tallocation group size: %ld blocks\n",
2286 			1L << super.AllocationGroupShift()));
2287 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2288 	}
2289 
2290 	return B_OK;
2291 }
2292 
2293 
2294 static status_t
2295 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2296 	uint32 blockSize, disk_job_id job)
2297 {
2298 	if (blockSize == 0)
2299 		return B_BAD_VALUE;
2300 
2301 	update_disk_device_job_progress(job, 0.0);
2302 
2303 	// just overwrite the superblock
2304 	disk_super_block superBlock;
2305 	memset(&superBlock, 0, sizeof(superBlock));
2306 
2307 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2308 		return errno;
2309 
2310 	update_disk_device_job_progress(job, 1.0);
2311 
2312 	return B_OK;
2313 }
2314 
2315 
2316 //	#pragma mark -
2317 
2318 
2319 static status_t
2320 bfs_std_ops(int32 op, ...)
2321 {
2322 	switch (op) {
2323 		case B_MODULE_INIT:
2324 #ifdef BFS_DEBUGGER_COMMANDS
2325 			add_debugger_commands();
2326 #endif
2327 			return B_OK;
2328 		case B_MODULE_UNINIT:
2329 #ifdef BFS_DEBUGGER_COMMANDS
2330 			remove_debugger_commands();
2331 #endif
2332 			return B_OK;
2333 
2334 		default:
2335 			return B_ERROR;
2336 	}
2337 }
2338 
2339 fs_volume_ops gBFSVolumeOps = {
2340 	&bfs_unmount,
2341 	&bfs_read_fs_stat,
2342 	&bfs_write_fs_stat,
2343 	&bfs_sync,
2344 	&bfs_get_vnode,
2345 
2346 	/* index directory & index operations */
2347 	&bfs_open_index_dir,
2348 	&bfs_close_index_dir,
2349 	&bfs_free_index_dir_cookie,
2350 	&bfs_read_index_dir,
2351 	&bfs_rewind_index_dir,
2352 
2353 	&bfs_create_index,
2354 	&bfs_remove_index,
2355 	&bfs_stat_index,
2356 
2357 	/* query operations */
2358 	&bfs_open_query,
2359 	&bfs_close_query,
2360 	&bfs_free_query_cookie,
2361 	&bfs_read_query,
2362 	&bfs_rewind_query,
2363 };
2364 
2365 fs_vnode_ops gBFSVnodeOps = {
2366 	/* vnode operations */
2367 	&bfs_lookup,
2368 	&bfs_get_vnode_name,
2369 	&bfs_put_vnode,
2370 	&bfs_remove_vnode,
2371 
2372 	/* VM file access */
2373 	&bfs_can_page,
2374 	&bfs_read_pages,
2375 	&bfs_write_pages,
2376 
2377 	&bfs_io,
2378 	NULL,	// cancel_io()
2379 
2380 	&bfs_get_file_map,
2381 
2382 	&bfs_ioctl,
2383 	&bfs_set_flags,
2384 	NULL,	// fs_select
2385 	NULL,	// fs_deselect
2386 	&bfs_fsync,
2387 
2388 	&bfs_read_link,
2389 	&bfs_create_symlink,
2390 
2391 	&bfs_link,
2392 	&bfs_unlink,
2393 	&bfs_rename,
2394 
2395 	&bfs_access,
2396 	&bfs_read_stat,
2397 	&bfs_write_stat,
2398 	NULL,	// fs_preallocate
2399 
2400 	/* file operations */
2401 	&bfs_create,
2402 	&bfs_open,
2403 	&bfs_close,
2404 	&bfs_free_cookie,
2405 	&bfs_read,
2406 	&bfs_write,
2407 
2408 	/* directory operations */
2409 	&bfs_create_dir,
2410 	&bfs_remove_dir,
2411 	&bfs_open_dir,
2412 	&bfs_close_dir,
2413 	&bfs_free_dir_cookie,
2414 	&bfs_read_dir,
2415 	&bfs_rewind_dir,
2416 
2417 	/* attribute directory operations */
2418 	&bfs_open_attr_dir,
2419 	&bfs_close_attr_dir,
2420 	&bfs_free_attr_dir_cookie,
2421 	&bfs_read_attr_dir,
2422 	&bfs_rewind_attr_dir,
2423 
2424 	/* attribute operations */
2425 	&bfs_create_attr,
2426 	&bfs_open_attr,
2427 	&bfs_close_attr,
2428 	&bfs_free_attr_cookie,
2429 	&bfs_read_attr,
2430 	&bfs_write_attr,
2431 
2432 	&bfs_read_attr_stat,
2433 	&bfs_write_attr_stat,
2434 	&bfs_rename_attr,
2435 	&bfs_remove_attr,
2436 
2437 	/* special nodes */
2438 	&bfs_create_special_node
2439 };
2440 
2441 static file_system_module_info sBeFileSystem = {
2442 	{
2443 		"file_systems/bfs" B_CURRENT_FS_API_VERSION,
2444 		0,
2445 		bfs_std_ops,
2446 	},
2447 
2448 	"bfs",						// short_name
2449 	"Be File System",			// pretty_name
2450 
2451 	// DDM flags
2452 	0
2453 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2454 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2455 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2456 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2457 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2458 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2459 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2460 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2461 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2462 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2463 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2464 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2465 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2466 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2467 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2468 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2469 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2470 	,
2471 
2472 	// scanning
2473 	bfs_identify_partition,
2474 	bfs_scan_partition,
2475 	bfs_free_identify_partition_cookie,
2476 	NULL,	// free_partition_content_cookie()
2477 
2478 	&bfs_mount,
2479 
2480 	/* capability querying operations */
2481 	&bfs_get_supported_operations,
2482 
2483 	NULL,	// validate_resize
2484 	NULL,	// validate_move
2485 	NULL,	// validate_set_content_name
2486 	NULL,	// validate_set_content_parameters
2487 	NULL,	// validate_initialize,
2488 
2489 	/* shadow partition modification */
2490 	NULL,	// shadow_changed
2491 
2492 	/* writing */
2493 	NULL,	// defragment
2494 	NULL,	// repair
2495 	NULL,	// resize
2496 	NULL,	// move
2497 	NULL,	// set_content_name
2498 	NULL,	// set_content_parameters
2499 	bfs_initialize,
2500 	bfs_uninitialize
2501 };
2502 
2503 module_info* modules[] = {
2504 	(module_info*)&sBeFileSystem,
2505 	NULL,
2506 };
2507