xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 71f92c6439bddce17ccd7121d4ba7ff716617b1c)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
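
// Illustrative expansion (non-KDEBUG case): a call like
//
//	FS_CALL(vnode, read_stat, &stat);
//
// becomes
//
//	vnode->ops->read_stat(vnode->mount->volume, vnode, &stat);
//
// i.e. the volume and the vnode are always passed as the first two
// arguments, followed by the macro's variadic parameters. read_stat is just
// an example here; any hook in the vnode's ops table works the same way.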
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() - this does not depend
112 	// on PATH_MAX)
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and covers_vnode fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount has been mounted and it is ensured that it won't be
127 	unmounted (e.g. by holding a reference to a vnode of that mount), (read)
128 	access to those fields is always safe, even without additional locking.
129 	Moreover, while mounted the mount holds a reference to the covers_vnode,
130 	thus making the access path vnode->mount->covers_vnode->mount->... safe
131 	if a reference to vnode is held (note that for the root mount
132 	covers_vnode is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock for the duration of their
218 	operation. That is, holding the lock ensures that no FS is being
219 	mounted/unmounted. In particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save for the immutable fields (device, id,
234 	private_node, mount) to which only read-only access is allowed.
235 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
236 	well as the busy, removed, unused flags, and the vnode's type, can also be
237 	written when holding a read lock to sVnodeLock *and* having the vnode
238 	locked. Write access to covered_by requires sVnodeLock to be write-locked.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not have this lock held when calling create_sem(), as this
242 	might call vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
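
// Typical pattern implied by the rules above (sketch only; cf. its actual
// use in dec_vnode_ref_count() below): to modify e.g. the busy flag of a
// vnode, a read lock on sVnodeLock plus the vnode's own lock suffices:
//
//	ReadLocker locker(sVnodeLock);
//	AutoLocker<Vnode> nodeLocker(vnode);
//	vnode->SetBusy(true);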
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
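
// Usage sketch (hypothetical caller code): VNodePutter is a simple RAII
// guard for a vnode reference, e.g. one obtained via get_vnode():
//
//	struct vnode* vnode;
//	status_t status = get_vnode(mountID, vnodeID, &vnode, true, false);
//	if (status != B_OK)
//		return status;
//	VNodePutter putter(vnode);
//	// ... every return path now put_vnode()s the reference, unless
//	// putter.Detach() hands it over to someone else ...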
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
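
// Usage sketch (hypothetical): FDCloser plays the same RAII role for file
// descriptors, closing via _kern_close()/_user_close() depending on the
// kernel flag:
//
//	int fd = open_vnode(vnode, openMode, kernel);
//	if (fd < 0)
//		return fd;
//	FDCloser fdCloser(fd, kernel);
//	// ... on success: return fdCloser.Detach();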
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
549 		status_t status, generic_size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, (uint64)fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
581 			fFlags, fStatus, (uint64)fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	generic_io_vec*		fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	generic_size_t			fBytesRequested;
594 	status_t		fStatus;
595 	generic_size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
603 		status_t status, generic_size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
622 		status_t status, generic_size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
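
// Note (khash convention, illustrative): the hash callback may be invoked
// either with a table element (_m != NULL) or with only a lookup key
// (_m == NULL), which is why mount_hash() above handles both cases;
// mount_compare() always receives an element and a key.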
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the sMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name is already a plain file system name (e.g. "bfs")
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
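
// For example (per the contract above), both of these calls return a
// malloc()ed "bfs" that the caller must free():
//
//	char* a = get_file_system_name("bfs");
//	char* b = get_file_system_name("file_systems/bfs/v1");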
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
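
// For example, with a layered name such as "ntfs:write_overlay" (names
// purely illustrative):
//
//	get_file_system_name_for_layer("ntfs:write_overlay", 0); // -> "ntfs"
//	get_file_system_name_for_layer("ntfs:write_overlay", 1);
//		// -> "write_overlay"
//	get_file_system_name_for_layer("ntfs:write_overlay", 2); // -> NULL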
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
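
// Worked example of VHASH above: for device 5 and node ID 0x100000002, the
// high 32 bits (1) and low 32 bits (2) of the ID are added and then XORed
// with the device: (1 + 2) ^ 5 == 6, which is finally taken modulo range.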
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not if an error occurs -- the function
877 	write-locks \c sVnodeLock and keeps it locked for the caller when
878 	returning. On error the lock is not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
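
// Caller-side sketch of the locking contract (cf. its actual use in
// get_vnode() below):
//
//	struct vnode* vnode;
//	bool nodeCreated;
//	if (create_new_vnode_and_lock(mountID, vnodeID, vnode,
//			nodeCreated) == B_OK) {
//		// sVnodeLock is write-locked here; if nodeCreated, the new
//		// vnode is marked busy and must be published/unbusied later
//		rw_lock_write_unlock(&sVnodeLock);
//	}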
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now will cause the next to last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is: 1. this function must not be invoked when the
1072 	node is busy in the first place, and 2. vnode_used() must be called for
1073 	the node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything went fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode and we rather don't free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 static void
1325 free_unused_vnodes()
1326 {
1327 	free_unused_vnodes(
1328 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1329 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1330 }
1331 
1332 
1333 static void
1334 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1335 {
1336 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1337 
1338 	free_unused_vnodes(level);
1339 }
1340 
1341 
1342 static inline void
1343 put_advisory_locking(struct advisory_locking* locking)
1344 {
1345 	release_sem(locking->lock);
1346 }
1347 
1348 
1349 /*!	Returns the advisory_locking object of the \a vnode in case it
1350 	has one, and locks it.
1351 	You have to call put_advisory_locking() when you're done with
1352 	it.
1353 	Note, you must not have the vnode mutex locked when calling
1354 	this function.
1355 */
1356 static struct advisory_locking*
1357 get_advisory_locking(struct vnode* vnode)
1358 {
1359 	rw_lock_read_lock(&sVnodeLock);
1360 	vnode->Lock();
1361 
1362 	struct advisory_locking* locking = vnode->advisory_locking;
1363 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1364 
1365 	vnode->Unlock();
1366 	rw_lock_read_unlock(&sVnodeLock);
1367 
1368 	if (lock >= 0)
1369 		lock = acquire_sem(lock);
1370 	if (lock < 0) {
1371 		// This means the locking has been deleted in the mean time
1372 		// or had never existed in the first place - otherwise, we
1373 		// would get the lock at some point.
1374 		return NULL;
1375 	}
1376 
1377 	return locking;
1378 }
1379 
1380 
1381 /*!	Creates a locked advisory_locking object, and attaches it to the
1382 	given \a vnode.
1383 	Returns B_OK in case of success - also if the vnode got such an
1384 	object from someone else in the mean time, you'll still get this
1385 	one locked then.
1386 */
1387 static status_t
1388 create_advisory_locking(struct vnode* vnode)
1389 {
1390 	if (vnode == NULL)
1391 		return B_FILE_ERROR;
1392 
1393 	ObjectDeleter<advisory_locking> lockingDeleter;
1394 	struct advisory_locking* locking = NULL;
1395 
1396 	while (get_advisory_locking(vnode) == NULL) {
1397 		// no locking object set on the vnode yet, create one
1398 		if (locking == NULL) {
1399 			locking = new(std::nothrow) advisory_locking;
1400 			if (locking == NULL)
1401 				return B_NO_MEMORY;
1402 			lockingDeleter.SetTo(locking);
1403 
1404 			locking->wait_sem = create_sem(0, "advisory lock");
1405 			if (locking->wait_sem < 0)
1406 				return locking->wait_sem;
1407 
1408 			locking->lock = create_sem(0, "advisory locking");
1409 			if (locking->lock < 0)
1410 				return locking->lock;
1411 		}
1412 
1413 		// set our newly created locking object
1414 		ReadLocker _(sVnodeLock);
1415 		AutoLocker<Vnode> nodeLocker(vnode);
1416 		if (vnode->advisory_locking == NULL) {
1417 			vnode->advisory_locking = locking;
1418 			lockingDeleter.Detach();
1419 			return B_OK;
1420 		}
1421 	}
1422 
1423 	// The vnode already had a locking object. That's just as well.
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 /*!	Retrieves the first lock that has been set by the current team.
1430 */
1431 static status_t
1432 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1433 {
1434 	struct advisory_locking* locking = get_advisory_locking(vnode);
1435 	if (locking == NULL)
1436 		return B_BAD_VALUE;
1437 
1438 	// TODO: this should probably get the flock by its file descriptor!
1439 	team_id team = team_get_current_team_id();
1440 	status_t status = B_BAD_VALUE;
1441 
1442 	LockList::Iterator iterator = locking->locks.GetIterator();
1443 	while (iterator.HasNext()) {
1444 		struct advisory_lock* lock = iterator.Next();
1445 
1446 		if (lock->team == team) {
1447 			flock->l_start = lock->start;
1448 			flock->l_len = lock->end - lock->start + 1;
1449 			status = B_OK;
1450 			break;
1451 		}
1452 	}
1453 
1454 	put_advisory_locking(locking);
1455 	return status;
1456 }
1457 
1458 
1459 /*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1460 	with the advisory_lock \a lock.
1461 */
1462 static bool
1463 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1464 {
1465 	if (flock == NULL)
1466 		return true;
1467 
1468 	return lock->start <= flock->l_start - 1 + flock->l_len
1469 		&& lock->end >= flock->l_start;
1470 }
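
// Example: an advisory_lock covering [10, 19] intersects an flock with
// l_start == 15 and l_len == 10 (i.e. [15, 24]), since 10 <= 24 and
// 19 >= 15; with l_start == 20 it would not, as 19 >= 20 fails.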
1471 
1472 
1473 /*!	Removes the specified lock, or all locks of the calling team
1474 	if \a flock is NULL.
1475 */
1476 static status_t
1477 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1478 {
1479 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1480 
1481 	struct advisory_locking* locking = get_advisory_locking(vnode);
1482 	if (locking == NULL)
1483 		return B_OK;
1484 
1485 	// TODO: use the thread ID instead??
1486 	team_id team = team_get_current_team_id();
1487 	pid_t session = thread_get_current_thread()->team->session_id;
1488 
1489 	// find matching lock entries
1490 
1491 	LockList::Iterator iterator = locking->locks.GetIterator();
1492 	while (iterator.HasNext()) {
1493 		struct advisory_lock* lock = iterator.Next();
1494 		bool removeLock = false;
1495 
1496 		if (lock->session == session)
1497 			removeLock = true;
1498 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1499 			bool endsBeyond = false;
1500 			bool startsBefore = false;
1501 			if (flock != NULL) {
1502 				startsBefore = lock->start < flock->l_start;
1503 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1504 			}
1505 
1506 			if (!startsBefore && !endsBeyond) {
1507 				// lock is completely contained in flock
1508 				removeLock = true;
1509 			} else if (startsBefore && !endsBeyond) {
1510 				// cut the end of the lock
1511 				lock->end = flock->l_start - 1;
1512 			} else if (!startsBefore && endsBeyond) {
1513 				// cut the start of the lock
1514 				lock->start = flock->l_start + flock->l_len;
1515 			} else {
1516 				// divide the lock into two locks
1517 				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
1518 				if (secondLock == NULL) {
1519 					// TODO: we should probably revert the locks we already
1520 					// changed... (ie. allocate upfront)
1521 					put_advisory_locking(locking);
1522 					return B_NO_MEMORY;
1523 				}
1524 
				off_t originalEnd = lock->end;
					// remember the original end of the lock; it is about to
					// be truncated
1525 				lock->end = flock->l_start - 1;
1526 
1527 				secondLock->team = lock->team;
1528 				secondLock->session = lock->session;
1529 				// values must already be normalized when getting here
1530 				secondLock->start = flock->l_start + flock->l_len;
1531 				secondLock->end = originalEnd;
1532 				secondLock->shared = lock->shared;
1533 
1534 				locking->locks.Add(secondLock);
1535 			}
1536 		}
1537 
1538 		if (removeLock) {
1539 			// this lock is no longer used
1540 			iterator.Remove();
1541 			free(lock);
1542 		}
1543 	}
1544 
1545 	bool removeLocking = locking->locks.IsEmpty();
1546 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1547 
1548 	put_advisory_locking(locking);
1549 
1550 	if (removeLocking) {
1551 		// We can remove the whole advisory locking structure; it's no
1552 		// longer used
1553 		locking = get_advisory_locking(vnode);
1554 		if (locking != NULL) {
1555 			ReadLocker locker(sVnodeLock);
1556 			AutoLocker<Vnode> nodeLocker(vnode);
1557 
1558 			// the locking could have been changed in the mean time
1559 			if (locking->locks.IsEmpty()) {
1560 				vnode->advisory_locking = NULL;
1561 				nodeLocker.Unlock();
1562 				locker.Unlock();
1563 
1564 				// we've detached the locking from the vnode, so we can
1565 				// safely delete it
1566 				delete locking;
1567 			} else {
1568 				// the locking is in use again
1569 				nodeLocker.Unlock();
1570 				locker.Unlock();
1571 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1572 			}
1573 		}
1574 	}
1575 
1576 	return B_OK;
1577 }
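
// Example of the splitting case above: releasing the range [40, 49]
// (flock->l_start == 40, l_len == 10) from a held lock covering [0, 99]
// shrinks the original lock to [0, 39] and adds a second lock for [50, 99].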
1578 
1579 
1580 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1581 	will wait for the lock to become available, if there are any collisions
1582 	(with \a wait \c false it returns B_PERMISSION_DENIED for POSIX locks
	and B_WOULD_BLOCK for flock() style locks in that case).
1583 
1584 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1585 	BSD flock() semantics are used, that is, all children can unlock the file
1586 	in question (we even allow parents to remove the lock, though, but that
1587 	seems to be in line with what the BSDs are doing).
1588 */
1589 static status_t
1590 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1591 	bool wait)
1592 {
1593 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1594 		vnode, flock, wait ? "yes" : "no"));
1595 
1596 	bool shared = flock->l_type == F_RDLCK;
1597 	status_t status = B_OK;
1598 
1599 	// TODO: do deadlock detection!
1600 
1601 	struct advisory_locking* locking;
1602 
1603 	while (true) {
1604 		// if this vnode has an advisory_locking structure attached,
1605 		// lock that one and search for any colliding file lock
1606 		status = create_advisory_locking(vnode);
1607 		if (status != B_OK)
1608 			return status;
1609 
1610 		locking = vnode->advisory_locking;
1611 		team_id team = team_get_current_team_id();
1612 		sem_id waitForLock = -1;
1613 
1614 		// test for collisions
1615 		LockList::Iterator iterator = locking->locks.GetIterator();
1616 		while (iterator.HasNext()) {
1617 			struct advisory_lock* lock = iterator.Next();
1618 
1619 			// TODO: locks from the same team might be joinable!
1620 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1621 				// locks do overlap
1622 				if (!shared || !lock->shared) {
1623 					// we need to wait
1624 					waitForLock = locking->wait_sem;
1625 					break;
1626 				}
1627 			}
1628 		}
1629 
1630 		if (waitForLock < 0)
1631 			break;
1632 
1633 		// We need to wait. Do that or fail now, if we've been asked not to.
1634 
1635 		if (!wait) {
1636 			put_advisory_locking(locking);
1637 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1638 		}
1639 
1640 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1641 			B_CAN_INTERRUPT, 0);
1642 		if (status != B_OK && status != B_BAD_SEM_ID)
1643 			return status;
1644 
1645 		// We have been notified, but we need to re-lock the locking object. So
1646 		// go another round...
1647 	}
1648 
1649 	// install new lock
1650 
1651 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1652 		sizeof(struct advisory_lock));
1653 	if (lock == NULL) {
1654 		put_advisory_locking(locking);
1655 		return B_NO_MEMORY;
1656 	}
1657 
1658 	lock->team = team_get_current_team_id();
1659 	lock->session = session;
1660 	// values must already be normalized when getting here
1661 	lock->start = flock->l_start;
1662 	lock->end = flock->l_start - 1 + flock->l_len;
1663 	lock->shared = shared;
1664 
1665 	locking->locks.Add(lock);
1666 	put_advisory_locking(locking);
1667 
1668 	return status;
1669 }
1670 
1671 
1672 /*!	Normalizes the \a flock structure to make it easier to compare the
1673 	structure with others. The l_start and l_len fields are set to absolute
1674 	values according to the l_whence field.
1675 */
1676 static status_t
1677 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1678 {
1679 	switch (flock->l_whence) {
1680 		case SEEK_SET:
1681 			break;
1682 		case SEEK_CUR:
1683 			flock->l_start += descriptor->pos;
1684 			break;
1685 		case SEEK_END:
1686 		{
1687 			struct vnode* vnode = descriptor->u.vnode;
1688 			struct stat stat;
1689 			status_t status;
1690 
1691 			if (!HAS_FS_CALL(vnode, read_stat))
1692 				return B_UNSUPPORTED;
1693 
1694 			status = FS_CALL(vnode, read_stat, &stat);
1695 			if (status != B_OK)
1696 				return status;
1697 
1698 			flock->l_start += stat.st_size;
1699 			break;
1700 		}
1701 		default:
1702 			return B_BAD_VALUE;
1703 	}
1704 
1705 	if (flock->l_start < 0)
1706 		flock->l_start = 0;
1707 	if (flock->l_len == 0)
1708 		flock->l_len = OFF_MAX;
1709 
1710 	// don't let the offset and length overflow
1711 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1712 		flock->l_len = OFF_MAX - flock->l_start;
1713 
1714 	if (flock->l_len < 0) {
1715 		// a negative length reverses the region
1716 		flock->l_start += flock->l_len;
1717 		flock->l_len = -flock->l_len;
1718 	}
1719 
1720 	return B_OK;
1721 }
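
// Example: with descriptor->pos == 100, an flock of { l_whence = SEEK_CUR,
// l_start = 0, l_len = -10 } is normalized to { l_start = 90, l_len = 10 },
// i.e. the ten bytes just before the current position.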
1722 
1723 
1724 static void
1725 replace_vnode_if_disconnected(struct fs_mount* mount,
1726 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1727 	struct vnode* fallBack, bool lockRootLock)
1728 {
1729 	if (lockRootLock)
1730 		mutex_lock(&sIOContextRootLock);
1731 
1732 	struct vnode* obsoleteVnode = NULL;
1733 
1734 	if (vnode != NULL && vnode->mount == mount
1735 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1736 		obsoleteVnode = vnode;
1737 
1738 		if (vnode == mount->root_vnode) {
1739 			// redirect the vnode to the covered vnode
1740 			vnode = mount->covers_vnode;
1741 		} else
1742 			vnode = fallBack;
1743 
1744 		if (vnode != NULL)
1745 			inc_vnode_ref_count(vnode);
1746 	}
1747 
1748 	if (lockRootLock)
1749 		mutex_unlock(&sIOContextRootLock);
1750 
1751 	if (obsoleteVnode != NULL)
1752 		put_vnode(obsoleteVnode);
1753 }
1754 
1755 
1756 /*!	Disconnects all file descriptors that are associated with the
1757 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1758 	\a mount object.
1759 
1760 	Note, after you've called this function, there might still be ongoing
1761 	accesses - they won't be interrupted if they already happened before.
1762 	However, any subsequent access will fail.
1763 
1764 	This is not a cheap function and should be used with care and rarely.
1765 	TODO: there is currently no means to stop a blocking read/write!
1766 */
1767 static void
1768 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1769 	struct vnode* vnodeToDisconnect)
1770 {
1771 	// iterate over all teams and peek into their file descriptors
1772 	TeamListIterator teamIterator;
1773 	while (Team* team = teamIterator.Next()) {
1774 		BReference<Team> teamReference(team, true);
1775 
1776 		// lock the I/O context
1777 		io_context* context = team->io_context;
1778 		MutexLocker contextLocker(context->io_mutex);
1779 
1780 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1781 			sRoot, true);
1782 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1783 			sRoot, false);
1784 
1785 		for (uint32 i = 0; i < context->table_size; i++) {
1786 			if (struct file_descriptor* descriptor = context->fds[i]) {
1787 				inc_fd_ref_count(descriptor);
1788 
1789 				// if this descriptor points at this mount, we
1790 				// need to disconnect it to be able to unmount
1791 				struct vnode* vnode = fd_vnode(descriptor);
1792 				if (vnodeToDisconnect != NULL) {
1793 					if (vnode == vnodeToDisconnect)
1794 						disconnect_fd(descriptor);
1795 				} else if ((vnode != NULL && vnode->mount == mount)
1796 					|| (vnode == NULL && descriptor->u.mount == mount))
1797 					disconnect_fd(descriptor);
1798 
1799 				put_fd(descriptor);
1800 			}
1801 		}
1802 	}
1803 }
1804 
1805 
1806 /*!	\brief Gets the root node of the current IO context.
1807 	If \a kernel is \c true, the kernel IO context will be used.
1808 	The caller obtains a reference to the returned node.
1809 */
1810 struct vnode*
1811 get_root_vnode(bool kernel)
1812 {
1813 	if (!kernel) {
1814 		// Get the root vnode from the io context
1815 		struct io_context* context = get_current_io_context(kernel);
1816 
1817 		mutex_lock(&sIOContextRootLock);
1818 
1819 		struct vnode* root = context->root;
1820 		if (root != NULL)
1821 			inc_vnode_ref_count(root);
1822 
1823 		mutex_unlock(&sIOContextRootLock);
1824 
1825 		if (root != NULL)
1826 			return root;
1827 
1828 		// That should never happen.
1829 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1830 			"root\n", team_get_current_team_id());
1831 	}
1832 
1833 	inc_vnode_ref_count(sRoot);
1834 	return sRoot;
1835 }
1836 
1837 
1838 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1839 		   by.
1840 
1841 	Given an arbitrary vnode, the function checks whether the node is covered
1842 	by the root of a volume. If it is, the function obtains a reference to the
1843 	volume root node and returns it.
1844 
1845 	\param vnode The vnode in question.
1846 	\return The volume root vnode the given vnode is covered by, if it is
1847 			indeed a mount point, or \c NULL otherwise.
1848 */
1849 static struct vnode*
1850 resolve_mount_point_to_volume_root(struct vnode* vnode)
1851 {
1852 	if (!vnode)
1853 		return NULL;
1854 
1855 	struct vnode* volumeRoot = NULL;
1856 
1857 	rw_lock_read_lock(&sVnodeLock);
1858 
1859 	if (vnode->covered_by) {
1860 		volumeRoot = vnode->covered_by;
1861 		inc_vnode_ref_count(volumeRoot);
1862 	}
1863 
1864 	rw_lock_read_unlock(&sVnodeLock);
1865 
1866 	return volumeRoot;
1867 }
1868 
1869 
1870 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1871 		   by.
1872 
1873 	Given an arbitrary vnode (identified by mount and node ID), the function
1874 	checks whether the node is covered by the root of a volume. If it is, the
1875 	function returns the mount and node ID of the volume root node. Otherwise
1876 	it simply returns the supplied mount and node ID.
1877 
1878 	In case of error (e.g. the supplied node could not be found) the variables
1879 	for storing the resolved mount and node ID remain untouched and an error
1880 	code is returned.
1881 
1882 	\param mountID The mount ID of the vnode in question.
1883 	\param nodeID The node ID of the vnode in question.
1884 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1885 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1886 	\return
1887 	- \c B_OK, if everything went fine,
1888 	- another error code, if something went wrong.
1889 */
1890 status_t
1891 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1892 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1893 {
1894 	// get the node
1895 	struct vnode* node;
1896 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1897 	if (error != B_OK)
1898 		return error;
1899 
1900 	// resolve the node
1901 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1902 	if (resolvedNode) {
1903 		put_vnode(node);
1904 		node = resolvedNode;
1905 	}
1906 
1907 	// set the return values
1908 	*resolvedMountID = node->device;
1909 	*resolvedNodeID = node->id;
1910 
1911 	put_vnode(node);
1912 
1913 	return B_OK;
1914 }
1915 
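// Usage sketch (illustrative, not compiled): resolving a (dev_t, ino_t)
// pair that may denote a mount point; 'device' and 'node' are placeholder
// IDs.
#if 0
dev_t resolvedDevice;
ino_t resolvedNode;
if (resolve_mount_point_to_volume_root(device, node, &resolvedDevice,
		&resolvedNode) == B_OK) {
	// (resolvedDevice, resolvedNode) now names the covering volume root,
	// or simply equals (device, node) if nothing covers it
}
#endif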
1916 
1917 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1918 
1919 	Given an arbitrary vnode, the function checks whether the node is the
1920 	root of a volume. If it is (and if it is not "/"), the function obtains
1921 	a reference to the underlying mount point node and returns it.
1922 
1923 	\param vnode The vnode in question (caller must have a reference).
1924 	\return The mount point vnode the vnode covers, if it is indeed a volume
1925 			root and not "/", or \c NULL otherwise.
1926 */
1927 static struct vnode*
1928 resolve_volume_root_to_mount_point(struct vnode* vnode)
1929 {
1930 	if (!vnode)
1931 		return NULL;
1932 
1933 	struct vnode* mountPoint = NULL;
1934 
1935 	struct fs_mount* mount = vnode->mount;
1936 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1937 		mountPoint = mount->covers_vnode;
1938 		inc_vnode_ref_count(mountPoint);
1939 	}
1940 
1941 	return mountPoint;
1942 }
1943 
1944 
1945 /*!	\brief Gets the directory path and leaf name for a given path.
1946 
1947 	The supplied \a path is transformed to refer to the directory part of
1948 	the entry identified by the original path, and into the buffer \a filename
1949 	the leaf name of the original entry is written.
1950 	Neither the returned path nor the leaf name can be expected to be
1951 	canonical.
1952 
1953 	\param path The path to be analyzed. Must be able to store at least one
1954 		   additional character.
1955 	\param filename The buffer into which the leaf name will be written.
1956 		   Must be of size B_FILE_NAME_LENGTH at least.
1957 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1958 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
1959 		   if the given path name is empty.
1960 */
1961 static status_t
1962 get_dir_path_and_leaf(char* path, char* filename)
1963 {
1964 	if (*path == '\0')
1965 		return B_ENTRY_NOT_FOUND;
1966 
1967 	char* last = strrchr(path, '/');
1968 		// '/' are not allowed in file names!
1969 
1970 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1971 
1972 	if (last == NULL) {
1973 		// this path is single segment with no '/' in it
1974 		// ex. "foo"
1975 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1976 			return B_NAME_TOO_LONG;
1977 
1978 		strcpy(path, ".");
1979 	} else {
1980 		last++;
1981 		if (last[0] == '\0') {
1982 			// special case: the path ends in one or more '/' - remove them
1983 			while (*--last == '/' && last != path);
1984 			last[1] = '\0';
1985 
1986 			if (last == path && last[0] == '/') {
1987 				// This path points to the root of the file system
1988 				strcpy(filename, ".");
1989 				return B_OK;
1990 			}
1991 			for (; last != path && *(last - 1) != '/'; last--);
1992 				// rewind to the start of the leaf before the '/'
1993 		}
1994 
1995 		// normal leaf: replace the leaf portion of the path with a '.'
1996 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1997 			return B_NAME_TOO_LONG;
1998 
1999 		last[0] = '.';
2000 		last[1] = '\0';
2001 	}
2002 	return B_OK;
2003 }
2004 
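// Worked example (illustrative, not compiled): get_dir_path_and_leaf()
// splits the path in place.
#if 0
char path[B_PATH_NAME_LENGTH] = "/boot/home/file.txt";
char leaf[B_FILE_NAME_LENGTH];
if (get_dir_path_and_leaf(path, leaf) == B_OK) {
	// path is now "/boot/home/." and leaf is "file.txt";
	// for the input "foo", path would become "." and leaf "foo"
}
#endif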
2005 
2006 static status_t
2007 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2008 	bool traverse, bool kernel, struct vnode** _vnode)
2009 {
2010 	char clonedName[B_FILE_NAME_LENGTH + 1];
2011 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2012 		return B_NAME_TOO_LONG;
2013 
2014 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2015 	struct vnode* directory;
2016 
2017 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2018 	if (status < 0)
2019 		return status;
2020 
2021 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2022 		_vnode, NULL);
2023 }
2024 
2025 
2026 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2027 	and returns the respective vnode.
2028 	On success a reference to the vnode is acquired for the caller.
2029 */
2030 static status_t
2031 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2032 {
2033 	ino_t id;
2034 
2035 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2036 		return get_vnode(dir->device, id, _vnode, true, false);
2037 
2038 	status_t status = FS_CALL(dir, lookup, name, &id);
2039 	if (status != B_OK)
2040 		return status;
2041 
2042 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2043 	// have a reference and just need to look the node up.
2044 	rw_lock_read_lock(&sVnodeLock);
2045 	*_vnode = lookup_vnode(dir->device, id);
2046 	rw_lock_read_unlock(&sVnodeLock);
2047 
2048 	if (*_vnode == NULL) {
2049 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2050 			"0x%Lx)\n", dir->device, id);
2051 		return B_ENTRY_NOT_FOUND;
2052 	}
2053 
2054 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2055 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2056 //		(*_vnode)->mount->id, (*_vnode)->id);
2057 
2058 	return B_OK;
2059 }
2060 
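// Usage sketch (illustrative, not compiled): a successful lookup hands the
// caller a referenced vnode that has to be released again; 'dir' is assumed
// to be a referenced directory vnode.
#if 0
struct vnode* child;
if (lookup_dir_entry(dir, "entry_name", &child) == B_OK) {
	// ... use child ...
	put_vnode(child);
}
#endif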
2061 
2062 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2063 	\a path must not be NULL.
2064 	If it returns successfully, \a path contains the name of the last path
2065 	component. This function clobbers the buffer pointed to by \a path only
2066 	if it contains more than one component.
2067 	Note that this function always decrements the ref_count of the starting
2068 	\a vnode, whether it succeeds or fails!
2069 */
2070 static status_t
2071 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2072 	int count, struct io_context* ioContext, struct vnode** _vnode,
2073 	ino_t* _parentID)
2074 {
2075 	status_t status = B_OK;
2076 	ino_t lastParentID = vnode->id;
2077 
2078 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2079 
2080 	if (path == NULL) {
2081 		put_vnode(vnode);
2082 		return B_BAD_VALUE;
2083 	}
2084 
2085 	if (*path == '\0') {
2086 		put_vnode(vnode);
2087 		return B_ENTRY_NOT_FOUND;
2088 	}
2089 
2090 	while (true) {
2091 		struct vnode* nextVnode;
2092 		char* nextPath;
2093 
2094 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2095 			path));
2096 
2097 		// done?
2098 		if (path[0] == '\0')
2099 			break;
2100 
2101 		// walk to find the next path component ("path" will point to a single
2102 		// path component), and filter out multiple slashes
2103 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2104 				nextPath++);
2105 
2106 		if (*nextPath == '/') {
2107 			*nextPath = '\0';
2108 			do
2109 				nextPath++;
2110 			while (*nextPath == '/');
2111 		}
2112 
2113 		// See if the '..' is at the root of a mount and move to the covered
2114 		// vnode so we pass the '..' path to the underlying filesystem.
2115 		// Also prevent breaking the root of the IO context.
2116 		if (strcmp("..", path) == 0) {
2117 			if (vnode == ioContext->root) {
2118 				// Attempted prison break! Keep it contained.
2119 				path = nextPath;
2120 				continue;
2121 			} else if (vnode->mount->root_vnode == vnode
2122 				&& vnode->mount->covers_vnode) {
2123 				nextVnode = vnode->mount->covers_vnode;
2124 				inc_vnode_ref_count(nextVnode);
2125 				put_vnode(vnode);
2126 				vnode = nextVnode;
2127 			}
2128 		}
2129 
2130 		// check if vnode is really a directory
2131 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2132 			status = B_NOT_A_DIRECTORY;
2133 
2134 		// Check if we have the right to search the current directory vnode.
2135 		// If a file system doesn't have the access() function, we assume that
2136 		// searching a directory is always allowed
2137 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2138 			status = FS_CALL(vnode, access, X_OK);
2139 
2140 		// Tell the filesystem to get the vnode of this path component (if we
2141 		// got the permission from the call above)
2142 		if (status == B_OK)
2143 			status = lookup_dir_entry(vnode, path, &nextVnode);
2144 
2145 		if (status != B_OK) {
2146 			put_vnode(vnode);
2147 			return status;
2148 		}
2149 
2150 		// If the new node is a symbolic link, resolve it (if we've been told
2151 		// to do it)
2152 		if (S_ISLNK(nextVnode->Type())
2153 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2154 			size_t bufferSize;
2155 			char* buffer;
2156 
2157 			TRACE(("traverse link\n"));
2158 
2159 			// it's not exactly nice style using goto in this way, but hey,
2160 			// it works :-/
2161 			if (count + 1 > B_MAX_SYMLINKS) {
2162 				status = B_LINK_LIMIT;
2163 				goto resolve_link_error;
2164 			}
2165 
2166 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2167 			if (buffer == NULL) {
2168 				status = B_NO_MEMORY;
2169 				goto resolve_link_error;
2170 			}
2171 
2172 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2173 				bufferSize--;
2174 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2175 				// null-terminate
2176 				if (status >= 0)
2177 					buffer[bufferSize] = '\0';
2178 			} else
2179 				status = B_BAD_VALUE;
2180 
2181 			if (status != B_OK) {
2182 				free(buffer);
2183 
2184 		resolve_link_error:
2185 				put_vnode(vnode);
2186 				put_vnode(nextVnode);
2187 
2188 				return status;
2189 			}
2190 			put_vnode(nextVnode);
2191 
2192 			// Check if we start from the root directory or the current
2193 			// directory ("vnode" still points to that one).
2194 			// Cut off all leading slashes if it's the root directory
2195 			path = buffer;
2196 			bool absoluteSymlink = false;
2197 			if (path[0] == '/') {
2198 				// we don't need the old directory anymore
2199 				put_vnode(vnode);
2200 
2201 				while (*++path == '/')
2202 					;
2203 
2204 				mutex_lock(&sIOContextRootLock);
2205 				vnode = ioContext->root;
2206 				inc_vnode_ref_count(vnode);
2207 				mutex_unlock(&sIOContextRootLock);
2208 
2209 				absoluteSymlink = true;
2210 			}
2211 
2212 			inc_vnode_ref_count(vnode);
2213 				// balance the next recursion - we will decrement the
2214 				// ref_count of the vnode, no matter if we succeeded or not
2215 
2216 			if (absoluteSymlink && *path == '\0') {
2217 				// symlink was just "/"
2218 				nextVnode = vnode;
2219 			} else {
2220 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2221 					ioContext, &nextVnode, &lastParentID);
2222 			}
2223 
2224 			free(buffer);
2225 
2226 			if (status != B_OK) {
2227 				put_vnode(vnode);
2228 				return status;
2229 			}
2230 		} else
2231 			lastParentID = vnode->id;
2232 
2233 		// decrease the ref count on the old dir we just looked up into
2234 		put_vnode(vnode);
2235 
2236 		path = nextPath;
2237 		vnode = nextVnode;
2238 
2239 		// see if we hit a mount point
2240 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2241 		if (mountPoint) {
2242 			put_vnode(vnode);
2243 			vnode = mountPoint;
2244 		}
2245 	}
2246 
2247 	*_vnode = vnode;
2248 	if (_parentID)
2249 		*_parentID = lastParentID;
2250 
2251 	return B_OK;
2252 }
2253 
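// Usage sketch (illustrative, not compiled): since vnode_path_to_vnode()
// always consumes one reference to the starting vnode and clobbers the path
// buffer, a caller that wants to keep both acquires an extra reference and
// passes a copy; 'startVnode' and 'kernel' are placeholders.
#if 0
char pathBuffer[B_PATH_NAME_LENGTH];
strlcpy(pathBuffer, "sub/dir/entry", sizeof(pathBuffer));

inc_vnode_ref_count(startVnode);
	// balances the reference the call below will consume

struct vnode* result;
status_t status = vnode_path_to_vnode(startVnode, pathBuffer, true, 0,
	kernel, &result, NULL);
if (status == B_OK)
	put_vnode(result);
#endif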
2254 
2255 static status_t
2256 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2257 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2258 {
2259 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2260 		get_current_io_context(kernel), _vnode, _parentID);
2261 }
2262 
2263 
2264 static status_t
2265 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2266 	ino_t* _parentID, bool kernel)
2267 {
2268 	struct vnode* start = NULL;
2269 
2270 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2271 
2272 	if (!path)
2273 		return B_BAD_VALUE;
2274 
2275 	if (*path == '\0')
2276 		return B_ENTRY_NOT_FOUND;
2277 
2278 	// figure out if we need to start at root or at cwd
2279 	if (*path == '/') {
2280 		if (sRoot == NULL) {
2281 			// we're a bit early, aren't we?
2282 			return B_ERROR;
2283 		}
2284 
2285 		while (*++path == '/')
2286 			;
2287 		start = get_root_vnode(kernel);
2288 
2289 		if (*path == '\0') {
2290 			*_vnode = start;
2291 			return B_OK;
2292 		}
2293 
2294 	} else {
2295 		struct io_context* context = get_current_io_context(kernel);
2296 
2297 		mutex_lock(&context->io_mutex);
2298 		start = context->cwd;
2299 		if (start != NULL)
2300 			inc_vnode_ref_count(start);
2301 		mutex_unlock(&context->io_mutex);
2302 
2303 		if (start == NULL)
2304 			return B_ERROR;
2305 	}
2306 
2307 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2308 		_parentID);
2309 }
2310 
2311 
2312 /*! Returns the vnode for the next-to-last segment of the path, and returns
2313 	the last portion in \a filename.
2314 	The path buffer must be able to store at least one additional character.
2315 */
2316 static status_t
2317 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2318 	bool kernel)
2319 {
2320 	status_t status = get_dir_path_and_leaf(path, filename);
2321 	if (status != B_OK)
2322 		return status;
2323 
2324 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2325 }
2326 
2327 
2328 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2329 		   to by a FD + path pair.
2330 
2331 	\a path must always be given. \a fd might be omitted, in which case
2332 	\a path is either an absolute path or one relative to the current
2333 	directory. If both are supplied and \a path is relative, it is reckoned
2334 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2335 	is ignored.
2336 
2337 	The caller has the responsibility to call put_vnode() on the returned
2338 	directory vnode.
2339 
2340 	\param fd The FD. May be < 0.
2341 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2342 	       is modified by this function. It must have at least room for a
2343 	       string one character longer than the path it contains.
2344 	\param _vnode A pointer to a variable the directory vnode shall be written
2345 		   into.
2346 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2347 		   the leaf name of the specified entry will be written.
2348 	\param kernel \c true, if invoked from inside the kernel, \c false if
2349 		   invoked from userland.
2350 	\return \c B_OK, if everything went fine, another error code otherwise.
2351 */
2352 static status_t
2353 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2354 	char* filename, bool kernel)
2355 {
2356 	if (!path)
2357 		return B_BAD_VALUE;
2358 	if (*path == '\0')
2359 		return B_ENTRY_NOT_FOUND;
2360 	if (fd < 0)
2361 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2362 
2363 	status_t status = get_dir_path_and_leaf(path, filename);
2364 	if (status != B_OK)
2365 		return status;
2366 
2367 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2368 }
2369 
2370 
2371 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2372 		   to by a vnode + path pair.
2373 
2374 	\a path must always be given. \a vnode might be omitted, in which case
2375 	\a path is either an absolute path or one relative to the current
2376 	directory. If both are supplied and \a path is relative, it is reckoned
2377 	off of the directory referred to by \a vnode. If \a path is absolute,
2378 	\a vnode is ignored.
2379 
2380 	The caller has the responsibility to call put_vnode() on the returned
2381 	directory vnode.
2382 
2383 	\param vnode The vnode. May be \c NULL.
2384 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2385 	       is modified by this function. It must have at least room for a
2386 	       string one character longer than the path it contains.
2387 	\param _vnode A pointer to a variable the directory vnode shall be written
2388 		   into.
2389 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2390 		   the leaf name of the specified entry will be written.
2391 	\param kernel \c true, if invoked from inside the kernel, \c false if
2392 		   invoked from userland.
2393 	\return \c B_OK, if everything went fine, another error code otherwise.
2394 */
2395 static status_t
2396 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2397 	struct vnode** _vnode, char* filename, bool kernel)
2398 {
2399 	if (!path)
2400 		return B_BAD_VALUE;
2401 	if (*path == '\0')
2402 		return B_ENTRY_NOT_FOUND;
2403 	if (vnode == NULL || path[0] == '/')
2404 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2405 
2406 	status_t status = get_dir_path_and_leaf(path, filename);
2407 	if (status != B_OK)
2408 		return status;
2409 
2410 	inc_vnode_ref_count(vnode);
2411 		// vnode_path_to_vnode() always decrements the ref count
2412 
2413 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2414 }
2415 
2416 
2417 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2418 */
2419 static status_t
2420 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2421 	size_t bufferSize, struct io_context* ioContext)
2422 {
2423 	if (bufferSize < sizeof(struct dirent))
2424 		return B_BAD_VALUE;
2425 
2426 	// See if vnode is the root of a mount and move to the covered
2427 	// vnode so we get the underlying file system
2428 	VNodePutter vnodePutter;
2429 	if (vnode->mount->root_vnode == vnode
2430 		&& vnode->mount->covers_vnode != NULL) {
2431 		vnode = vnode->mount->covers_vnode;
2432 		inc_vnode_ref_count(vnode);
2433 		vnodePutter.SetTo(vnode);
2434 	}
2435 
2436 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2437 		// The FS supports getting the name of a vnode.
2438 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2439 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2440 			return B_OK;
2441 	}
2442 
2443 	// The FS doesn't support getting the name of a vnode. So we search the
2444 	// parent directory for the vnode, if the caller let us.
2445 
2446 	if (parent == NULL)
2447 		return B_UNSUPPORTED;
2448 
2449 	void* cookie;
2450 
2451 	status_t status = FS_CALL(parent, open_dir, &cookie);
2452 	if (status >= B_OK) {
2453 		while (true) {
2454 			uint32 num = 1;
2455 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2456 				&num);
2457 			if (status != B_OK)
2458 				break;
2459 			if (num == 0) {
2460 				status = B_ENTRY_NOT_FOUND;
2461 				break;
2462 			}
2463 
2464 			if (vnode->id == buffer->d_ino) {
2465 				// found correct entry!
2466 				break;
2467 			}
2468 		}
2469 
2470 		FS_CALL(parent, close_dir, cookie);
2471 		FS_CALL(parent, free_dir_cookie, cookie);
			// the cookie was opened via FS_CALL(parent, open_dir) above,
			// so it must also be closed and freed on the parent
2472 	}
2473 	return status;
2474 }
2475 
2476 
2477 static status_t
2478 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2479 	size_t nameSize, bool kernel)
2480 {
2481 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2482 	struct dirent* dirent = (struct dirent*)buffer;
2483 
2484 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2485 		get_current_io_context(kernel));
2486 	if (status != B_OK)
2487 		return status;
2488 
2489 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2490 		return B_BUFFER_OVERFLOW;
2491 
2492 	return B_OK;
2493 }
2494 
2495 
2496 /*!	Gets the full path to a given directory vnode.
2497 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2498 	file system doesn't support this call, it will fall back to iterating
2499 	through the parent directory to get the name of the child.
2500 
2501 	To protect against circular loops, it supports a maximum tree depth
2502 	of 256 levels.
2503 
2504 	Note that the path may no longer be correct by the time this function
2505 	returns! It doesn't use any locking to guarantee a consistent path, as
2506 	paths aren't safe anyway: the path to a file can change at any time.
2507 
2508 	It might be a good idea, though, for the caller to check whether the
2509 	returned path exists (it's not done here for efficiency reasons).
2510 */
2511 static status_t
2512 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2513 	bool kernel)
2514 {
2515 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2516 
2517 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2518 		return B_BAD_VALUE;
2519 
2520 	if (!S_ISDIR(vnode->Type()))
2521 		return B_NOT_A_DIRECTORY;
2522 
2523 	char* path = buffer;
2524 	int32 insert = bufferSize;
2525 	int32 maxLevel = 256;
2526 	int32 length;
2527 	status_t status;
2528 	struct io_context* ioContext = get_current_io_context(kernel);
2529 
2530 	// we don't use get_vnode() here because this call is more
2531 	// efficient and does all we need from get_vnode()
2532 	inc_vnode_ref_count(vnode);
2533 
2534 	if (vnode != ioContext->root) {
2535 		// we don't hit the IO context root
2536 		// resolve a volume root to its mount point
2537 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2538 		if (mountPoint) {
2539 			put_vnode(vnode);
2540 			vnode = mountPoint;
2541 		}
2542 	}
2543 
2544 	path[--insert] = '\0';
2545 		// the path is filled right to left
2546 
2547 	while (true) {
2548 		// the name buffer is also used for fs_read_dir()
2549 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2550 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2551 		struct vnode* parentVnode;
2552 		ino_t parentID;
2553 
2554 		// lookup the parent vnode
2555 		if (vnode == ioContext->root) {
2556 			// we hit the IO context root
2557 			parentVnode = vnode;
2558 			inc_vnode_ref_count(vnode);
2559 		} else {
2560 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2561 			if (status != B_OK)
2562 				goto out;
2563 		}
2564 
2565 		// get the node's name
2566 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2567 			sizeof(nameBuffer), ioContext);
2568 
2569 		if (vnode != ioContext->root) {
2570 			// we don't hit the IO context root
2571 			// resolve a volume root to its mount point
2572 			struct vnode* mountPoint
2573 				= resolve_volume_root_to_mount_point(parentVnode);
2574 			if (mountPoint) {
2575 				put_vnode(parentVnode);
2576 				parentVnode = mountPoint;
2577 				parentID = parentVnode->id;
2578 			}
2579 		}
2580 
2581 		bool hitRoot = (parentVnode == vnode);
2582 
2583 		// release the current vnode, we only need its parent from now on
2584 		put_vnode(vnode);
2585 		vnode = parentVnode;
2586 
2587 		if (status != B_OK)
2588 			goto out;
2589 
2590 		if (hitRoot) {
2591 			// we have reached "/", which means we have constructed the full
2592 			// path
2593 			break;
2594 		}
2595 
2596 		// TODO: add an explicit check for loops in about 10 levels to do
2597 		// real loop detection
2598 
2599 		// don't go deeper than 'maxLevel' to prevent circular loops
2600 		if (maxLevel-- < 0) {
2601 			status = B_LINK_LIMIT;
2602 			goto out;
2603 		}
2604 
2605 		// add the name in front of the current path
2606 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2607 		length = strlen(name);
2608 		insert -= length;
2609 		if (insert <= 0) {
2610 			status = B_RESULT_NOT_REPRESENTABLE;
2611 			goto out;
2612 		}
2613 		memcpy(path + insert, name, length);
2614 		path[--insert] = '/';
2615 	}
2616 
2617 	// the root dir will result in an empty path: fix it
2618 	if (path[insert] == '\0')
2619 		path[--insert] = '/';
2620 
2621 	TRACE(("  path is: %s\n", path + insert));
2622 
2623 	// move the path to the start of the buffer
2624 	length = bufferSize - insert;
2625 	memmove(buffer, path + insert, length);
2626 
2627 out:
2628 	put_vnode(vnode);
2629 	return status;
2630 }
2631 
2632 
2633 /*!	Checks the length of every path component, and adds a '.'
2634 	if the path ends in a slash.
2635 	The given path buffer must be able to store at least one
2636 	additional character.
2637 */
2638 static status_t
2639 check_path(char* to)
2640 {
2641 	int32 length = 0;
2642 
2643 	// check length of every path component
2644 
2645 	while (*to) {
2646 		char* begin;
2647 		if (*to == '/')
2648 			to++, length++;
2649 
2650 		begin = to;
2651 		while (*to != '/' && *to)
2652 			to++, length++;
2653 
2654 		if (to - begin > B_FILE_NAME_LENGTH)
2655 			return B_NAME_TOO_LONG;
2656 	}
2657 
2658 	if (length == 0)
2659 		return B_ENTRY_NOT_FOUND;
2660 
2661 	// complete path if there is a slash at the end
2662 
2663 	if (*(to - 1) == '/') {
2664 		if (length > B_PATH_NAME_LENGTH - 2)
2665 			return B_NAME_TOO_LONG;
2666 
2667 		to[0] = '.';
2668 		to[1] = '\0';
2669 	}
2670 
2671 	return B_OK;
2672 }
2673 
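// Worked example (illustrative, not compiled): check_path() completes a
// trailing slash with a '.'.
#if 0
char path[B_PATH_NAME_LENGTH] = "/boot/home/";
if (check_path(path) == B_OK) {
	// path is now "/boot/home/."; an over-long component would have
	// yielded B_NAME_TOO_LONG, an empty path B_ENTRY_NOT_FOUND
}
#endif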
2674 
2675 static struct file_descriptor*
2676 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2677 {
2678 	struct file_descriptor* descriptor
2679 		= get_fd(get_current_io_context(kernel), fd);
2680 	if (descriptor == NULL)
2681 		return NULL;
2682 
2683 	struct vnode* vnode = fd_vnode(descriptor);
2684 	if (vnode == NULL) {
2685 		put_fd(descriptor);
2686 		return NULL;
2687 	}
2688 
2689 	// ToDo: when we can close a file descriptor at any point, investigate
2690 	//	if this is still valid to do (accessing the vnode without ref_count
2691 	//	or locking)
2692 	*_vnode = vnode;
2693 	return descriptor;
2694 }
2695 
2696 
2697 static struct vnode*
2698 get_vnode_from_fd(int fd, bool kernel)
2699 {
2700 	struct file_descriptor* descriptor;
2701 	struct vnode* vnode;
2702 
2703 	descriptor = get_fd(get_current_io_context(kernel), fd);
2704 	if (descriptor == NULL)
2705 		return NULL;
2706 
2707 	vnode = fd_vnode(descriptor);
2708 	if (vnode != NULL)
2709 		inc_vnode_ref_count(vnode);
2710 
2711 	put_fd(descriptor);
2712 	return vnode;
2713 }
2714 
2715 
2716 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2717 	only the path will be considered. In this case, \a path must not be
2718 	NULL.
2719 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2720 	and should be NULL for files.
2721 */
2722 static status_t
2723 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2724 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2725 {
2726 	if (fd < 0 && !path)
2727 		return B_BAD_VALUE;
2728 
2729 	if (path != NULL && *path == '\0')
2730 		return B_ENTRY_NOT_FOUND;
2731 
2732 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2733 		// no FD or absolute path
2734 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2735 	}
2736 
2737 	// FD only, or FD + relative path
2738 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2739 	if (!vnode)
2740 		return B_FILE_ERROR;
2741 
2742 	if (path != NULL) {
2743 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2744 			_vnode, _parentID);
2745 	}
2746 
2747 	// there is no relative path to take into account
2748 
2749 	*_vnode = vnode;
2750 	if (_parentID)
2751 		*_parentID = -1;
2752 
2753 	return B_OK;
2754 }
2755 
2756 
2757 static int
2758 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2759 	void* cookie, int openMode, bool kernel)
2760 {
2761 	struct file_descriptor* descriptor;
2762 	int fd;
2763 
2764 	// If the vnode is locked, we don't allow creating a new file/directory
2765 	// file_descriptor for it
2766 	if (vnode && vnode->mandatory_locked_by != NULL
2767 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2768 		return B_BUSY;
2769 
2770 	descriptor = alloc_fd();
2771 	if (!descriptor)
2772 		return B_NO_MEMORY;
2773 
2774 	if (vnode)
2775 		descriptor->u.vnode = vnode;
2776 	else
2777 		descriptor->u.mount = mount;
2778 	descriptor->cookie = cookie;
2779 
2780 	switch (type) {
2781 		// vnode types
2782 		case FDTYPE_FILE:
2783 			descriptor->ops = &sFileOps;
2784 			break;
2785 		case FDTYPE_DIR:
2786 			descriptor->ops = &sDirectoryOps;
2787 			break;
2788 		case FDTYPE_ATTR:
2789 			descriptor->ops = &sAttributeOps;
2790 			break;
2791 		case FDTYPE_ATTR_DIR:
2792 			descriptor->ops = &sAttributeDirectoryOps;
2793 			break;
2794 
2795 		// mount types
2796 		case FDTYPE_INDEX_DIR:
2797 			descriptor->ops = &sIndexDirectoryOps;
2798 			break;
2799 		case FDTYPE_QUERY:
2800 			descriptor->ops = &sQueryOps;
2801 			break;
2802 
2803 		default:
2804 			panic("get_new_fd() called with unknown type %d\n", type);
2805 			break;
2806 	}
2807 	descriptor->type = type;
2808 	descriptor->open_mode = openMode;
2809 
2810 	io_context* context = get_current_io_context(kernel);
2811 	fd = new_fd(context, descriptor);
2812 	if (fd < 0) {
2813 		free(descriptor);
2814 		return B_NO_MORE_FDS;
2815 	}
2816 
2817 	mutex_lock(&context->io_mutex);
2818 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2819 	mutex_unlock(&context->io_mutex);
2820 
2821 	return fd;
2822 }
2823 
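// Usage sketch (illustrative, not compiled): a typical open path hands the
// vnode and the FS cookie over to get_new_fd(); on failure the ownership
// stays with the caller, who has to clean up. 'vnode', 'cookie', 'openMode'
// and 'kernel' are placeholders.
#if 0
int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
if (fd < 0) {
	// get_new_fd() did not take over the vnode reference or the cookie
	FS_CALL(vnode, close, cookie);
	FS_CALL(vnode, free_cookie, cookie);
	put_vnode(vnode);
}
#endif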
2824 
2825 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2826 	vfs_normalize_path(). See there for more documentation.
2827 */
2828 static status_t
2829 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2830 {
2831 	VNodePutter dirPutter;
2832 	struct vnode* dir = NULL;
2833 	status_t error;
2834 
2835 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2836 		// get dir vnode + leaf name
2837 		struct vnode* nextDir;
2838 		char leaf[B_FILE_NAME_LENGTH];
2839 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2840 		if (error != B_OK)
2841 			return error;
2842 
2843 		dir = nextDir;
2844 		strcpy(path, leaf);
2845 		dirPutter.SetTo(dir);
2846 
2847 		// get file vnode, if we shall resolve links
2848 		bool fileExists = false;
2849 		struct vnode* fileVnode;
2850 		VNodePutter fileVnodePutter;
2851 		if (traverseLink) {
2852 			inc_vnode_ref_count(dir);
2853 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2854 					NULL) == B_OK) {
2855 				fileVnodePutter.SetTo(fileVnode);
2856 				fileExists = true;
2857 			}
2858 		}
2859 
2860 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2861 			// we're done -- construct the path
2862 			bool hasLeaf = true;
2863 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2864 				// special cases "." and ".." -- get the dir, forget the leaf
2865 				inc_vnode_ref_count(dir);
2866 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2867 					&nextDir, NULL);
2868 				if (error != B_OK)
2869 					return error;
2870 				dir = nextDir;
2871 				dirPutter.SetTo(dir);
2872 				hasLeaf = false;
2873 			}
2874 
2875 			// get the directory path
2876 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2877 			if (error != B_OK)
2878 				return error;
2879 
2880 			// append the leaf name
2881 			if (hasLeaf) {
2882 				// insert a directory separator if this is not the file system
2883 				// root
2884 				if ((strcmp(path, "/") != 0
2885 					&& strlcat(path, "/", pathSize) >= pathSize)
2886 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2887 					return B_NAME_TOO_LONG;
2888 				}
2889 			}
2890 
2891 			return B_OK;
2892 		}
2893 
2894 		// read link
2895 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2896 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2897 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2898 			if (error != B_OK)
2899 				return error;
2900 			path[bufferSize] = '\0';
2901 		} else
2902 			return B_BAD_VALUE;
2903 	}
2904 
2905 	return B_LINK_LIMIT;
2906 }
2907 
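// Usage sketch (illustrative, not compiled): normalizing a user supplied
// path in place.
#if 0
char path[B_PATH_NAME_LENGTH];
strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));
if (normalize_path(path, sizeof(path), true, true) == B_OK) {
	// path now reads "/boot/home/Desktop" (assuming no symlinks are
	// involved along the way)
}
#endif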
2908 
2909 #ifdef ADD_DEBUGGER_COMMANDS
2910 
2911 
2912 static void
2913 _dump_advisory_locking(advisory_locking* locking)
2914 {
2915 	if (locking == NULL)
2916 		return;
2917 
2918 	kprintf("   lock:        %ld\n", locking->lock);
2919 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2920 
2921 	int32 index = 0;
2922 	LockList::Iterator iterator = locking->locks.GetIterator();
2923 	while (iterator.HasNext()) {
2924 		struct advisory_lock* lock = iterator.Next();
2925 
2926 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2927 		kprintf("        start:  %Ld\n", lock->start);
2928 		kprintf("        end:    %Ld\n", lock->end);
2929 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2930 	}
2931 }
2932 
2933 
2934 static void
2935 _dump_mount(struct fs_mount* mount)
2936 {
2937 	kprintf("MOUNT: %p\n", mount);
2938 	kprintf(" id:            %ld\n", mount->id);
2939 	kprintf(" device_name:   %s\n", mount->device_name);
2940 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2941 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2942 	kprintf(" partition:     %p\n", mount->partition);
2943 	kprintf(" lock:          %p\n", &mount->rlock);
2944 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2945 		mount->owns_file_device ? " owns_file_device" : "");
2946 
2947 	fs_volume* volume = mount->volume;
2948 	while (volume != NULL) {
2949 		kprintf(" volume %p:\n", volume);
2950 		kprintf("  layer:            %ld\n", volume->layer);
2951 		kprintf("  private_volume:   %p\n", volume->private_volume);
2952 		kprintf("  ops:              %p\n", volume->ops);
2953 		kprintf("  file_system:      %p\n", volume->file_system);
2954 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2955 		volume = volume->super_volume;
2956 	}
2957 
2958 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2959 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2960 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
2961 	set_debug_variable("_partition", (addr_t)mount->partition);
2962 }
2963 
2964 
2965 static bool
2966 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
2967 	const char* name)
2968 {
2969 	bool insertSlash = buffer[bufferSize] != '\0';
2970 	size_t nameLength = strlen(name);
2971 
2972 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
2973 		return false;
2974 
2975 	if (insertSlash)
2976 		buffer[--bufferSize] = '/';
2977 
2978 	bufferSize -= nameLength;
2979 	memcpy(buffer + bufferSize, name, nameLength);
2980 
2981 	return true;
2982 }
2983 
2984 
2985 static bool
2986 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
2987 	ino_t nodeID)
2988 {
2989 	if (bufferSize == 0)
2990 		return false;
2991 
2992 	bool insertSlash = buffer[bufferSize] != '\0';
2993 	if (insertSlash)
2994 		buffer[--bufferSize] = '/';
2995 
2996 	size_t size = snprintf(buffer, bufferSize,
2997 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
2998 	if (size > bufferSize) {
2999 		if (insertSlash)
3000 			bufferSize++;
3001 		return false;
3002 	}
3003 
3004 	if (size < bufferSize)
3005 		memmove(buffer + bufferSize - size, buffer, size);
3006 
3007 	bufferSize -= size;
3008 	return true;
3009 }
3010 
3011 
3012 static char*
3013 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3014 	bool& _truncated)
3015 {
3016 	// null-terminate the path
3017 	buffer[--bufferSize] = '\0';
3018 
3019 	while (true) {
3020 		while (vnode->mount->root_vnode == vnode
3021 				&& vnode->mount->covers_vnode != NULL) {
3022 			vnode = vnode->mount->covers_vnode;
3023 		}
3024 
3025 		if (vnode == sRoot) {
3026 			_truncated = bufferSize == 0;
3027 			if (!_truncated)
3028 				buffer[--bufferSize] = '/';
3029 			return buffer + bufferSize;
3030 		}
3031 
3032 		// resolve the name
3033 		ino_t dirID;
3034 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3035 			vnode->id, dirID);
3036 		if (name == NULL) {
3037 			// Failed to resolve the name -- prepend "<dev,node>/".
3038 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3039 				vnode->mount->id, vnode->id);
3040 			return buffer + bufferSize;
3041 		}
3042 
3043 		// prepend the name
3044 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3045 			_truncated = true;
3046 			return buffer + bufferSize;
3047 		}
3048 
3049 		// resolve the directory node
3050 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3051 		if (nextVnode == NULL) {
3052 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3053 				vnode->mount->id, dirID);
3054 			return buffer + bufferSize;
3055 		}
3056 
3057 		vnode = nextVnode;
3058 	}
3059 }
3060 
3061 
3062 static void
3063 _dump_vnode(struct vnode* vnode, bool printPath)
3064 {
3065 	kprintf("VNODE: %p\n", vnode);
3066 	kprintf(" device:        %ld\n", vnode->device);
3067 	kprintf(" id:            %Ld\n", vnode->id);
3068 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3069 	kprintf(" private_node:  %p\n", vnode->private_node);
3070 	kprintf(" mount:         %p\n", vnode->mount);
3071 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3072 	kprintf(" cache:         %p\n", vnode->cache);
3073 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3074 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3075 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3076 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3077 
3078 	_dump_advisory_locking(vnode->advisory_locking);
3079 
3080 	if (printPath) {
3081 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3082 		if (buffer != NULL) {
3083 			bool truncated;
3084 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3085 				B_PATH_NAME_LENGTH, truncated);
3086 			if (path != NULL) {
3087 				kprintf(" path:          ");
3088 				if (truncated)
3089 					kputs("<truncated>/");
3090 				kputs(path);
3091 				kputs("\n");
3092 			} else
3093 				kprintf("Failed to resolve vnode path.\n");
3094 
3095 			debug_free(buffer);
3096 		} else
3097 			kprintf("Failed to allocate memory for constructing the path.\n");
3098 	}
3099 
3100 	set_debug_variable("_node", (addr_t)vnode->private_node);
3101 	set_debug_variable("_mount", (addr_t)vnode->mount);
3102 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3103 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3104 }
3105 
3106 
3107 static int
3108 dump_mount(int argc, char** argv)
3109 {
3110 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3111 		kprintf("usage: %s [id|address]\n", argv[0]);
3112 		return 0;
3113 	}
3114 
3115 	uint32 id = parse_expression(argv[1]);
3116 	struct fs_mount* mount = NULL;
3117 
3118 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3119 	if (mount == NULL) {
3120 		if (IS_USER_ADDRESS(id)) {
3121 			kprintf("fs_mount not found\n");
3122 			return 0;
3123 		}
3124 		mount = (fs_mount*)id;
3125 	}
3126 
3127 	_dump_mount(mount);
3128 	return 0;
3129 }
3130 
3131 
3132 static int
3133 dump_mounts(int argc, char** argv)
3134 {
3135 	if (argc != 1) {
3136 		kprintf("usage: %s\n", argv[0]);
3137 		return 0;
3138 	}
3139 
3140 	kprintf("address     id root       covers     cookie     fs_name\n");
3141 
3142 	struct hash_iterator iterator;
3143 	struct fs_mount* mount;
3144 
3145 	hash_open(sMountsTable, &iterator);
3146 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3147 			!= NULL) {
3148 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3149 			mount->covers_vnode, mount->volume->private_volume,
3150 			mount->volume->file_system_name);
3151 
3152 		fs_volume* volume = mount->volume;
3153 		while (volume->super_volume != NULL) {
3154 			volume = volume->super_volume;
3155 			kprintf("                                     %p %s\n",
3156 				volume->private_volume, volume->file_system_name);
3157 		}
3158 	}
3159 
3160 	hash_close(sMountsTable, &iterator, false);
3161 	return 0;
3162 }
3163 
3164 
3165 static int
3166 dump_vnode(int argc, char** argv)
3167 {
3168 	bool printPath = false;
3169 	int argi = 1;
3170 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3171 		printPath = true;
3172 		argi++;
3173 	}
3174 
3175 	if (argi >= argc || argi + 2 < argc) {
3176 		print_debugger_command_usage(argv[0]);
3177 		return 0;
3178 	}
3179 
3180 	struct vnode* vnode = NULL;
3181 
3182 	if (argi + 1 == argc) {
3183 		vnode = (struct vnode*)parse_expression(argv[argi]);
3184 		if (IS_USER_ADDRESS(vnode)) {
3185 			kprintf("invalid vnode address\n");
3186 			return 0;
3187 		}
3188 		_dump_vnode(vnode, printPath);
3189 		return 0;
3190 	}
3191 
3192 	struct hash_iterator iterator;
3193 	dev_t device = parse_expression(argv[argi]);
3194 	ino_t id = parse_expression(argv[argi + 1]);
3195 
3196 	hash_open(sVnodeTable, &iterator);
3197 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3198 		if (vnode->id != id || vnode->device != device)
3199 			continue;
3200 
3201 		_dump_vnode(vnode, printPath);
3202 	}
3203 
3204 	hash_close(sVnodeTable, &iterator, false);
3205 	return 0;
3206 }
3207 
3208 
3209 static int
3210 dump_vnodes(int argc, char** argv)
3211 {
3212 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3213 		kprintf("usage: %s [device]\n", argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	// restrict dumped nodes to a certain device if requested
3218 	dev_t device = parse_expression(argv[1]);
3219 
3220 	struct hash_iterator iterator;
3221 	struct vnode* vnode;
3222 
3223 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3224 		"flags\n");
3225 
3226 	hash_open(sVnodeTable, &iterator);
3227 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3228 		if (vnode->device != device)
3229 			continue;
3230 
3231 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3232 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3233 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3234 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3235 	}
3236 
3237 	hash_close(sVnodeTable, &iterator, false);
3238 	return 0;
3239 }
3240 
3241 
3242 static int
3243 dump_vnode_caches(int argc, char** argv)
3244 {
3245 	struct hash_iterator iterator;
3246 	struct vnode* vnode;
3247 
3248 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3249 		kprintf("usage: %s [device]\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	// restrict dumped nodes to a certain device if requested
3254 	dev_t device = -1;
3255 	if (argc > 1)
3256 		device = parse_expression(argv[1]);
3257 
3258 	kprintf("address    dev     inode cache          size   pages\n");
3259 
3260 	hash_open(sVnodeTable, &iterator);
3261 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3262 		if (vnode->cache == NULL)
3263 			continue;
3264 		if (device != -1 && vnode->device != device)
3265 			continue;
3266 
3267 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3268 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3269 				/ B_PAGE_SIZE, vnode->cache->page_count);
3270 	}
3271 
3272 	hash_close(sVnodeTable, &iterator, false);
3273 	return 0;
3274 }
3275 
3276 
3277 int
3278 dump_io_context(int argc, char** argv)
3279 {
3280 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3281 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3282 		return 0;
3283 	}
3284 
3285 	struct io_context* context = NULL;
3286 
3287 	if (argc > 1) {
3288 		uint32 num = parse_expression(argv[1]);
3289 		if (IS_KERNEL_ADDRESS(num))
3290 			context = (struct io_context*)num;
3291 		else {
3292 			Team* team = team_get_team_struct_locked(num);
3293 			if (team == NULL) {
3294 				kprintf("could not find team with ID %ld\n", num);
3295 				return 0;
3296 			}
3297 			context = (struct io_context*)team->io_context;
3298 		}
3299 	} else
3300 		context = get_current_io_context(true);
3301 
3302 	kprintf("I/O CONTEXT: %p\n", context);
3303 	kprintf(" root vnode:\t%p\n", context->root);
3304 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3305 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3306 	kprintf(" max fds:\t%lu\n", context->table_size);
3307 
3308 	if (context->num_used_fds)
3309 		kprintf("   no.  type         ops  ref  open  mode         pos"
3310 			"      cookie\n");
3311 
3312 	for (uint32 i = 0; i < context->table_size; i++) {
3313 		struct file_descriptor* fd = context->fds[i];
3314 		if (fd == NULL)
3315 			continue;
3316 
3317 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3318 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3319 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3320 			fd->pos, fd->cookie,
3321 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3322 				? "mount" : "vnode",
3323 			fd->u.vnode);
3324 	}
3325 
3326 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3327 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3328 
3329 	set_debug_variable("_cwd", (addr_t)context->cwd);
3330 
3331 	return 0;
3332 }
3333 
3334 
3335 int
3336 dump_vnode_usage(int argc, char** argv)
3337 {
3338 	if (argc != 1) {
3339 		kprintf("usage: %s\n", argv[0]);
3340 		return 0;
3341 	}
3342 
3343 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3344 		kMaxUnusedVnodes);
3345 
3346 	struct hash_iterator iterator;
3347 	hash_open(sVnodeTable, &iterator);
3348 
3349 	uint32 count = 0;
3350 	struct vnode* vnode;
3351 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3352 		count++;
3353 	}
3354 
3355 	hash_close(sVnodeTable, &iterator, false);
3356 
3357 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3358 	return 0;
3359 }
3360 
3361 #endif	// ADD_DEBUGGER_COMMANDS
3362 
3363 /*!	Clears an iovec array of physical pages.
3364 	Returns in \a _bytes the number of bytes successfully cleared.
3365 */
3366 static status_t
3367 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3368 {
3369 	size_t bytes = *_bytes;
3370 	size_t index = 0;
3371 
3372 	while (bytes > 0) {
3373 		size_t length = min_c(vecs[index].iov_len, bytes);
3374 
3375 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3376 			length);
3377 		if (status != B_OK) {
3378 			*_bytes -= bytes;
3379 			return status;
3380 		}
3381 
3382 		bytes -= length;
		index++;
			// advance to the next vec -- without this only the first vec
			// would ever be cleared
3383 	}
3384 
3385 	return B_OK;
3386 }
3387 
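// Usage sketch (illustrative, not compiled): zeroing the first 'bytes'
// bytes described by an iovec array of physical addresses, as done for
// sparse reads below.
#if 0
size_t bytes = totalLength;
status_t status = zero_pages(vecs, vecCount, &bytes);
	// on failure 'bytes' holds the number of bytes actually cleared
#endif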
3388 
3389 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3390 	and calls the file system hooks to read/write the request to disk.
3391 */
3392 static status_t
3393 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3394 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3395 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3396 	bool doWrite)
3397 {
3398 	if (fileVecCount == 0) {
3399 		// There are no file vecs at this offset, so we're obviously trying
3400 		// to access the file outside of its bounds
3401 		return B_BAD_VALUE;
3402 	}
3403 
3404 	size_t numBytes = *_numBytes;
3405 	uint32 fileVecIndex;
3406 	size_t vecOffset = *_vecOffset;
3407 	uint32 vecIndex = *_vecIndex;
3408 	status_t status;
3409 	size_t size;
3410 
3411 	if (!doWrite && vecOffset == 0) {
3412 		// now directly read the data from the device
3413 		// the first file_io_vec can be read directly
3414 
3415 		if (fileVecs[0].length < numBytes)
3416 			size = fileVecs[0].length;
3417 		else
3418 			size = numBytes;
3419 
3420 		if (fileVecs[0].offset >= 0) {
3421 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3422 				&vecs[vecIndex], vecCount - vecIndex, &size);
3423 		} else {
3424 			// sparse read
3425 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3426 		}
3427 		if (status != B_OK)
3428 			return status;
3429 
3430 		// TODO: this is a work-around for buggy device drivers!
3431 		//	When our own drivers honour the length, we can:
3432 		//	a) also use this direct I/O for writes (otherwise, it would
3433 		//	   overwrite precious data)
3434 		//	b) panic if the term below is true (at least for writes)
3435 		if (size > fileVecs[0].length) {
3436 			//dprintf("warning: device driver %p doesn't respect total length "
3437 			//	"in read_pages() call!\n", ref->device);
3438 			size = fileVecs[0].length;
3439 		}
3440 
3441 		ASSERT(size <= fileVecs[0].length);
3442 
3443 		// If the file portion was contiguous, we're already done now
3444 		if (size == numBytes)
3445 			return B_OK;
3446 
3447 		// if we reached the end of the file, we can return as well
3448 		if (size != fileVecs[0].length) {
3449 			*_numBytes = size;
3450 			return B_OK;
3451 		}
3452 
3453 		fileVecIndex = 1;
3454 
3455 		// first, find out where we have to continue in our iovecs
3456 		for (; vecIndex < vecCount; vecIndex++) {
3457 			if (size < vecs[vecIndex].iov_len)
3458 				break;
3459 
3460 			size -= vecs[vecIndex].iov_len;
3461 		}
3462 
3463 		vecOffset = size;
3464 	} else {
3465 		fileVecIndex = 0;
3466 		size = 0;
3467 	}
3468 
3469 	// Too bad, let's process the rest of the file_io_vecs
3470 
3471 	size_t totalSize = size;
3472 	size_t bytesLeft = numBytes - size;
3473 
3474 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3475 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3476 		off_t fileOffset = fileVec.offset;
3477 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3478 
3479 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3480 
3481 		// process the complete fileVec
3482 		while (fileLeft > 0) {
3483 			iovec tempVecs[MAX_TEMP_IO_VECS];
3484 			uint32 tempCount = 0;
3485 
3486 			// size tracks how much of what is left of the current fileVec
3487 			// (fileLeft) has been assigned to tempVecs
3488 			size = 0;
3489 
3490 			// assign what is left of the current fileVec to the tempVecs
3491 			for (size = 0; size < fileLeft && vecIndex < vecCount
3492 					&& tempCount < MAX_TEMP_IO_VECS;) {
3493 				// try to satisfy one iovec per iteration (or as much as
3494 				// possible)
3495 
3496 				// bytes left of the current iovec
3497 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3498 				if (vecLeft == 0) {
3499 					vecOffset = 0;
3500 					vecIndex++;
3501 					continue;
3502 				}
3503 
3504 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3505 					vecIndex, vecOffset, size));
3506 
3507 				// actually available bytes
3508 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3509 
3510 				tempVecs[tempCount].iov_base
3511 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3512 				tempVecs[tempCount].iov_len = tempVecSize;
3513 				tempCount++;
3514 
3515 				size += tempVecSize;
3516 				vecOffset += tempVecSize;
3517 			}
3518 
3519 			size_t bytes = size;
3520 
3521 			if (fileOffset == -1) {
3522 				if (doWrite) {
3523 					panic("sparse write attempt: vnode %p", vnode);
3524 					status = B_IO_ERROR;
3525 				} else {
3526 					// sparse read
3527 					status = zero_pages(tempVecs, tempCount, &bytes);
3528 				}
3529 			} else if (doWrite) {
3530 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3531 					tempVecs, tempCount, &bytes);
3532 			} else {
3533 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3534 					tempVecs, tempCount, &bytes);
3535 			}
3536 			if (status != B_OK)
3537 				return status;
3538 
3539 			totalSize += bytes;
3540 			bytesLeft -= size;
3541 			if (fileOffset >= 0)
3542 				fileOffset += size;
3543 			fileLeft -= size;
3544 			//dprintf("-> file left = %Lu\n", fileLeft);
3545 
3546 			if (size != bytes || vecIndex >= vecCount) {
3547 				// there are no more bytes or iovecs, let's bail out
3548 				*_numBytes = totalSize;
3549 				return B_OK;
3550 			}
3551 		}
3552 	}
3553 
3554 	*_vecIndex = vecIndex;
3555 	*_vecOffset = vecOffset;
3556 	*_numBytes = totalSize;
3557 	return B_OK;
3558 }
3559 
3560 
3561 //	#pragma mark - public API for file systems
3562 
3563 
3564 extern "C" status_t
3565 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3566 	fs_vnode_ops* ops)
3567 {
3568 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3569 		volume, volume->id, vnodeID, privateNode));
3570 
3571 	if (privateNode == NULL)
3572 		return B_BAD_VALUE;
3573 
3574 	// create the node
3575 	bool nodeCreated;
3576 	struct vnode* vnode;
3577 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3578 		nodeCreated);
3579 	if (status != B_OK)
3580 		return status;
3581 
3582 	WriteLocker nodeLocker(sVnodeLock, true);
3583 		// create_new_vnode_and_lock() has locked for us
3584 
3585 	// file system integrity check:
3586 	// test if the vnode already exists and bail out if this is the case!
3587 	if (!nodeCreated) {
3588 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3589 			volume->id, vnodeID, privateNode, vnode->private_node);
3590 		return B_ERROR;
3591 	}
3592 
3593 	vnode->private_node = privateNode;
3594 	vnode->ops = ops;
3595 	vnode->SetUnpublished(true);
3596 
3597 	TRACE(("returns: %s\n", strerror(status)));
3598 
3599 	return status;
3600 }
3601 
3602 
3603 extern "C" status_t
3604 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3605 	fs_vnode_ops* ops, int type, uint32 flags)
3606 {
3607 	FUNCTION(("publish_vnode()\n"));
3608 
3609 	WriteLocker locker(sVnodeLock);
3610 
3611 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3612 
3613 	bool nodeCreated = false;
3614 	if (vnode == NULL) {
3615 		if (privateNode == NULL)
3616 			return B_BAD_VALUE;
3617 
3618 		// create the node
3619 		locker.Unlock();
3620 			// create_new_vnode_and_lock() will re-lock for us on success
3621 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3622 			nodeCreated);
3623 		if (status != B_OK)
3624 			return status;
3625 
3626 		locker.SetTo(sVnodeLock, true);
3627 	}
3628 
3629 	if (nodeCreated) {
3630 		vnode->private_node = privateNode;
3631 		vnode->ops = ops;
3632 		vnode->SetUnpublished(true);
3633 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3634 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3635 		// already known, but not published
3636 	} else
3637 		return B_BAD_VALUE;
3638 
3639 	bool publishSpecialSubNode = false;
3640 
3641 	vnode->SetType(type);
3642 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3643 	publishSpecialSubNode = is_special_node_type(type)
3644 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3645 
3646 	status_t status = B_OK;
3647 
3648 	// create sub vnodes, if necessary
3649 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3650 		locker.Unlock();
3651 
3652 		fs_volume* subVolume = volume;
3653 		if (volume->sub_volume != NULL) {
3654 			while (status == B_OK && subVolume->sub_volume != NULL) {
3655 				subVolume = subVolume->sub_volume;
3656 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3657 					vnode);
3658 			}
3659 		}
3660 
3661 		if (status == B_OK && publishSpecialSubNode)
3662 			status = create_special_sub_node(vnode, flags);
3663 
3664 		if (status != B_OK) {
3665 			// error -- clean up the created sub vnodes
3666 			while (subVolume->super_volume != volume) {
3667 				subVolume = subVolume->super_volume;
3668 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3669 			}
3670 		}
3671 
3672 		if (status == B_OK) {
3673 			ReadLocker vnodesReadLocker(sVnodeLock);
3674 			AutoLocker<Vnode> nodeLocker(vnode);
3675 			vnode->SetBusy(false);
3676 			vnode->SetUnpublished(false);
3677 		} else {
3678 			locker.Lock();
3679 			hash_remove(sVnodeTable, vnode);
3680 			remove_vnode_from_mount_list(vnode, vnode->mount);
3681 			free(vnode);
3682 		}
3683 	} else {
3684 		// we still hold the write lock -- mark the node unbusy and published
3685 		vnode->SetBusy(false);
3686 		vnode->SetUnpublished(false);
3687 	}
3688 
3689 	TRACE(("returns: %s\n", strerror(status)));
3690 
3691 	return status;
3692 }
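
// Example (editorial sketch, not part of the original source): the usual
// two-step pattern a file system follows when it creates a node. new_vnode()
// makes the node known to the VFS but leaves it busy and unpublished;
// publish_vnode() then marks it ready for lookups. The my_fs_* and
// gMyFSVnodeOps names are hypothetical.
#if 0
static status_t
my_fs_create_node(fs_volume* volume, my_fs_inode* inode)
{
	// register the node with the VFS; it stays busy and unpublished
	status_t status = new_vnode(volume, inode->id, inode, &gMyFSVnodeOps);
	if (status != B_OK)
		return status;

	// ... further node initialization could go here ...

	// make the node available; the type is taken from the node's mode
	return publish_vnode(volume, inode->id, inode, &gMyFSVnodeOps,
		inode->mode & S_IFMT, 0);
}
#endif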
3693 
3694 
3695 extern "C" status_t
3696 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3697 {
3698 	struct vnode* vnode;
3699 
3700 	if (volume == NULL)
3701 		return B_BAD_VALUE;
3702 
3703 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3704 	if (status != B_OK)
3705 		return status;
3706 
3707 	// If this is a layered FS, we need to get the node cookie for the requested
3708 	// layer.
3709 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3710 		fs_vnode resolvedNode;
3711 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3712 			&resolvedNode);
3713 		if (status != B_OK) {
3714 			panic("get_vnode(): Failed to get super node for vnode %p, "
3715 				"volume: %p", vnode, volume);
3716 			put_vnode(vnode);
3717 			return status;
3718 		}
3719 
3720 		if (_privateNode != NULL)
3721 			*_privateNode = resolvedNode.private_node;
3722 	} else if (_privateNode != NULL)
3723 		*_privateNode = vnode->private_node;
3724 
3725 	return B_OK;
3726 }
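
// Example (editorial sketch): get_vnode()/put_vnode() bracket access to
// another node of the same volume -- e.g. a directory iteration that needs
// the child's private node. childID is a hypothetical inode number.
#if 0
void* privateNode;
if (get_vnode(volume, childID, &privateNode) == B_OK) {
	// ... use privateNode; for layered file systems this is the
	// cookie of the layer that volume belongs to ...
	put_vnode(volume, childID);
}
#endif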
3727 
3728 
3729 extern "C" status_t
3730 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3731 {
3732 	struct vnode* vnode;
3733 
3734 	rw_lock_read_lock(&sVnodeLock);
3735 	vnode = lookup_vnode(volume->id, vnodeID);
3736 	rw_lock_read_unlock(&sVnodeLock);
3737 
3738 	if (vnode == NULL)
3739 		return B_BAD_VALUE;
3740 
3741 	inc_vnode_ref_count(vnode);
3742 	return B_OK;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 put_vnode(fs_volume* volume, ino_t vnodeID)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	rw_lock_read_lock(&sVnodeLock);
3752 	vnode = lookup_vnode(volume->id, vnodeID);
3753 	rw_lock_read_unlock(&sVnodeLock);
3754 
3755 	if (vnode == NULL)
3756 		return B_BAD_VALUE;
3757 
3758 	dec_vnode_ref_count(vnode, false, true);
3759 	return B_OK;
3760 }
3761 
3762 
3763 extern "C" status_t
3764 remove_vnode(fs_volume* volume, ino_t vnodeID)
3765 {
3766 	ReadLocker locker(sVnodeLock);
3767 
3768 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3769 	if (vnode == NULL)
3770 		return B_ENTRY_NOT_FOUND;
3771 
3772 	if (vnode->covered_by != NULL) {
3773 		// this vnode is in use
3774 		return B_BUSY;
3775 	}
3776 
3777 	vnode->Lock();
3778 
3779 	vnode->SetRemoved(true);
3780 	bool removeUnpublished = false;
3781 
3782 	if (vnode->IsUnpublished()) {
3783 		// prepare the vnode for deletion
3784 		removeUnpublished = true;
3785 		vnode->SetBusy(true);
3786 	}
3787 
3788 	vnode->Unlock();
3789 	locker.Unlock();
3790 
3791 	if (removeUnpublished) {
3792 		// If the vnode hasn't been published yet, we delete it here
3793 		atomic_add(&vnode->ref_count, -1);
3794 		free_vnode(vnode, true);
3795 	}
3796 
3797 	return B_OK;
3798 }
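
// Example (editorial sketch): an unlink hook would typically remove the
// directory entry first and then hand the node over to the VFS for deletion
// once its last reference is gone. The my_fs_* names are hypothetical.
#if 0
static status_t
my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
{
	ino_t nodeID;
	status_t status = my_fs_remove_entry(volume, dir, name, &nodeID);
	if (status != B_OK)
		return status;

	// delete the node as soon as nobody uses it anymore
	return remove_vnode(volume, nodeID);
}
#endif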
3799 
3800 
3801 extern "C" status_t
3802 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3803 {
3804 	struct vnode* vnode;
3805 
3806 	rw_lock_read_lock(&sVnodeLock);
3807 
3808 	vnode = lookup_vnode(volume->id, vnodeID);
3809 	if (vnode) {
3810 		AutoLocker<Vnode> nodeLocker(vnode);
3811 		vnode->SetRemoved(false);
3812 	}
3813 
3814 	rw_lock_read_unlock(&sVnodeLock);
3815 	return B_OK;
3816 }
3817 
3818 
3819 extern "C" status_t
3820 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3821 {
3822 	ReadLocker _(sVnodeLock);
3823 
3824 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3825 		if (_removed != NULL)
3826 			*_removed = vnode->IsRemoved();
3827 		return B_OK;
3828 	}
3829 
3830 	return B_BAD_VALUE;
3831 }
3832 
3833 
3834 extern "C" fs_volume*
3835 volume_for_vnode(fs_vnode* _vnode)
3836 {
3837 	if (_vnode == NULL)
3838 		return NULL;
3839 
3840 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3841 	return vnode->mount->volume;
3842 }
3843 
3844 
3845 #if 0
3846 extern "C" status_t
3847 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3848 	size_t* _numBytes)
3849 {
3850 	struct file_descriptor* descriptor;
3851 	struct vnode* vnode;
3852 
3853 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3854 	if (descriptor == NULL)
3855 		return B_FILE_ERROR;
3856 
3857 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3858 		count, 0, _numBytes);
3859 
3860 	put_fd(descriptor);
3861 	return status;
3862 }
3863 
3864 
3865 extern "C" status_t
3866 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3867 	size_t* _numBytes)
3868 {
3869 	struct file_descriptor* descriptor;
3870 	struct vnode* vnode;
3871 
3872 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3873 	if (descriptor == NULL)
3874 		return B_FILE_ERROR;
3875 
3876 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3877 		count, 0, _numBytes);
3878 
3879 	put_fd(descriptor);
3880 	return status;
3881 }
3882 #endif
3883 
3884 
3885 extern "C" status_t
3886 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3887 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3888 	size_t* _bytes)
3889 {
3890 	struct file_descriptor* descriptor;
3891 	struct vnode* vnode;
3892 
3893 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3894 	if (descriptor == NULL)
3895 		return B_FILE_ERROR;
3896 
3897 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3898 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3899 		false);
3900 
3901 	put_fd(descriptor);
3902 	return status;
3903 }
3904 
3905 
3906 extern "C" status_t
3907 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3908 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3909 	size_t* _bytes)
3910 {
3911 	struct file_descriptor* descriptor;
3912 	struct vnode* vnode;
3913 
3914 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3915 	if (descriptor == NULL)
3916 		return B_FILE_ERROR;
3917 
3918 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3919 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3920 		true);
3921 
3922 	put_fd(descriptor);
3923 	return status;
3924 }
3925 
3926 
3927 extern "C" status_t
3928 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3929 {
3930 	// lookup mount -- the caller is required to make sure that the mount
3931 	// won't go away
3932 	MutexLocker locker(sMountMutex);
3933 	struct fs_mount* mount = find_mount(mountID);
3934 	if (mount == NULL)
3935 		return B_BAD_VALUE;
3936 	locker.Unlock();
3937 
3938 	return mount->entry_cache.Add(dirID, name, nodeID);
3939 }
3940 
3941 
3942 extern "C" status_t
3943 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3944 {
3945 	// lookup mount -- the caller is required to make sure that the mount
3946 	// won't go away
3947 	MutexLocker locker(sMountMutex);
3948 	struct fs_mount* mount = find_mount(mountID);
3949 	if (mount == NULL)
3950 		return B_BAD_VALUE;
3951 	locker.Unlock();
3952 
3953 	return mount->entry_cache.Remove(dirID, name);
3954 }
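
// Example (editorial sketch): after a successful lookup a file system can
// seed the entry cache, so that repeated lookups of name in dirID can be
// answered without calling into the FS; the entry must be removed again
// when it disappears. dirID, name, and nodeID are illustrative.
#if 0
entry_cache_add(volume->id, dirID, name, nodeID);
// ... later, when the entry is unlinked or renamed:
entry_cache_remove(volume->id, dirID, name);
#endif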
3955 
3956 
3957 //	#pragma mark - private VFS API
3958 //	Functions the VFS exports for other parts of the kernel
3959 
3960 
3961 /*! Acquires another reference to the vnode that has to be released
3962 	by calling vfs_put_vnode().
3963 */
3964 void
3965 vfs_acquire_vnode(struct vnode* vnode)
3966 {
3967 	inc_vnode_ref_count(vnode);
3968 }
3969 
3970 
3971 /*! This is currently called from file_cache_create() only.
3972 	It's probably a temporary solution as long as devfs requires that
3973 	fs_read_pages()/fs_write_pages() are called with the standard
3974 	open cookie and not with a device cookie.
3975 	If that's done differently, remove this call; it has no other
3976 	purpose.
3977 */
3978 extern "C" status_t
3979 vfs_get_cookie_from_fd(int fd, void** _cookie)
3980 {
3981 	struct file_descriptor* descriptor;
3982 
3983 	descriptor = get_fd(get_current_io_context(true), fd);
3984 	if (descriptor == NULL)
3985 		return B_FILE_ERROR;
3986 
3987 	*_cookie = descriptor->cookie;
3988 	return B_OK;
3989 }
3990 
3991 
3992 extern "C" status_t
3993 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3994 {
3995 	*vnode = get_vnode_from_fd(fd, kernel);
3996 
3997 	if (*vnode == NULL)
3998 		return B_FILE_ERROR;
3999 
4000 	return B_NO_ERROR;
4001 }
4002 
4003 
4004 extern "C" status_t
4005 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4006 {
4007 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4008 		path, kernel));
4009 
4010 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4011 	if (pathBuffer.InitCheck() != B_OK)
4012 		return B_NO_MEMORY;
4013 
4014 	char* buffer = pathBuffer.LockBuffer();
4015 	strlcpy(buffer, path, pathBuffer.BufferSize());
4016 
4017 	struct vnode* vnode;
4018 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4019 	if (status != B_OK)
4020 		return status;
4021 
4022 	*_vnode = vnode;
4023 	return B_OK;
4024 }
4025 
4026 
4027 extern "C" status_t
4028 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4029 {
4030 	struct vnode* vnode;
4031 
4032 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4033 	if (status != B_OK)
4034 		return status;
4035 
4036 	*_vnode = vnode;
4037 	return B_OK;
4038 }
4039 
4040 
4041 extern "C" status_t
4042 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4043 	const char* name, struct vnode** _vnode)
4044 {
4045 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4046 }
4047 
4048 
4049 extern "C" void
4050 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4051 {
4052 	*_mountID = vnode->device;
4053 	*_vnodeID = vnode->id;
4054 }
4055 
4056 
4057 /*!
4058 	Helper function abstracting the process of "converting" a given
4059 	vnode-pointer to a fs_vnode-pointer.
4060 	Currently only used in bindfs.
4061 */
4062 extern "C" fs_vnode*
4063 vfs_fsnode_for_vnode(struct vnode* vnode)
4064 {
4065 	return vnode;
4066 }
4067 
4068 
4069 /*!
4070 	Calls fs_open() on the given vnode and returns a new
4071 	file descriptor for it
4072 */
4073 int
4074 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4075 {
4076 	return open_vnode(vnode, openMode, kernel);
4077 }
4078 
4079 
4080 /*!	Looks up a vnode with the given mount and vnode ID.
4081 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4082 	to the node.
4083 	It's currently only used by file_cache_create().
4084 */
4085 extern "C" status_t
4086 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4087 {
4088 	rw_lock_read_lock(&sVnodeLock);
4089 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4090 	rw_lock_read_unlock(&sVnodeLock);
4091 
4092 	if (vnode == NULL)
4093 		return B_ERROR;
4094 
4095 	*_vnode = vnode;
4096 	return B_OK;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4102 	bool traverseLeafLink, bool kernel, void** _node)
4103 {
4104 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4105 		volume, path, kernel));
4106 
4107 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4108 	if (pathBuffer.InitCheck() != B_OK)
4109 		return B_NO_MEMORY;
4110 
4111 	fs_mount* mount;
4112 	status_t status = get_mount(volume->id, &mount);
4113 	if (status != B_OK)
4114 		return status;
4115 
4116 	char* buffer = pathBuffer.LockBuffer();
4117 	strlcpy(buffer, path, pathBuffer.BufferSize());
4118 
4119 	struct vnode* vnode = mount->root_vnode;
4120 
4121 	if (buffer[0] == '/')
4122 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4123 	else {
4124 		inc_vnode_ref_count(vnode);
4125 			// vnode_path_to_vnode() releases a reference to the starting vnode
4126 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4127 			kernel, &vnode, NULL);
4128 	}
4129 
4130 	put_mount(mount);
4131 
4132 	if (status != B_OK)
4133 		return status;
4134 
4135 	if (vnode->device != volume->id) {
4136 		// wrong mount ID -- must not gain access to foreign file system nodes
4137 		put_vnode(vnode);
4138 		return B_BAD_VALUE;
4139 	}
4140 
4141 	// Use get_vnode() to resolve the cookie for the right layer.
4142 	status = get_vnode(volume, vnode->id, _node);
4143 	put_vnode(vnode);
4144 
4145 	return status;
4146 }
4147 
4148 
4149 status_t
4150 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4151 	struct stat* stat, bool kernel)
4152 {
4153 	status_t status;
4154 
4155 	if (path) {
4156 		// path given: get the stat of the node referred to by (fd, path)
4157 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4158 		if (pathBuffer.InitCheck() != B_OK)
4159 			return B_NO_MEMORY;
4160 
4161 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4162 			traverseLeafLink, stat, kernel);
4163 	} else {
4164 		// no path given: get the FD and use the FD operation
4165 		struct file_descriptor* descriptor
4166 			= get_fd(get_current_io_context(kernel), fd);
4167 		if (descriptor == NULL)
4168 			return B_FILE_ERROR;
4169 
4170 		if (descriptor->ops->fd_read_stat)
4171 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4172 		else
4173 			status = B_UNSUPPORTED;
4174 
4175 		put_fd(descriptor);
4176 	}
4177 
4178 	return status;
4179 }
4180 
4181 
4182 /*!	Finds the full path to the file that contains the module \a moduleName,
4183 	puts it into \a pathBuffer, and returns B_OK for success.
4184 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4185 	\c B_ENTRY_NOT_FOUND if no file could be found.
4186 	\a pathBuffer is clobbered in any case and must not be relied on if this
4187 	function returns unsuccessfully.
4188 	\a basePath and \a pathBuffer must not point to the same space.
4189 */
4190 status_t
4191 vfs_get_module_path(const char* basePath, const char* moduleName,
4192 	char* pathBuffer, size_t bufferSize)
4193 {
4194 	struct vnode* dir;
4195 	struct vnode* file;
4196 	status_t status;
4197 	size_t length;
4198 	char* path;
4199 
4200 	if (bufferSize == 0
4201 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4202 		return B_BUFFER_OVERFLOW;
4203 
4204 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4205 	if (status != B_OK)
4206 		return status;
4207 
4208 	// the path buffer has been clobbered by the above call
4209 	length = strlcpy(pathBuffer, basePath, bufferSize);
4210 	if (pathBuffer[length - 1] != '/')
4211 		pathBuffer[length++] = '/';
4212 
4213 	path = pathBuffer + length;
4214 	bufferSize -= length;
4215 
4216 	while (moduleName) {
4217 		char* nextPath = strchr(moduleName, '/');
4218 		if (nextPath == NULL)
4219 			length = strlen(moduleName);
4220 		else {
4221 			length = nextPath - moduleName;
4222 			nextPath++;
4223 		}
4224 
4225 		if (length + 1 >= bufferSize) {
4226 			status = B_BUFFER_OVERFLOW;
4227 			goto err;
4228 		}
4229 
4230 		memcpy(path, moduleName, length);
4231 		path[length] = '\0';
4232 		moduleName = nextPath;
4233 
4234 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4235 		if (status != B_OK) {
4236 			// vnode_path_to_vnode() has already released the reference to dir
4237 			return status;
4238 		}
4239 
4240 		if (S_ISDIR(file->Type())) {
4241 			// go to the next directory
4242 			path[length] = '/';
4243 			path[length + 1] = '\0';
4244 			path += length + 1;
4245 			bufferSize -= length + 1;
4246 
4247 			dir = file;
4248 		} else if (S_ISREG(file->Type())) {
4249 			// it's a file so it should be what we've searched for
4250 			put_vnode(file);
4251 
4252 			return B_OK;
4253 		} else {
4254 			TRACE(("vfs_get_module_path(): something is strange here: "
4255 				"0x%08lx...\n", file->Type()));
4256 			status = B_ERROR;
4257 			dir = file;
4258 			goto err;
4259 		}
4260 	}
4261 
4262 	// if we got here, the moduleName just pointed to a directory, not to
4263 	// a real module - what should we do in this case?
4264 	status = B_ENTRY_NOT_FOUND;
4265 
4266 err:
4267 	put_vnode(dir);
4268 	return status;
4269 }
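
// Example (editorial sketch): resolving a module name component by
// component. Assuming the module "bus_managers/pci/v1" lives in the file
// "pci" below basePath, pathBuffer would end up as
// "<basePath>/bus_managers/pci". The base path is illustrative only.
#if 0
char pathBuffer[B_PATH_NAME_LENGTH];
status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
	"bus_managers/pci/v1", pathBuffer, sizeof(pathBuffer));
#endif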
4270 
4271 
4272 /*!	\brief Normalizes a given path.
4273 
4274 	The path must refer to an existing or non-existing entry in an existing
4275 	directory, that is chopping off the leaf component the remaining path must
4276 	refer to an existing directory.
4277 
4278 	The returned path will be canonical in that it will be absolute, will
4279 	not contain any "." or ".." components or duplicate occurrences of '/'s,
4280 	and none of the directory components will be symbolic links.
4281 
4282 	Any two paths referring to the same entry will result in the same
4283 	normalized path (well, that is pretty much the definition of `normalized',
4284 	isn't it :-).
4285 
4286 	\param path The path to be normalized.
4287 	\param buffer The buffer into which the normalized path will be written.
4288 		   May be the same one as \a path.
4289 	\param bufferSize The size of \a buffer.
4290 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4291 	\param kernel \c true, if the IO context of the kernel shall be used,
4292 		   otherwise that of the team this thread belongs to. Only relevant,
4293 		   if the path is relative (to get the CWD).
4294 	\return \c B_OK if everything went fine, another error code otherwise.
4295 */
4296 status_t
4297 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4298 	bool traverseLink, bool kernel)
4299 {
4300 	if (!path || !buffer || bufferSize < 1)
4301 		return B_BAD_VALUE;
4302 
4303 	if (path != buffer) {
4304 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4305 			return B_BUFFER_OVERFLOW;
4306 	}
4307 
4308 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4309 }
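
// Example (editorial sketch): both of the following inputs would normalize
// to "/boot/home/Desktop" (the paths are illustrative):
#if 0
char buffer[B_PATH_NAME_LENGTH];
vfs_normalize_path("/boot/home/../home//Desktop", buffer, sizeof(buffer),
	true, true);
vfs_normalize_path("/boot//home/Desktop/.", buffer, sizeof(buffer),
	true, true);
#endif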
4310 
4311 
4312 /*!	\brief Creates a special node in the file system.
4313 
4314 	The caller gets a reference to the newly created node (which is passed
4315 	back through \a _createdVnode) and is responsible for releasing it.
4316 
4317 	\param path The path where to create the entry for the node. Can be \c NULL,
4318 		in which case the node is created without an entry in the root FS -- it
4319 		will automatically be deleted when the last reference has been released.
4320 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4321 		the target file system will just create the node with its standard
4322 		operations. Depending on the type of the node a subnode might be created
4323 		automatically, though.
4324 	\param mode The type and permissions for the node to be created.
4325 	\param flags Flags to be passed to the creating FS.
4326 	\param kernel \c true, if called in the kernel context (relevant only if
4327 		\a path is not \c NULL and not absolute).
4328 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4329 		file system creating the node, with the private data pointer and
4330 		operations for the super node. Can be \c NULL.
4331 	\param _createdVnode Pointer to pre-allocated storage where to store the
4332 		pointer to the newly created node.
4333 	\return \c B_OK, if everything went fine, another error code otherwise.
4334 */
4335 status_t
4336 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4337 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4338 	struct vnode** _createdVnode)
4339 {
4340 	struct vnode* dirNode;
4341 	char _leaf[B_FILE_NAME_LENGTH];
4342 	char* leaf = NULL;
4343 
4344 	if (path) {
4345 		// We've got a path. Get the dir vnode and the leaf name.
4346 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4347 		if (tmpPathBuffer.InitCheck() != B_OK)
4348 			return B_NO_MEMORY;
4349 
4350 		char* tmpPath = tmpPathBuffer.LockBuffer();
4351 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4352 			return B_NAME_TOO_LONG;
4353 
4354 		// get the dir vnode and the leaf name
4355 		leaf = _leaf;
4356 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4357 		if (error != B_OK)
4358 			return error;
4359 	} else {
4360 		// No path. Create the node in the root FS.
4361 		dirNode = sRoot;
4362 		inc_vnode_ref_count(dirNode);
4363 	}
4364 
4365 	VNodePutter _(dirNode);
4366 
4367 	// check support for creating special nodes
4368 	if (!HAS_FS_CALL(dirNode, create_special_node))
4369 		return B_UNSUPPORTED;
4370 
4371 	// create the node
4372 	fs_vnode superVnode;
4373 	ino_t nodeID;
4374 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4375 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4376 	if (status != B_OK)
4377 		return status;
4378 
4379 	// lookup the node
4380 	rw_lock_read_lock(&sVnodeLock);
4381 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4382 	rw_lock_read_unlock(&sVnodeLock);
4383 
4384 	if (*_createdVnode == NULL) {
4385 		panic("vfs_create_special_node(): lookup of node failed");
4386 		return B_ERROR;
4387 	}
4388 
4389 	return B_OK;
4390 }
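
// Example (editorial sketch): creating an anonymous FIFO, i.e. a special
// node without an entry in the root FS; the caller owns the returned
// reference, and the node goes away with its last reference.
#if 0
struct vnode* fifoVnode;
status_t status = vfs_create_special_node(NULL, NULL, S_IFIFO | 0600, 0,
	true, NULL, &fifoVnode);
if (status == B_OK) {
	// ... use the node ...
	vfs_put_vnode(fifoVnode);
}
#endif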
4391 
4392 
4393 extern "C" void
4394 vfs_put_vnode(struct vnode* vnode)
4395 {
4396 	put_vnode(vnode);
4397 }
4398 
4399 
4400 extern "C" status_t
4401 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4402 {
4403 	// Get current working directory from io context
4404 	struct io_context* context = get_current_io_context(false);
4405 	status_t status = B_OK;
4406 
4407 	mutex_lock(&context->io_mutex);
4408 
4409 	if (context->cwd != NULL) {
4410 		*_mountID = context->cwd->device;
4411 		*_vnodeID = context->cwd->id;
4412 	} else
4413 		status = B_ERROR;
4414 
4415 	mutex_unlock(&context->io_mutex);
4416 	return status;
4417 }
4418 
4419 
4420 status_t
4421 vfs_unmount(dev_t mountID, uint32 flags)
4422 {
4423 	return fs_unmount(NULL, mountID, flags, true);
4424 }
4425 
4426 
4427 extern "C" status_t
4428 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4429 {
4430 	struct vnode* vnode;
4431 
4432 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4433 	if (status != B_OK)
4434 		return status;
4435 
4436 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4437 	put_vnode(vnode);
4438 	return B_OK;
4439 }
4440 
4441 
4442 extern "C" void
4443 vfs_free_unused_vnodes(int32 level)
4444 {
4445 	vnode_low_resource_handler(NULL,
4446 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4447 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4448 		level);
4449 }
4450 
4451 
4452 extern "C" bool
4453 vfs_can_page(struct vnode* vnode, void* cookie)
4454 {
4455 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4456 
4457 	if (HAS_FS_CALL(vnode, can_page))
4458 		return FS_CALL(vnode, can_page, cookie);
4459 	return false;
4460 }
4461 
4462 
4463 extern "C" status_t
4464 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4465 	const generic_io_vec* vecs, size_t count, uint32 flags,
4466 	generic_size_t* _numBytes)
4467 {
4468 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4469 		pos));
4470 
4471 #if VFS_PAGES_IO_TRACING
4472 	generic_size_t bytesRequested = *_numBytes;
4473 #endif
4474 
4475 	IORequest request;
4476 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4477 	if (status == B_OK) {
4478 		status = vfs_vnode_io(vnode, cookie, &request);
4479 		if (status == B_OK)
4480 			status = request.Wait();
4481 		*_numBytes = request.TransferredBytes();
4482 	}
4483 
4484 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4485 		status, *_numBytes));
4486 
4487 	return status;
4488 }
4489 
4490 
4491 extern "C" status_t
4492 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4493 	const generic_io_vec* vecs, size_t count, uint32 flags,
4494 	generic_size_t* _numBytes)
4495 {
4496 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4497 		pos));
4498 
4499 #if VFS_PAGES_IO_TRACING
4500 	generic_size_t bytesRequested = *_numBytes;
4501 #endif
4502 
4503 	IORequest request;
4504 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4505 	if (status == B_OK) {
4506 		status = vfs_vnode_io(vnode, cookie, &request);
4507 		if (status == B_OK)
4508 			status = request.Wait();
4509 		*_numBytes = request.TransferredBytes();
4510 	}
4511 
4512 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4513 		status, *_numBytes));
4514 
4515 	return status;
4516 }
4517 
4518 
4519 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4520 	will be created if \a allocate is \c true.
4521 	On success, the function also grabs a reference to the cache it
4522 	returns.
4523 */
4524 extern "C" status_t
4525 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4526 {
4527 	if (vnode->cache != NULL) {
4528 		vnode->cache->AcquireRef();
4529 		*_cache = vnode->cache;
4530 		return B_OK;
4531 	}
4532 
4533 	rw_lock_read_lock(&sVnodeLock);
4534 	vnode->Lock();
4535 
4536 	status_t status = B_OK;
4537 
4538 	// The cache could have been created in the meantime
4539 	if (vnode->cache == NULL) {
4540 		if (allocate) {
4541 			// TODO: actually the vnode needs to be busy already here, or
4542 			//	else this won't work...
4543 			bool wasBusy = vnode->IsBusy();
4544 			vnode->SetBusy(true);
4545 
4546 			vnode->Unlock();
4547 			rw_lock_read_unlock(&sVnodeLock);
4548 
4549 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4550 
4551 			rw_lock_read_lock(&sVnodeLock);
4552 			vnode->Lock();
4553 			vnode->SetBusy(wasBusy);
4554 		} else
4555 			status = B_BAD_VALUE;
4556 	}
4557 
4558 	vnode->Unlock();
4559 	rw_lock_read_unlock(&sVnodeLock);
4560 
4561 	if (status == B_OK) {
4562 		vnode->cache->AcquireRef();
4563 		*_cache = vnode->cache;
4564 	}
4565 
4566 	return status;
4567 }
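
// Example (editorial sketch): how a consumer like the file cache might
// obtain the vnode's VMCache; the acquired reference must be released
// again via ReleaseRef().
#if 0
VMCache* cache;
if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
	// ... map or fault pages through the cache ...
	cache->ReleaseRef();
}
#endif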
4568 
4569 
4570 status_t
4571 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4572 	file_io_vec* vecs, size_t* _count)
4573 {
4574 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4575 		vnode, vecs, offset, size));
4576 
4577 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4578 }
4579 
4580 
4581 status_t
4582 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4583 {
4584 	status_t status = FS_CALL(vnode, read_stat, stat);
4585 
4586 	// fill in the st_dev and st_ino fields
4587 	if (status == B_OK) {
4588 		stat->st_dev = vnode->device;
4589 		stat->st_ino = vnode->id;
4590 		stat->st_rdev = -1;
4591 	}
4592 
4593 	return status;
4594 }
4595 
4596 
4597 status_t
4598 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4599 {
4600 	struct vnode* vnode;
4601 	status_t status = get_vnode(device, inode, &vnode, true, false);
4602 	if (status != B_OK)
4603 		return status;
4604 
4605 	status = FS_CALL(vnode, read_stat, stat);
4606 
4607 	// fill in the st_dev and st_ino fields
4608 	if (status == B_OK) {
4609 		stat->st_dev = vnode->device;
4610 		stat->st_ino = vnode->id;
4611 		stat->st_rdev = -1;
4612 	}
4613 
4614 	put_vnode(vnode);
4615 	return status;
4616 }
4617 
4618 
4619 status_t
4620 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4621 {
4622 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4623 }
4624 
4625 
4626 status_t
4627 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4628 	char* path, size_t pathLength)
4629 {
4630 	struct vnode* vnode;
4631 	status_t status;
4632 
4633 	// filter invalid leaf names
4634 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4635 		return B_BAD_VALUE;
4636 
4637 	// get the vnode matching the dir's node_ref
4638 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4639 		// special cases "." and "..": we can directly get the vnode of the
4640 		// referenced directory
4641 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4642 		leaf = NULL;
4643 	} else
4644 		status = get_vnode(device, inode, &vnode, true, false);
4645 	if (status != B_OK)
4646 		return status;
4647 
4648 	// get the directory path
4649 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4650 	put_vnode(vnode);
4651 		// we don't need the vnode anymore
4652 	if (status != B_OK)
4653 		return status;
4654 
4655 	// append the leaf name
4656 	if (leaf) {
4657 		// insert a directory separator if this is not the file system root
4658 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4659 				>= pathLength)
4660 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4661 			return B_NAME_TOO_LONG;
4662 		}
4663 	}
4664 
4665 	return B_OK;
4666 }
4667 
4668 
4669 /*!	If the given descriptor locked its vnode, that lock will be released. */
4670 void
4671 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4672 {
4673 	struct vnode* vnode = fd_vnode(descriptor);
4674 
4675 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4676 		vnode->mandatory_locked_by = NULL;
4677 }
4678 
4679 
4680 /*!	Closes all file descriptors of the specified I/O context that
4681 	have the O_CLOEXEC flag set.
4682 */
4683 void
4684 vfs_exec_io_context(io_context* context)
4685 {
4686 	uint32 i;
4687 
4688 	for (i = 0; i < context->table_size; i++) {
4689 		mutex_lock(&context->io_mutex);
4690 
4691 		struct file_descriptor* descriptor = context->fds[i];
4692 		bool remove = false;
4693 
4694 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4695 			context->fds[i] = NULL;
4696 			context->num_used_fds--;
4697 
4698 			remove = true;
4699 		}
4700 
4701 		mutex_unlock(&context->io_mutex);
4702 
4703 		if (remove) {
4704 			close_fd(descriptor);
4705 			put_fd(descriptor);
4706 		}
4707 	}
4708 }
4709 
4710 
4711 /*! Sets up a new io_context structure, and inherits the properties
4712 	of the parent io_context if it is given.
4713 */
4714 io_context*
4715 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4716 {
4717 	io_context* context = (io_context*)malloc(sizeof(io_context));
4718 	if (context == NULL)
4719 		return NULL;
4720 
4721 	TIOC(NewIOContext(context, parentContext));
4722 
4723 	memset(context, 0, sizeof(io_context));
4724 	context->ref_count = 1;
4725 
4726 	MutexLocker parentLocker;
4727 
4728 	size_t tableSize;
4729 	if (parentContext) {
4730 		parentLocker.SetTo(parentContext->io_mutex, false);
4731 		tableSize = parentContext->table_size;
4732 	} else
4733 		tableSize = DEFAULT_FD_TABLE_SIZE;
4734 
4735 	// allocate space for FDs, their select infos, and the close-on-exec flags
4736 	context->fds = (file_descriptor**)malloc(
4737 		sizeof(struct file_descriptor*) * tableSize
4738 		+ sizeof(struct select_sync*) * tableSize
4739 		+ (tableSize + 7) / 8);
4740 	if (context->fds == NULL) {
4741 		free(context);
4742 		return NULL;
4743 	}
4744 
4745 	context->select_infos = (select_info**)(context->fds + tableSize);
4746 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4747 
4748 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4749 		+ sizeof(struct select_sync*) * tableSize
4750 		+ (tableSize + 7) / 8);
4751 
4752 	mutex_init(&context->io_mutex, "I/O context");
4753 
4754 	// Copy all parent file descriptors
4755 
4756 	if (parentContext) {
4757 		size_t i;
4758 
4759 		mutex_lock(&sIOContextRootLock);
4760 		context->root = parentContext->root;
4761 		if (context->root)
4762 			inc_vnode_ref_count(context->root);
4763 		mutex_unlock(&sIOContextRootLock);
4764 
4765 		context->cwd = parentContext->cwd;
4766 		if (context->cwd)
4767 			inc_vnode_ref_count(context->cwd);
4768 
4769 		for (i = 0; i < tableSize; i++) {
4770 			struct file_descriptor* descriptor = parentContext->fds[i];
4771 
4772 			if (descriptor != NULL) {
4773 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4774 				if (closeOnExec && purgeCloseOnExec)
4775 					continue;
4776 
4777 				TFD(InheritFD(context, i, descriptor, parentContext));
4778 
4779 				context->fds[i] = descriptor;
4780 				context->num_used_fds++;
4781 				atomic_add(&descriptor->ref_count, 1);
4782 				atomic_add(&descriptor->open_count, 1);
4783 
4784 				if (closeOnExec)
4785 					fd_set_close_on_exec(context, i, true);
4786 			}
4787 		}
4788 
4789 		parentLocker.Unlock();
4790 	} else {
4791 		context->root = sRoot;
4792 		context->cwd = sRoot;
4793 
4794 		if (context->root)
4795 			inc_vnode_ref_count(context->root);
4796 
4797 		if (context->cwd)
4798 			inc_vnode_ref_count(context->cwd);
4799 	}
4800 
4801 	context->table_size = tableSize;
4802 
4803 	list_init(&context->node_monitors);
4804 	context->max_monitors = DEFAULT_NODE_MONITORS;
4805 
4806 	return context;
4807 }
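
// Example (editorial sketch): the purgeCloseOnExec flag distinguishes the
// two callers -- a fork()-style duplication keeps close-on-exec FDs (they
// are only dropped on exec), while an exec()-style creation purges them.
#if 0
io_context* forkContext = vfs_new_io_context(parentContext, false);
io_context* execContext = vfs_new_io_context(parentContext, true);
#endif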
4808 
4809 
4810 static status_t
4811 vfs_free_io_context(io_context* context)
4812 {
4813 	uint32 i;
4814 
4815 	TIOC(FreeIOContext(context));
4816 
4817 	if (context->root)
4818 		put_vnode(context->root);
4819 
4820 	if (context->cwd)
4821 		put_vnode(context->cwd);
4822 
4823 	mutex_lock(&context->io_mutex);
4824 
4825 	for (i = 0; i < context->table_size; i++) {
4826 		if (struct file_descriptor* descriptor = context->fds[i]) {
4827 			close_fd(descriptor);
4828 			put_fd(descriptor);
4829 		}
4830 	}
4831 
4832 	mutex_destroy(&context->io_mutex);
4833 
4834 	remove_node_monitors(context);
4835 	free(context->fds);
4836 	free(context);
4837 
4838 	return B_OK;
4839 }
4840 
4841 
4842 void
4843 vfs_get_io_context(io_context* context)
4844 {
4845 	atomic_add(&context->ref_count, 1);
4846 }
4847 
4848 
4849 void
4850 vfs_put_io_context(io_context* context)
4851 {
4852 	if (atomic_add(&context->ref_count, -1) == 1)
4853 		vfs_free_io_context(context);
4854 }
4855 
4856 
4857 static status_t
4858 vfs_resize_fd_table(struct io_context* context, const int newSize)
4859 {
4860 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4861 		return B_BAD_VALUE;
4862 
4863 	TIOC(ResizeIOContext(context, newSize));
4864 
4865 	MutexLocker _(context->io_mutex);
4866 
4867 	int oldSize = context->table_size;
4868 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4869 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
4870 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
4871 	// If the tables shrink, make sure none of the fds being dropped are in use.
4872 	if (newSize < oldSize) {
4873 		for (int i = oldSize; i-- > newSize;) {
4874 			if (context->fds[i])
4875 				return B_BUSY;
4876 		}
4877 	}
4878 
4879 	// store pointers to the old tables
4880 	file_descriptor** oldFDs = context->fds;
4881 	select_info** oldSelectInfos = context->select_infos;
4882 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4883 
4884 	// allocate new tables
4885 	file_descriptor** newFDs = (file_descriptor**)malloc(
4886 		sizeof(struct file_descriptor*) * newSize
4887 		+ sizeof(struct select_sync*) * newSize
4888 		+ newCloseOnExitBitmapSize);
4889 		+ newCloseOnExecBitmapSize);
4890 		return B_NO_MEMORY;
4891 
4892 	context->fds = newFDs;
4893 	context->select_infos = (select_info**)(context->fds + newSize);
4894 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4895 	context->table_size = newSize;
4896 
4897 	// copy entries from old tables
4898 	int toCopy = min_c(oldSize, newSize);
4899 
4900 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4901 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4902 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4903 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4904 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
4905 	// clear additional entries, if the tables grow
4906 	if (newSize > oldSize) {
4907 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4908 		memset(context->select_infos + oldSize, 0,
4909 			sizeof(void*) * (newSize - oldSize));
4910 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4911 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
4912 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
4913 
4914 	free(oldFDs);
4915 
4916 	return B_OK;
4917 }
4918 
4919 
4920 static status_t
4921 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4922 {
4923 	status_t status = B_OK;
4924 
4925 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4926 		return B_BAD_VALUE;
4927 
4928 	mutex_lock(&context->io_mutex);
4929 
4930 	if ((size_t)newSize < context->num_monitors) {
4931 		status = B_BUSY;
4932 		goto out;
4933 	}
4934 	context->max_monitors = newSize;
4935 
4936 out:
4937 	mutex_unlock(&context->io_mutex);
4938 	return status;
4939 }
4940 
4941 
4942 int
4943 vfs_getrlimit(int resource, struct rlimit* rlp)
4944 {
4945 	if (!rlp)
4946 		return B_BAD_ADDRESS;
4947 
4948 	switch (resource) {
4949 		case RLIMIT_NOFILE:
4950 		{
4951 			struct io_context* context = get_current_io_context(false);
4952 			MutexLocker _(context->io_mutex);
4953 
4954 			rlp->rlim_cur = context->table_size;
4955 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4956 			return 0;
4957 		}
4958 
4959 		case RLIMIT_NOVMON:
4960 		{
4961 			struct io_context* context = get_current_io_context(false);
4962 			MutexLocker _(context->io_mutex);
4963 
4964 			rlp->rlim_cur = context->max_monitors;
4965 			rlp->rlim_max = MAX_NODE_MONITORS;
4966 			return 0;
4967 		}
4968 
4969 		default:
4970 			return B_BAD_VALUE;
4971 	}
4972 }
4973 
4974 
4975 int
4976 vfs_setrlimit(int resource, const struct rlimit* rlp)
4977 {
4978 	if (!rlp)
4979 		return B_BAD_ADDRESS;
4980 
4981 	switch (resource) {
4982 		case RLIMIT_NOFILE:
4983 			/* TODO: check getuid() */
4984 			if (rlp->rlim_max != RLIM_SAVED_MAX
4985 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4986 				return B_NOT_ALLOWED;
4987 
4988 			return vfs_resize_fd_table(get_current_io_context(false),
4989 				rlp->rlim_cur);
4990 
4991 		case RLIMIT_NOVMON:
4992 			/* TODO: check getuid() */
4993 			if (rlp->rlim_max != RLIM_SAVED_MAX
4994 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4995 				return B_NOT_ALLOWED;
4996 
4997 			return vfs_resize_monitor_table(get_current_io_context(false),
4998 				rlp->rlim_cur);
4999 
5000 		default:
5001 			return B_BAD_VALUE;
5002 	}
5003 }
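
// Example (editorial sketch): the userland view of the limits above --
// growing the FD table through the POSIX resource limit API ends up in
// vfs_resize_fd_table().
#if 0
struct rlimit rl;
getrlimit(RLIMIT_NOFILE, &rl);
rl.rlim_cur = 512;	// request room for 512 descriptors
setrlimit(RLIMIT_NOFILE, &rl);
#endif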
5004 
5005 
5006 status_t
5007 vfs_init(kernel_args* args)
5008 {
5009 	vnode::StaticInit();
5010 
5011 	struct vnode dummyVnode;
5012 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5013 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5014 	if (sVnodeTable == NULL)
5015 		panic("vfs_init: error creating vnode hash table\n");
5016 
5017 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5018 
5019 	struct fs_mount dummyMount;
5020 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5021 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5022 	if (sMountsTable == NULL)
5023 		panic("vfs_init: error creating mounts hash table\n");
5024 
5025 	node_monitor_init();
5026 
5027 	sRoot = NULL;
5028 
5029 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5030 
5031 	if (block_cache_init() != B_OK)
5032 		return B_ERROR;
5033 
5034 #ifdef ADD_DEBUGGER_COMMANDS
5035 	// add some debugger commands
5036 	add_debugger_command_etc("vnode", &dump_vnode,
5037 		"Print info about the specified vnode",
5038 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5039 		"Prints information about the vnode specified by address <vnode> or\n"
5040 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5041 		"constructed and printed. It might not be possible to construct a\n"
5042 		"complete path, though.\n",
5043 		0);
5044 	add_debugger_command("vnodes", &dump_vnodes,
5045 		"list all vnodes (from the specified device)");
5046 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5047 		"list all vnode caches");
5048 	add_debugger_command("mount", &dump_mount,
5049 		"info about the specified fs_mount");
5050 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5051 	add_debugger_command("io_context", &dump_io_context,
5052 		"info about the I/O context");
5053 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5054 		"info about vnode usage");
5055 #endif
5056 
5057 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5058 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5059 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5060 		0);
5061 
5062 	file_map_init();
5063 
5064 	return file_cache_init();
5065 }
5066 
5067 
5068 //	#pragma mark - fd_ops implementations
5069 
5070 
5071 /*!
5072 	Calls fs_open() on the given vnode and returns a new
5073 	file descriptor for it
5074 */
5075 static int
5076 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5077 {
5078 	void* cookie;
5079 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5080 	if (status != B_OK)
5081 		return status;
5082 
5083 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5084 	if (fd < 0) {
5085 		FS_CALL(vnode, close, cookie);
5086 		FS_CALL(vnode, free_cookie, cookie);
5087 	}
5088 	return fd;
5089 }
5090 
5091 
5092 /*!
5093 	Creates and opens the entry \a name in \a directory and returns a new
5094 	file descriptor for it (opening an existing node unless O_EXCL is set).
5095 */
5096 static int
5097 create_vnode(struct vnode* directory, const char* name, int openMode,
5098 	int perms, bool kernel)
5099 {
5100 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5101 	status_t status = B_ERROR;
5102 	struct vnode* vnode;
5103 	void* cookie;
5104 	ino_t newID;
5105 
5106 	// This is somewhat tricky: If the entry already exists, the FS responsible
5107 	// for the directory might not necessarily also be the one responsible for
5108 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5109 	// we can actually never call the create() hook without O_EXCL. Instead we
5110 	// try to look the entry up first. If it already exists, we just open the
5111 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5112 	// introduces a race condition, since someone else might have created the
5113 	// entry in the meantime. We hope the respective FS returns the correct
5114 	// error code, in which case we retry (up to 3 times).
5115 
5116 	for (int i = 0; i < 3 && status != B_OK; i++) {
5117 		// look the node up
5118 		status = lookup_dir_entry(directory, name, &vnode);
5119 		if (status == B_OK) {
5120 			VNodePutter putter(vnode);
5121 
5122 			if ((openMode & O_EXCL) != 0)
5123 				return B_FILE_EXISTS;
5124 
5125 			// If the node is a symlink, we have to follow it, unless
5126 			// O_NOTRAVERSE is set.
5127 			if (S_ISLNK(vnode->Type()) && traverse) {
5128 				putter.Put();
5129 				char clonedName[B_FILE_NAME_LENGTH + 1];
5130 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5131 						>= B_FILE_NAME_LENGTH) {
5132 					return B_NAME_TOO_LONG;
5133 				}
5134 
5135 				inc_vnode_ref_count(directory);
5136 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5137 					kernel, &vnode, NULL);
5138 				if (status != B_OK)
5139 					return status;
5140 
5141 				putter.SetTo(vnode);
5142 			}
5143 
5144 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5145 				put_vnode(vnode);
5146 				return B_LINK_LIMIT;
5147 			}
5148 
5149 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5150 			// on success keep the vnode reference for the FD
5151 			if (fd >= 0)
5152 				putter.Detach();
5153 
5154 			return fd;
5155 		}
5156 
5157 		// it doesn't exist yet -- try to create it
5158 
5159 		if (!HAS_FS_CALL(directory, create))
5160 			return B_READ_ONLY_DEVICE;
5161 
5162 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5163 			&cookie, &newID);
5164 		if (status != B_OK
5165 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5166 			return status;
5167 		}
5168 	}
5169 
5170 	if (status != B_OK)
5171 		return status;
5172 
5173 	// the node has been created successfully
5174 
5175 	rw_lock_read_lock(&sVnodeLock);
5176 	vnode = lookup_vnode(directory->device, newID);
5177 	rw_lock_read_unlock(&sVnodeLock);
5178 
5179 	if (vnode == NULL) {
5180 		panic("vfs: fs_create() returned success but there is no vnode, "
5181 			"mount ID %ld!\n", directory->device);
5182 		return B_BAD_VALUE;
5183 	}
5184 
5185 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5186 	if (fd >= 0)
5187 		return fd;
5188 
5189 	status = fd;
5190 
5191 	// something went wrong, clean up
5192 
5193 	FS_CALL(vnode, close, cookie);
5194 	FS_CALL(vnode, free_cookie, cookie);
5195 	put_vnode(vnode);
5196 
5197 	FS_CALL(directory, unlink, name);
5198 
5199 	return status;
5200 }
5201 
5202 
5203 /*! Calls fs open_dir() on the given vnode and returns a new
5204 	file descriptor for it
5205 */
5206 static int
5207 open_dir_vnode(struct vnode* vnode, bool kernel)
5208 {
5209 	void* cookie;
5210 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5211 	if (status != B_OK)
5212 		return status;
5213 
5214 	// directory is opened, create an FD
5215 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5216 	if (status >= 0)
5217 		return status;
5218 
5219 	FS_CALL(vnode, close_dir, cookie);
5220 	FS_CALL(vnode, free_dir_cookie, cookie);
5221 
5222 	return status;
5223 }
5224 
5225 
5226 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5227 	file descriptor for it.
5228 	Used by attr_dir_open(), and attr_dir_open_fd().
5229 */
5230 static int
5231 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5232 {
5233 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5234 		return B_UNSUPPORTED;
5235 
5236 	void* cookie;
5237 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5238 	if (status != B_OK)
5239 		return status;
5240 
5241 	// directory is opened, create an FD
5242 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5243 		kernel);
5244 	if (status >= 0)
5245 		return status;
5246 
5247 	FS_CALL(vnode, close_attr_dir, cookie);
5248 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5249 
5250 	return status;
5251 }
5252 
5253 
5254 static int
5255 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5256 	int openMode, int perms, bool kernel)
5257 {
5258 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5259 		"kernel %d\n", name, openMode, perms, kernel));
5260 
5261 	// get directory to put the new file in
5262 	struct vnode* directory;
5263 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5264 	if (status != B_OK)
5265 		return status;
5266 
5267 	status = create_vnode(directory, name, openMode, perms, kernel);
5268 	put_vnode(directory);
5269 
5270 	return status;
5271 }
5272 
5273 
5274 static int
5275 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5276 {
5277 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5278 		openMode, perms, kernel));
5279 
5280 	// get directory to put the new file in
5281 	char name[B_FILE_NAME_LENGTH];
5282 	struct vnode* directory;
5283 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5284 		kernel);
5285 	if (status < 0)
5286 		return status;
5287 
5288 	status = create_vnode(directory, name, openMode, perms, kernel);
5289 
5290 	put_vnode(directory);
5291 	return status;
5292 }
5293 
5294 
5295 static int
5296 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5297 	int openMode, bool kernel)
5298 {
5299 	if (name == NULL || *name == '\0')
5300 		return B_BAD_VALUE;
5301 
5302 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5303 		mountID, directoryID, name, openMode));
5304 
5305 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5306 
5307 	// get the vnode matching the entry_ref
5308 	struct vnode* vnode;
5309 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5310 		kernel, &vnode);
5311 	if (status != B_OK)
5312 		return status;
5313 
5314 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5315 		put_vnode(vnode);
5316 		return B_LINK_LIMIT;
5317 	}
5318 
5319 	int newFD = open_vnode(vnode, openMode, kernel);
5320 	if (newFD >= 0) {
5321 		// The vnode reference has been transferred to the FD
5322 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5323 			directoryID, vnode->id, name);
5324 	} else
5325 		put_vnode(vnode);
5326 
5327 	return newFD;
5328 }
5329 
5330 
5331 static int
5332 file_open(int fd, char* path, int openMode, bool kernel)
5333 {
5334 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5335 
5336 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5337 		fd, path, openMode, kernel));
5338 
5339 	// get the vnode matching the vnode + path combination
5340 	struct vnode* vnode;
5341 	ino_t parentID;
5342 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5343 		&parentID, kernel);
5344 	if (status != B_OK)
5345 		return status;
5346 
5347 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5348 		put_vnode(vnode);
5349 		return B_LINK_LIMIT;
5350 	}
5351 
5352 	// open the vnode
5353 	int newFD = open_vnode(vnode, openMode, kernel);
5354 	if (newFD >= 0) {
5355 		// The vnode reference has been transferred to the FD
5356 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5357 			vnode->device, parentID, vnode->id, NULL);
5358 	} else
5359 		put_vnode(vnode);
5360 
5361 	return newFD;
5362 }
5363 
5364 
5365 static status_t
5366 file_close(struct file_descriptor* descriptor)
5367 {
5368 	struct vnode* vnode = descriptor->u.vnode;
5369 	status_t status = B_OK;
5370 
5371 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5372 
5373 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5374 		vnode->id);
5375 	if (HAS_FS_CALL(vnode, close)) {
5376 		status = FS_CALL(vnode, close, descriptor->cookie);
5377 	}
5378 
5379 	if (status == B_OK) {
5380 		// remove all outstanding locks for this team
5381 		release_advisory_lock(vnode, NULL);
5382 	}
5383 	return status;
5384 }
5385 
5386 
5387 static void
5388 file_free_fd(struct file_descriptor* descriptor)
5389 {
5390 	struct vnode* vnode = descriptor->u.vnode;
5391 
5392 	if (vnode != NULL) {
5393 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5394 		put_vnode(vnode);
5395 	}
5396 }
5397 
5398 
5399 static status_t
5400 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5401 	size_t* length)
5402 {
5403 	struct vnode* vnode = descriptor->u.vnode;
5404 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5405 		*length));
5406 
5407 	if (S_ISDIR(vnode->Type()))
5408 		return B_IS_A_DIRECTORY;
5409 
5410 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5411 }
5412 
5413 
5414 static status_t
5415 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5416 	size_t* length)
5417 {
5418 	struct vnode* vnode = descriptor->u.vnode;
5419 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5420 
5421 	if (S_ISDIR(vnode->Type()))
5422 		return B_IS_A_DIRECTORY;
5423 	if (!HAS_FS_CALL(vnode, write))
5424 		return B_READ_ONLY_DEVICE;
5425 
5426 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5427 }
5428 
5429 
5430 static off_t
5431 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5432 {
5433 	struct vnode* vnode = descriptor->u.vnode;
5434 	off_t offset;
5435 
5436 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5437 
5438 	// some kinds of files are not seekable
5439 	switch (vnode->Type() & S_IFMT) {
5440 		case S_IFIFO:
5441 		case S_IFSOCK:
5442 			return ESPIPE;
5443 
5444 		// The Open Group Base Specs don't mention any file types besides pipes,
5445 		// fifos, and sockets specially, so we allow seeking them.
5446 		case S_IFREG:
5447 		case S_IFBLK:
5448 		case S_IFDIR:
5449 		case S_IFLNK:
5450 		case S_IFCHR:
5451 			break;
5452 	}
5453 
5454 	switch (seekType) {
5455 		case SEEK_SET:
5456 			offset = 0;
5457 			break;
5458 		case SEEK_CUR:
5459 			offset = descriptor->pos;
5460 			break;
5461 		case SEEK_END:
5462 		{
5463 			// stat() the node
5464 			if (!HAS_FS_CALL(vnode, read_stat))
5465 				return B_UNSUPPORTED;
5466 
5467 			struct stat stat;
5468 			status_t status = FS_CALL(vnode, read_stat, &stat);
5469 			if (status != B_OK)
5470 				return status;
5471 
5472 			offset = stat.st_size;
5473 			break;
5474 		}
5475 		default:
5476 			return B_BAD_VALUE;
5477 	}
5478 
5479 	// assumes off_t is 64 bits wide
5480 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5481 		return B_BUFFER_OVERFLOW;
5482 
5483 	pos += offset;
5484 	if (pos < 0)
5485 		return B_BAD_VALUE;
5486 
5487 	return descriptor->pos = pos;
5488 }
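
// Worked example (editorial sketch; fd is a hypothetical open descriptor):
// for a 1000 byte file, lseek(fd, -10, SEEK_END) stats the node
// (offset = st_size = 1000) and returns descriptor->pos = 1000 + (-10) = 990.
#if 0
off_t pos = lseek(fd, -10, SEEK_END);	// pos == 990
#endif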
5489 
5490 
5491 static status_t
5492 file_select(struct file_descriptor* descriptor, uint8 event,
5493 	struct selectsync* sync)
5494 {
5495 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5496 
5497 	struct vnode* vnode = descriptor->u.vnode;
5498 
5499 	// If the FS has no select() hook, notify select() now.
5500 	if (!HAS_FS_CALL(vnode, select))
5501 		return notify_select_event(sync, event);
5502 
5503 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5504 }
5505 
5506 
5507 static status_t
5508 file_deselect(struct file_descriptor* descriptor, uint8 event,
5509 	struct selectsync* sync)
5510 {
5511 	struct vnode* vnode = descriptor->u.vnode;
5512 
5513 	if (!HAS_FS_CALL(vnode, deselect))
5514 		return B_OK;
5515 
5516 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5517 }
5518 
5519 
5520 static status_t
5521 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5522 	bool kernel)
5523 {
5524 	struct vnode* vnode;
5525 	status_t status;
5526 
5527 	if (name == NULL || *name == '\0')
5528 		return B_BAD_VALUE;
5529 
5530 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5531 		"perms = %d)\n", mountID, parentID, name, perms));
5532 
5533 	status = get_vnode(mountID, parentID, &vnode, true, false);
5534 	if (status != B_OK)
5535 		return status;
5536 
5537 	if (HAS_FS_CALL(vnode, create_dir))
5538 		status = FS_CALL(vnode, create_dir, name, perms);
5539 	else
5540 		status = B_READ_ONLY_DEVICE;
5541 
5542 	put_vnode(vnode);
5543 	return status;
5544 }
5545 
5546 
5547 static status_t
5548 dir_create(int fd, char* path, int perms, bool kernel)
5549 {
5550 	char filename[B_FILE_NAME_LENGTH];
5551 	struct vnode* vnode;
5552 	status_t status;
5553 
5554 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5555 		kernel));
5556 
5557 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5558 	if (status < 0)
5559 		return status;
5560 
5561 	if (HAS_FS_CALL(vnode, create_dir)) {
5562 		status = FS_CALL(vnode, create_dir, filename, perms);
5563 	} else
5564 		status = B_READ_ONLY_DEVICE;
5565 
5566 	put_vnode(vnode);
5567 	return status;
5568 }
5569 
5570 
5571 static int
5572 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5573 {
5574 	FUNCTION(("dir_open_entry_ref()\n"));
5575 
5576 	if (name && name[0] == '\0')
5577 		return B_BAD_VALUE;
5578 
5579 	// get the vnode matching the entry_ref/node_ref
5580 	struct vnode* vnode;
5581 	status_t status;
5582 	if (name) {
5583 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5584 			&vnode);
5585 	} else
5586 		status = get_vnode(mountID, parentID, &vnode, true, false);
5587 	if (status != B_OK)
5588 		return status;
5589 
5590 	int newFD = open_dir_vnode(vnode, kernel);
5591 	if (newFD >= 0) {
5592 		// The vnode reference has been transferred to the FD
5593 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5594 			vnode->id, name);
5595 	} else
5596 		put_vnode(vnode);
5597 
5598 	return newFD;
5599 }
5600 
5601 
5602 static int
5603 dir_open(int fd, char* path, bool kernel)
5604 {
5605 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5606 		kernel));
5607 
5608 	// get the vnode matching the vnode + path combination
5609 	struct vnode* vnode = NULL;
5610 	ino_t parentID;
5611 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5612 		kernel);
5613 	if (status != B_OK)
5614 		return status;
5615 
5616 	// open the dir
5617 	int newFD = open_dir_vnode(vnode, kernel);
5618 	if (newFD >= 0) {
5619 		// The vnode reference has been transferred to the FD
5620 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5621 			parentID, vnode->id, NULL);
5622 	} else
5623 		put_vnode(vnode);
5624 
5625 	return newFD;
5626 }
5627 
5628 
5629 static status_t
5630 dir_close(struct file_descriptor* descriptor)
5631 {
5632 	struct vnode* vnode = descriptor->u.vnode;
5633 
5634 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5635 
5636 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5637 		vnode->id);
5638 	if (HAS_FS_CALL(vnode, close_dir))
5639 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5640 
5641 	return B_OK;
5642 }
5643 
5644 
5645 static void
5646 dir_free_fd(struct file_descriptor* descriptor)
5647 {
5648 	struct vnode* vnode = descriptor->u.vnode;
5649 
5650 	if (vnode != NULL) {
5651 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5652 		put_vnode(vnode);
5653 	}
5654 }
5655 
5656 
5657 static status_t
5658 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5659 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5660 {
5661 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5662 		bufferSize, _count);
5663 }
5664 
5665 
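/*!	Post-processes a dirent as returned by a file system: it fills in the
	d_pdev/d_pino fields with the parent's device and node ID, rewrites
	the ".." entry of an FS root directory so that it refers to the
	underlying (covered) directory -- unless that would bypass the IO
	context's root -- and replaces the dev/ino of covered vnodes with
	those of the covering mount's root, so that mount boundaries remain
	transparent to the caller.
*/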
5666 static status_t
5667 fix_dirent(struct vnode* parent, struct dirent* entry,
5668 	struct io_context* ioContext)
5669 {
5670 	// set d_pdev and d_pino
5671 	entry->d_pdev = parent->device;
5672 	entry->d_pino = parent->id;
5673 
5674 	// If this is the ".." entry and the directory is the root of a FS,
5675 	// we need to replace d_dev and d_ino with the actual values.
5676 	if (strcmp(entry->d_name, "..") == 0
5677 		&& parent->mount->root_vnode == parent
5678 		&& parent->mount->covers_vnode) {
5679 		inc_vnode_ref_count(parent);
5680 			// vnode_path_to_vnode() puts the node
5681 
5682 		// Make sure the IO context root is not bypassed.
5683 		if (parent == ioContext->root) {
5684 			entry->d_dev = parent->device;
5685 			entry->d_ino = parent->id;
5686 		} else {
5687 			// ".." is guaranteed not to be clobbered by this call
5688 			struct vnode* vnode;
5689 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5690 				ioContext, &vnode, NULL);
5691 
5692 			if (status == B_OK) {
5693 				entry->d_dev = vnode->device;
5694 				entry->d_ino = vnode->id;
5695 			}
5696 		}
5697 	} else {
5698 		// resolve mount points
5699 		ReadLocker _(&sVnodeLock);
5700 
5701 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5702 		if (vnode != NULL) {
5703 			if (vnode->covered_by != NULL) {
5704 				entry->d_dev = vnode->covered_by->device;
5705 				entry->d_ino = vnode->covered_by->id;
5706 			}
5707 		}
5708 	}
5709 
5710 	return B_OK;
5711 }
5712 
5713 
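/*!	Reads up to *_count entries via the FS's read_dir() hook and runs
	fix_dirent() on each of them, advancing through the buffer by
	d_reclen.
*/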
5714 static status_t
5715 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5716 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5717 {
5718 	if (!HAS_FS_CALL(vnode, read_dir))
5719 		return B_UNSUPPORTED;
5720 
5721 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5722 		_count);
5723 	if (error != B_OK)
5724 		return error;
5725 
5726 	// we need to adjust the read dirents
5727 	uint32 count = *_count;
5728 	for (uint32 i = 0; i < count; i++) {
5729 		error = fix_dirent(vnode, buffer, ioContext);
5730 		if (error != B_OK)
5731 			return error;
5732 
5733 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5734 	}
5735 
5736 	return error;
5737 }
5738 
5739 
5740 static status_t
5741 dir_rewind(struct file_descriptor* descriptor)
5742 {
5743 	struct vnode* vnode = descriptor->u.vnode;
5744 
5745 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5746 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5747 	}
5748 
5749 	return B_UNSUPPORTED;
5750 }
5751 
5752 
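/*!	Removes the directory entry the given FD + path pair refers to.
	The path is sanitized first: trailing slashes and "name/." components
	are stripped, and "." and ".." are refused outright, so that only a
	proper leaf entry is passed to the parent directory's remove_dir()
	hook.
*/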
5753 static status_t
5754 dir_remove(int fd, char* path, bool kernel)
5755 {
5756 	char name[B_FILE_NAME_LENGTH];
5757 	struct vnode* directory;
5758 	status_t status;
5759 
5760 	if (path != NULL) {
5761 		// we need to make sure our path name doesn't end with "/", ".",
5762 		// or ".."
5763 		char* lastSlash;
5764 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5765 			char* leaf = lastSlash + 1;
5766 			if (!strcmp(leaf, ".."))
5767 				return B_NOT_ALLOWED;
5768 
5769 			// omit multiple slashes
5770 			while (lastSlash > path && lastSlash[-1] == '/')
5771 				lastSlash--;
5772 
5773 			if (leaf[0]
5774 				&& strcmp(leaf, ".")) {
5775 				break;
5776 			}
5777 			// "name/" -> "name", or "name/." -> "name"
5778 			lastSlash[0] = '\0';
5779 		}
5780 
5781 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5782 			return B_NOT_ALLOWED;
5783 	}
5784 
5785 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5786 	if (status != B_OK)
5787 		return status;
5788 
5789 	if (HAS_FS_CALL(directory, remove_dir))
5790 		status = FS_CALL(directory, remove_dir, name);
5791 	else
5792 		status = B_READ_ONLY_DEVICE;
5793 
5794 	put_vnode(directory);
5795 	return status;
5796 }
5797 
5798 
5799 static status_t
5800 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5801 	size_t length)
5802 {
5803 	struct vnode* vnode = descriptor->u.vnode;
5804 
5805 	if (HAS_FS_CALL(vnode, ioctl))
5806 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5807 
5808 	return B_DEV_INVALID_IOCTL;
5809 }
5810 
5811 
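/*!	Implements the fcntl() backend: F_SETFD/F_GETFD manipulate the
	close-on-exec flag under the IO context mutex, F_SETFL/F_GETFL access
	the open mode (only O_APPEND and O_NONBLOCK may be changed), F_DUPFD
	duplicates the descriptor, and F_GETLK/F_SETLK/F_SETLKW operate on
	advisory locks; for the lock ops the flock structure is copied in
	from userland first.
*/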
5812 static status_t
5813 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5814 {
5815 	struct flock flock;
5816 
5817 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5818 		fd, op, argument, kernel ? "kernel" : "user"));
5819 
5820 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5821 		fd);
5822 	if (descriptor == NULL)
5823 		return B_FILE_ERROR;
5824 
5825 	struct vnode* vnode = fd_vnode(descriptor);
5826 
5827 	status_t status = B_OK;
5828 
5829 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5830 		if (descriptor->type != FDTYPE_FILE)
5831 			status = B_BAD_VALUE;
5832 		else if (user_memcpy(&flock, (struct flock*)argument,
5833 				sizeof(struct flock)) != B_OK)
5834 			status = B_BAD_ADDRESS;
5835 
5836 		if (status != B_OK) {
5837 			put_fd(descriptor);
5838 			return status;
5839 		}
5840 	}
5841 
5842 	switch (op) {
5843 		case F_SETFD:
5844 		{
5845 			struct io_context* context = get_current_io_context(kernel);
5846 			// Set file descriptor flags
5847 
5848 			// O_CLOEXEC is the only flag available at this time
5849 			mutex_lock(&context->io_mutex);
5850 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5851 			mutex_unlock(&context->io_mutex);
5852 
5853 			status = B_OK;
5854 			break;
5855 		}
5856 
5857 		case F_GETFD:
5858 		{
5859 			struct io_context* context = get_current_io_context(kernel);
5860 
5861 			// Get file descriptor flags
5862 			mutex_lock(&context->io_mutex);
5863 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5864 			mutex_unlock(&context->io_mutex);
5865 			break;
5866 		}
5867 
5868 		case F_SETFL:
5869 			// Set file descriptor open mode
5870 
5871 			// we only accept changes to O_APPEND and O_NONBLOCK
5872 			argument &= O_APPEND | O_NONBLOCK;
5873 			if (descriptor->ops->fd_set_flags != NULL) {
5874 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5875 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5876 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5877 					(int)argument);
5878 			} else
5879 				status = B_UNSUPPORTED;
5880 
5881 			if (status == B_OK) {
5882 				// update this descriptor's open_mode field
5883 				descriptor->open_mode = (descriptor->open_mode
5884 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5885 			}
5886 
5887 			break;
5888 
5889 		case F_GETFL:
5890 			// Get file descriptor open mode
5891 			status = descriptor->open_mode;
5892 			break;
5893 
5894 		case F_DUPFD:
5895 		{
5896 			struct io_context* context = get_current_io_context(kernel);
5897 
5898 			status = new_fd_etc(context, descriptor, (int)argument);
5899 			if (status >= 0) {
5900 				mutex_lock(&context->io_mutex);
5901 				fd_set_close_on_exec(context, status, false);
5902 				mutex_unlock(&context->io_mutex);
5903 
5904 				atomic_add(&descriptor->ref_count, 1);
5905 			}
5906 			break;
5907 		}
5908 
5909 		case F_GETLK:
5910 			if (vnode != NULL) {
5911 				status = get_advisory_lock(vnode, &flock);
5912 				if (status == B_OK) {
5913 					// copy back flock structure
5914 					status = user_memcpy((struct flock*)argument, &flock,
5915 						sizeof(struct flock));
5916 				}
5917 			} else
5918 				status = B_BAD_VALUE;
5919 			break;
5920 
5921 		case F_SETLK:
5922 		case F_SETLKW:
5923 			status = normalize_flock(descriptor, &flock);
5924 			if (status != B_OK)
5925 				break;
5926 
5927 			if (vnode == NULL) {
5928 				status = B_BAD_VALUE;
5929 			} else if (flock.l_type == F_UNLCK) {
5930 				status = release_advisory_lock(vnode, &flock);
5931 			} else {
5932 				// the open mode must match the lock type
5933 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5934 						&& flock.l_type == F_WRLCK)
5935 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5936 						&& flock.l_type == F_RDLCK))
5937 					status = B_FILE_ERROR;
5938 				else {
5939 					status = acquire_advisory_lock(vnode, -1,
5940 						&flock, op == F_SETLKW);
5941 				}
5942 			}
5943 			break;
5944 
5945 		// ToDo: add support for more ops?
5946 
5947 		default:
5948 			status = B_BAD_VALUE;
5949 	}
5950 
5951 	put_fd(descriptor);
5952 	return status;
5953 }
5954 
5955 
5956 static status_t
5957 common_sync(int fd, bool kernel)
5958 {
5959 	struct file_descriptor* descriptor;
5960 	struct vnode* vnode;
5961 	status_t status;
5962 
5963 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5964 
5965 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5966 	if (descriptor == NULL)
5967 		return B_FILE_ERROR;
5968 
5969 	if (HAS_FS_CALL(vnode, fsync))
5970 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5971 	else
5972 		status = B_UNSUPPORTED;
5973 
5974 	put_fd(descriptor);
5975 	return status;
5976 }
5977 
5978 
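/*!	Marks the node as mandatorily locked by the given descriptor. The
	owner is stored via an atomic pointer test-and-set on
	vnode->mandatory_locked_by, so of two concurrent lockers only one
	can succeed -- the other gets B_BUSY.
*/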
5979 static status_t
5980 common_lock_node(int fd, bool kernel)
5981 {
5982 	struct file_descriptor* descriptor;
5983 	struct vnode* vnode;
5984 
5985 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5986 	if (descriptor == NULL)
5987 		return B_FILE_ERROR;
5988 
5989 	status_t status = B_OK;
5990 
5991 	// We need to set the locking atomically - someone
5992 	// else might set one at the same time
5993 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5994 			(file_descriptor*)NULL) != NULL)
5995 		status = B_BUSY;
5996 
5997 	put_fd(descriptor);
5998 	return status;
5999 }
6000 
6001 
6002 static status_t
6003 common_unlock_node(int fd, bool kernel)
6004 {
6005 	struct file_descriptor* descriptor;
6006 	struct vnode* vnode;
6007 
6008 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6009 	if (descriptor == NULL)
6010 		return B_FILE_ERROR;
6011 
6012 	status_t status = B_OK;
6013 
6014 	// We need to clear the lock atomically - someone
6015 	// else might acquire or release it at the same time
6016 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6017 			(file_descriptor*)NULL, descriptor) != descriptor)
6018 		status = B_BAD_VALUE;
6019 
6020 	put_fd(descriptor);
6021 	return status;
6022 }
6023 
6024 
6025 static status_t
6026 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6027 	bool kernel)
6028 {
6029 	struct vnode* vnode;
6030 	status_t status;
6031 
6032 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6033 	if (status != B_OK)
6034 		return status;
6035 
6036 	if (HAS_FS_CALL(vnode, read_symlink))
6037 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6038 	else
6039 		status = B_BAD_VALUE;
6040 
6041 	put_vnode(vnode);
6042 	return status;
6043 }
6044 
6045 
6046 static status_t
6047 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6048 	bool kernel)
6049 {
6050 	// path validity checks have to be in the calling function!
6051 	char name[B_FILE_NAME_LENGTH];
6052 	struct vnode* vnode;
6053 	status_t status;
6054 
6055 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6056 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6057 
6058 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6059 	if (status != B_OK)
6060 		return status;
6061 
6062 	if (HAS_FS_CALL(vnode, create_symlink))
6063 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6064 	else {
6065 		status = HAS_FS_CALL(vnode, write)
6066 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6067 	}
6068 
6069 	put_vnode(vnode);
6070 
6071 	return status;
6072 }
6073 
6074 
6075 static status_t
6076 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6077 	bool traverseLeafLink, bool kernel)
6078 {
6079 	// path validity checks have to be in the calling function!
6080 
6081 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6082 		toPath, kernel));
6083 
6084 	char name[B_FILE_NAME_LENGTH];
6085 	struct vnode* directory;
6086 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6087 		kernel);
6088 	if (status != B_OK)
6089 		return status;
6090 
6091 	struct vnode* vnode;
6092 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6093 		kernel);
6094 	if (status != B_OK)
6095 		goto err;
6096 
6097 	if (directory->mount != vnode->mount) {
6098 		status = B_CROSS_DEVICE_LINK;
6099 		goto err1;
6100 	}
6101 
6102 	if (HAS_FS_CALL(directory, link))
6103 		status = FS_CALL(directory, link, name, vnode);
6104 	else
6105 		status = B_READ_ONLY_DEVICE;
6106 
6107 err1:
6108 	put_vnode(vnode);
6109 err:
6110 	put_vnode(directory);
6111 
6112 	return status;
6113 }
6114 
6115 
6116 static status_t
6117 common_unlink(int fd, char* path, bool kernel)
6118 {
6119 	char filename[B_FILE_NAME_LENGTH];
6120 	struct vnode* vnode;
6121 	status_t status;
6122 
6123 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6124 		kernel));
6125 
6126 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6127 	if (status < 0)
6128 		return status;
6129 
6130 	if (HAS_FS_CALL(vnode, unlink))
6131 		status = FS_CALL(vnode, unlink, filename);
6132 	else
6133 		status = B_READ_ONLY_DEVICE;
6134 
6135 	put_vnode(vnode);
6136 
6137 	return status;
6138 }
6139 
6140 
6141 static status_t
6142 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6143 {
6144 	struct vnode* vnode;
6145 	status_t status;
6146 
6147 	// TODO: honor effectiveUserGroup argument
6148 
6149 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6150 	if (status != B_OK)
6151 		return status;
6152 
6153 	if (HAS_FS_CALL(vnode, access))
6154 		status = FS_CALL(vnode, access, mode);
6155 	else
6156 		status = B_OK;
6157 
6158 	put_vnode(vnode);
6159 
6160 	return status;
6161 }
6162 
6163 
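/*!	Renames an entry. Both directories must reside on the same volume,
	neither name may be empty, ".", or "..", and renaming an entry onto
	itself within the same directory is rejected with B_BAD_VALUE.
*/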
6164 static status_t
6165 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6166 {
6167 	struct vnode* fromVnode;
6168 	struct vnode* toVnode;
6169 	char fromName[B_FILE_NAME_LENGTH];
6170 	char toName[B_FILE_NAME_LENGTH];
6171 	status_t status;
6172 
6173 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6174 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6175 
6176 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6177 	if (status != B_OK)
6178 		return status;
6179 
6180 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6181 	if (status != B_OK)
6182 		goto err1;
6183 
6184 	if (fromVnode->device != toVnode->device) {
6185 		status = B_CROSS_DEVICE_LINK;
6186 		goto err2;
6187 	}
6188 
6189 	if (fromName[0] == '\0' || toName[0] == '\0'
6190 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6191 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6192 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6193 		status = B_BAD_VALUE;
6194 		goto err2;
6195 	}
6196 
6197 	if (HAS_FS_CALL(fromVnode, rename))
6198 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6199 	else
6200 		status = B_READ_ONLY_DEVICE;
6201 
6202 err2:
6203 	put_vnode(toVnode);
6204 err1:
6205 	put_vnode(fromVnode);
6206 
6207 	return status;
6208 }
6209 
6210 
6211 static status_t
6212 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6213 {
6214 	struct vnode* vnode = descriptor->u.vnode;
6215 
6216 	FUNCTION(("common_read_stat: stat %p\n", stat));
6217 
6218 	// TODO: remove this once all file systems properly set them!
6219 	stat->st_crtim.tv_nsec = 0;
6220 	stat->st_ctim.tv_nsec = 0;
6221 	stat->st_mtim.tv_nsec = 0;
6222 	stat->st_atim.tv_nsec = 0;
6223 
6224 	status_t status = FS_CALL(vnode, read_stat, stat);
6225 
6226 	// fill in the st_dev and st_ino fields
6227 	if (status == B_OK) {
6228 		stat->st_dev = vnode->device;
6229 		stat->st_ino = vnode->id;
6230 		stat->st_rdev = -1;
6231 	}
6232 
6233 	return status;
6234 }
6235 
6236 
6237 static status_t
6238 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6239 	int statMask)
6240 {
6241 	struct vnode* vnode = descriptor->u.vnode;
6242 
6243 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6244 		vnode, stat, statMask));
6245 
6246 	if (!HAS_FS_CALL(vnode, write_stat))
6247 		return B_READ_ONLY_DEVICE;
6248 
6249 	return FS_CALL(vnode, write_stat, stat, statMask);
6250 }
6251 
6252 
6253 static status_t
6254 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6255 	struct stat* stat, bool kernel)
6256 {
6257 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6258 		stat));
6259 
6260 	struct vnode* vnode;
6261 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6262 		NULL, kernel);
6263 	if (status != B_OK)
6264 		return status;
6265 
6266 	status = FS_CALL(vnode, read_stat, stat);
6267 
6268 	// fill in the st_dev and st_ino fields
6269 	if (status == B_OK) {
6270 		stat->st_dev = vnode->device;
6271 		stat->st_ino = vnode->id;
6272 		stat->st_rdev = -1;
6273 	}
6274 
6275 	put_vnode(vnode);
6276 	return status;
6277 }
6278 
6279 
6280 static status_t
6281 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6282 	const struct stat* stat, int statMask, bool kernel)
6283 {
6284 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6285 		"kernel %d\n", fd, path, stat, statMask, kernel));
6286 
6287 	struct vnode* vnode;
6288 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6289 		NULL, kernel);
6290 	if (status != B_OK)
6291 		return status;
6292 
6293 	if (HAS_FS_CALL(vnode, write_stat))
6294 		status = FS_CALL(vnode, write_stat, stat, statMask);
6295 	else
6296 		status = B_READ_ONLY_DEVICE;
6297 
6298 	put_vnode(vnode);
6299 
6300 	return status;
6301 }
6302 
6303 
6304 static int
6305 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6306 {
6307 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6308 		kernel));
6309 
6310 	struct vnode* vnode;
6311 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6312 		NULL, kernel);
6313 	if (status != B_OK)
6314 		return status;
6315 
6316 	status = open_attr_dir_vnode(vnode, kernel);
6317 	if (status < 0)
6318 		put_vnode(vnode);
6319 
6320 	return status;
6321 }
6322 
6323 
6324 static status_t
6325 attr_dir_close(struct file_descriptor* descriptor)
6326 {
6327 	struct vnode* vnode = descriptor->u.vnode;
6328 
6329 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6330 
6331 	if (HAS_FS_CALL(vnode, close_attr_dir))
6332 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6333 
6334 	return B_OK;
6335 }
6336 
6337 
6338 static void
6339 attr_dir_free_fd(struct file_descriptor* descriptor)
6340 {
6341 	struct vnode* vnode = descriptor->u.vnode;
6342 
6343 	if (vnode != NULL) {
6344 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6345 		put_vnode(vnode);
6346 	}
6347 }
6348 
6349 
6350 static status_t
6351 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6352 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6353 {
6354 	struct vnode* vnode = descriptor->u.vnode;
6355 
6356 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6357 
6358 	if (HAS_FS_CALL(vnode, read_attr_dir))
6359 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6360 			bufferSize, _count);
6361 
6362 	return B_UNSUPPORTED;
6363 }
6364 
6365 
6366 static status_t
6367 attr_dir_rewind(struct file_descriptor* descriptor)
6368 {
6369 	struct vnode* vnode = descriptor->u.vnode;
6370 
6371 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6372 
6373 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6374 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6375 
6376 	return B_UNSUPPORTED;
6377 }
6378 
6379 
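/*!	Creates and opens the named attribute on the node given by the
	FD + path pair. If installing the new file descriptor fails, the
	attribute is closed and removed again, so a failed call leaves no
	trace behind.
*/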
6380 static int
6381 attr_create(int fd, char* path, const char* name, uint32 type,
6382 	int openMode, bool kernel)
6383 {
6384 	if (name == NULL || *name == '\0')
6385 		return B_BAD_VALUE;
6386 
6387 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6388 	struct vnode* vnode;
6389 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6390 		kernel);
6391 	if (status != B_OK)
6392 		return status;
6393 
6394 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6395 		status = B_LINK_LIMIT;
6396 		goto err;
6397 	}
6398 
6399 	if (!HAS_FS_CALL(vnode, create_attr)) {
6400 		status = B_READ_ONLY_DEVICE;
6401 		goto err;
6402 	}
6403 
6404 	void* cookie;
6405 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6406 	if (status != B_OK)
6407 		goto err;
6408 
6409 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6410 	if (fd >= 0)
6411 		return fd;
6412 
6413 	status = fd;
6414 
6415 	FS_CALL(vnode, close_attr, cookie);
6416 	FS_CALL(vnode, free_attr_cookie, cookie);
6417 
6418 	FS_CALL(vnode, remove_attr, name);
6419 
6420 err:
6421 	put_vnode(vnode);
6422 
6423 	return status;
6424 }
6425 
6426 
6427 static int
6428 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6429 {
6430 	if (name == NULL || *name == '\0')
6431 		return B_BAD_VALUE;
6432 
6433 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6434 	struct vnode* vnode;
6435 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6436 		kernel);
6437 	if (status != B_OK)
6438 		return status;
6439 
6440 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6441 		status = B_LINK_LIMIT;
6442 		goto err;
6443 	}
6444 
6445 	if (!HAS_FS_CALL(vnode, open_attr)) {
6446 		status = B_UNSUPPORTED;
6447 		goto err;
6448 	}
6449 
6450 	void* cookie;
6451 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6452 	if (status != B_OK)
6453 		goto err;
6454 
6455 	// now we only need a file descriptor for this attribute and we're done
6456 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6457 	if (fd >= 0)
6458 		return fd;
6459 
6460 	status = fd;
6461 
6462 	FS_CALL(vnode, close_attr, cookie);
6463 	FS_CALL(vnode, free_attr_cookie, cookie);
6464 
6465 err:
6466 	put_vnode(vnode);
6467 
6468 	return status;
6469 }
6470 
6471 
6472 static status_t
6473 attr_close(struct file_descriptor* descriptor)
6474 {
6475 	struct vnode* vnode = descriptor->u.vnode;
6476 
6477 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6478 
6479 	if (HAS_FS_CALL(vnode, close_attr))
6480 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6481 
6482 	return B_OK;
6483 }
6484 
6485 
6486 static void
6487 attr_free_fd(struct file_descriptor* descriptor)
6488 {
6489 	struct vnode* vnode = descriptor->u.vnode;
6490 
6491 	if (vnode != NULL) {
6492 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6493 		put_vnode(vnode);
6494 	}
6495 }
6496 
6497 
6498 static status_t
6499 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6500 	size_t* length)
6501 {
6502 	struct vnode* vnode = descriptor->u.vnode;
6503 
6504 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6505 		*length));
6506 
6507 	if (!HAS_FS_CALL(vnode, read_attr))
6508 		return B_UNSUPPORTED;
6509 
6510 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6511 }
6512 
6513 
6514 static status_t
6515 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6516 	size_t* length)
6517 {
6518 	struct vnode* vnode = descriptor->u.vnode;
6519 
6520 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6521 	if (!HAS_FS_CALL(vnode, write_attr))
6522 		return B_UNSUPPORTED;
6523 
6524 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6525 }
6526 
6527 
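/*!	Computes the new position of an attribute descriptor. SEEK_END needs
	the attribute's size, which is obtained via read_attr_stat(); the
	addition is checked against a 64 bit overflow before the descriptor's
	position is updated.
*/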
6528 static off_t
6529 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6530 {
6531 	off_t offset;
6532 
6533 	switch (seekType) {
6534 		case SEEK_SET:
6535 			offset = 0;
6536 			break;
6537 		case SEEK_CUR:
6538 			offset = descriptor->pos;
6539 			break;
6540 		case SEEK_END:
6541 		{
6542 			struct vnode* vnode = descriptor->u.vnode;
6543 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6544 				return B_UNSUPPORTED;
6545 
6546 			struct stat stat;
6547 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6548 				&stat);
6549 			if (status != B_OK)
6550 				return status;
6551 
6552 			offset = stat.st_size;
6553 			break;
6554 		}
6555 		default:
6556 			return B_BAD_VALUE;
6557 	}
6558 
6559 	// assumes off_t is 64 bits wide
6560 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6561 		return B_BUFFER_OVERFLOW;
6562 
6563 	pos += offset;
6564 	if (pos < 0)
6565 		return B_BAD_VALUE;
6566 
6567 	return descriptor->pos = pos;
6568 }
6569 
6570 
6571 static status_t
6572 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6573 {
6574 	struct vnode* vnode = descriptor->u.vnode;
6575 
6576 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6577 
6578 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6579 		return B_UNSUPPORTED;
6580 
6581 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6582 }
6583 
6584 
6585 static status_t
6586 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6587 	int statMask)
6588 {
6589 	struct vnode* vnode = descriptor->u.vnode;
6590 
6591 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6592 
6593 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6594 		return B_READ_ONLY_DEVICE;
6595 
6596 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6597 }
6598 
6599 
6600 static status_t
6601 attr_remove(int fd, const char* name, bool kernel)
6602 {
6603 	struct file_descriptor* descriptor;
6604 	struct vnode* vnode;
6605 	status_t status;
6606 
6607 	if (name == NULL || *name == '\0')
6608 		return B_BAD_VALUE;
6609 
6610 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6611 		kernel));
6612 
6613 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6614 	if (descriptor == NULL)
6615 		return B_FILE_ERROR;
6616 
6617 	if (HAS_FS_CALL(vnode, remove_attr))
6618 		status = FS_CALL(vnode, remove_attr, name);
6619 	else
6620 		status = B_READ_ONLY_DEVICE;
6621 
6622 	put_fd(descriptor);
6623 
6624 	return status;
6625 }
6626 
6627 
6628 static status_t
6629 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6630 	bool kernel)
6631 {
6632 	struct file_descriptor* fromDescriptor;
6633 	struct file_descriptor* toDescriptor;
6634 	struct vnode* fromVnode;
6635 	struct vnode* toVnode;
6636 	status_t status;
6637 
6638 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6639 		|| *toName == '\0')
6640 		return B_BAD_VALUE;
6641 
6642 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6643 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6644 
6645 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6646 	if (fromDescriptor == NULL)
6647 		return B_FILE_ERROR;
6648 
6649 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6650 	if (toDescriptor == NULL) {
6651 		status = B_FILE_ERROR;
6652 		goto err;
6653 	}
6654 
6655 	// are the files on the same volume?
6656 	if (fromVnode->device != toVnode->device) {
6657 		status = B_CROSS_DEVICE_LINK;
6658 		goto err1;
6659 	}
6660 
6661 	if (HAS_FS_CALL(fromVnode, rename_attr))
6662 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6663 	else
6664 		status = B_READ_ONLY_DEVICE;
6665 
6666 err1:
6667 	put_fd(toDescriptor);
6668 err:
6669 	put_fd(fromDescriptor);
6670 
6671 	return status;
6672 }
6673 
6674 
6675 static int
6676 index_dir_open(dev_t mountID, bool kernel)
6677 {
6678 	struct fs_mount* mount;
6679 	void* cookie;
6680 
6681 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6682 
6683 	status_t status = get_mount(mountID, &mount);
6684 	if (status != B_OK)
6685 		return status;
6686 
6687 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6688 		status = B_UNSUPPORTED;
6689 		goto error;
6690 	}
6691 
6692 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6693 	if (status != B_OK)
6694 		goto error;
6695 
6696 	// get fd for the index directory
6697 	int fd;
6698 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6699 	if (fd >= 0)
6700 		return fd;
6701 
6702 	// something went wrong
6703 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6704 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6705 
6706 	status = fd;
6707 
6708 error:
6709 	put_mount(mount);
6710 	return status;
6711 }
6712 
6713 
6714 static status_t
6715 index_dir_close(struct file_descriptor* descriptor)
6716 {
6717 	struct fs_mount* mount = descriptor->u.mount;
6718 
6719 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6720 
6721 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6722 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6723 
6724 	return B_OK;
6725 }
6726 
6727 
6728 static void
6729 index_dir_free_fd(struct file_descriptor* descriptor)
6730 {
6731 	struct fs_mount* mount = descriptor->u.mount;
6732 
6733 	if (mount != NULL) {
6734 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6735 		put_mount(mount);
6736 	}
6737 }
6738 
6739 
6740 static status_t
6741 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6742 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6743 {
6744 	struct fs_mount* mount = descriptor->u.mount;
6745 
6746 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6747 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6748 			bufferSize, _count);
6749 	}
6750 
6751 	return B_UNSUPPORTED;
6752 }
6753 
6754 
6755 static status_t
6756 index_dir_rewind(struct file_descriptor* descriptor)
6757 {
6758 	struct fs_mount* mount = descriptor->u.mount;
6759 
6760 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6761 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6762 
6763 	return B_UNSUPPORTED;
6764 }
6765 
6766 
6767 static status_t
6768 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6769 	bool kernel)
6770 {
6771 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6772 		name, kernel));
6773 
6774 	struct fs_mount* mount;
6775 	status_t status = get_mount(mountID, &mount);
6776 	if (status != B_OK)
6777 		return status;
6778 
6779 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6780 		status = B_READ_ONLY_DEVICE;
6781 		goto out;
6782 	}
6783 
6784 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6785 
6786 out:
6787 	put_mount(mount);
6788 	return status;
6789 }
6790 
6791 
6792 #if 0
6793 static status_t
6794 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6795 {
6796 	struct vnode* vnode = descriptor->u.vnode;
6797 
6798 	// ToDo: currently unused!
6799 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6800 	if (!HAS_FS_CALL(vnode, read_index_stat))
6801 		return B_UNSUPPORTED;
6802 
6803 	return B_UNSUPPORTED;
6804 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6805 }
6806 
6807 
6808 static void
6809 index_free_fd(struct file_descriptor* descriptor)
6810 {
6811 	struct vnode* vnode = descriptor->u.vnode;
6812 
6813 	if (vnode != NULL) {
6814 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6815 		put_vnode(vnode);
6816 	}
6817 }
6818 #endif
6819 
6820 
6821 static status_t
6822 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6823 	bool kernel)
6824 {
6825 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6826 		mountID, name, kernel));
6827 
6828 	struct fs_mount* mount;
6829 	status_t status = get_mount(mountID, &mount);
6830 	if (status != B_OK)
6831 		return status;
6832 
6833 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6834 		status = B_UNSUPPORTED;
6835 		goto out;
6836 	}
6837 
6838 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6839 
6840 out:
6841 	put_mount(mount);
6842 	return status;
6843 }
6844 
6845 
6846 static status_t
6847 index_remove(dev_t mountID, const char* name, bool kernel)
6848 {
6849 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6850 		name, kernel));
6851 
6852 	struct fs_mount* mount;
6853 	status_t status = get_mount(mountID, &mount);
6854 	if (status != B_OK)
6855 		return status;
6856 
6857 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6858 		status = B_READ_ONLY_DEVICE;
6859 		goto out;
6860 	}
6861 
6862 	status = FS_MOUNT_CALL(mount, remove_index, name);
6863 
6864 out:
6865 	put_mount(mount);
6866 	return status;
6867 }
6868 
6869 
6870 /*!	TODO: the query FS API is still pretty much the same as in R5.
6871 		It would be nice if the FS could get some more kernel support
6872 		for them.
6873 		For example, query parsing should be moved into the kernel.
6874 */
6875 static int
6876 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6877 	int32 token, bool kernel)
6878 {
6879 	struct fs_mount* mount;
6880 	void* cookie;
6881 
6882 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6883 		query, kernel));
6884 
6885 	status_t status = get_mount(device, &mount);
6886 	if (status != B_OK)
6887 		return status;
6888 
6889 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6890 		status = B_UNSUPPORTED;
6891 		goto error;
6892 	}
6893 
6894 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6895 		&cookie);
6896 	if (status != B_OK)
6897 		goto error;
6898 
6899 	// get fd for the query
6900 	int fd;
6901 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6902 	if (fd >= 0)
6903 		return fd;
6904 
6905 	status = fd;
6906 
6907 	// something went wrong
6908 	FS_MOUNT_CALL(mount, close_query, cookie);
6909 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6910 
6911 error:
6912 	put_mount(mount);
6913 	return status;
6914 }
6915 
6916 
6917 static status_t
6918 query_close(struct file_descriptor* descriptor)
6919 {
6920 	struct fs_mount* mount = descriptor->u.mount;
6921 
6922 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6923 
6924 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6925 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6926 
6927 	return B_OK;
6928 }
6929 
6930 
6931 static void
6932 query_free_fd(struct file_descriptor* descriptor)
6933 {
6934 	struct fs_mount* mount = descriptor->u.mount;
6935 
6936 	if (mount != NULL) {
6937 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6938 		put_mount(mount);
6939 	}
6940 }
6941 
6942 
6943 static status_t
6944 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6945 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6946 {
6947 	struct fs_mount* mount = descriptor->u.mount;
6948 
6949 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6950 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6951 			bufferSize, _count);
6952 	}
6953 
6954 	return B_UNSUPPORTED;
6955 }
6956 
6957 
6958 static status_t
6959 query_rewind(struct file_descriptor* descriptor)
6960 {
6961 	struct fs_mount* mount = descriptor->u.mount;
6962 
6963 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6964 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6965 
6966 	return B_UNSUPPORTED;
6967 }
6968 
6969 
6970 //	#pragma mark - General File System functions
6971 
6972 
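/*!	Mounts the file system \a fsName -- or, if NULL, the one the disk
	device manager detects on \a device -- at the given path. For layered
	file systems one fs_volume per layer is created and linked via the
	super_volume/sub_volume pointers, with mount->volume pointing to the
	top layer. On success the new mount's root vnode covers the vnode at
	\a path (for the very first mount, it becomes the global root), and
	the ID of the new volume is returned.
*/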
6973 static dev_t
6974 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6975 	const char* args, bool kernel)
6976 {
6977 	struct ::fs_mount* mount;
6978 	status_t status = B_OK;
6979 	fs_volume* volume = NULL;
6980 	int32 layer = 0;
6981 
6982 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6983 
6984 	// The path is always safe; we just have to make sure that fsName is
6985 	// at least superficially valid -- we can't make any assumptions about
6986 	// args, though. A NULL fsName is OK if a device was given and the FS
6987 	// is not virtual; we'll get the name from the DDM later.
6988 	if (fsName == NULL) {
6989 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6990 			return B_BAD_VALUE;
6991 	} else if (fsName[0] == '\0')
6992 		return B_BAD_VALUE;
6993 
6994 	RecursiveLocker mountOpLocker(sMountOpLock);
6995 
6996 	// Helper to delete a newly created file device on failure.
6997 	// Not exactly beautiful, but helps to keep the code below cleaner.
6998 	struct FileDeviceDeleter {
6999 		FileDeviceDeleter() : id(-1) {}
7000 		~FileDeviceDeleter()
7001 		{
7002 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7003 		}
7004 
7005 		partition_id id;
7006 	} fileDeviceDeleter;
7007 
7008 	// If the file system is not a "virtual" one, the device argument should
7009 	// point to a real file/device (if given at all).
7010 	// get the partition
7011 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7012 	KPartition* partition = NULL;
7013 	KPath normalizedDevice;
7014 	bool newlyCreatedFileDevice = false;
7015 
7016 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7017 		// normalize the device path
7018 		status = normalizedDevice.SetTo(device, true);
7019 		if (status != B_OK)
7020 			return status;
7021 
7022 		// get a corresponding partition from the DDM
7023 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7024 		if (partition == NULL) {
7025 			// Partition not found: This either means that the user supplied
7026 			// an invalid path, or that the path refers to an image file. We try
7027 			// to let the DDM create a file device for the path.
7028 			partition_id deviceID = ddm->CreateFileDevice(
7029 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7030 			if (deviceID >= 0) {
7031 				partition = ddm->RegisterPartition(deviceID);
7032 				if (newlyCreatedFileDevice)
7033 					fileDeviceDeleter.id = deviceID;
7034 			}
7035 		}
7036 
7037 		if (!partition) {
7038 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7039 				normalizedDevice.Path()));
7040 			return B_ENTRY_NOT_FOUND;
7041 		}
7042 
7043 		device = normalizedDevice.Path();
7044 			// correct path to file device
7045 	}
7046 	PartitionRegistrar partitionRegistrar(partition, true);
7047 
7048 	// Write lock the partition's device. For the time being, we keep the lock
7049 	// until we're done mounting -- not nice, but it ensures that no-one
7050 	// interferes.
7051 	// TODO: Just mark the partition busy while mounting!
7052 	KDiskDevice* diskDevice = NULL;
7053 	if (partition) {
7054 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7055 		if (!diskDevice) {
7056 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7057 			return B_ERROR;
7058 		}
7059 	}
7060 
7061 	DeviceWriteLocker writeLocker(diskDevice, true);
7062 		// this takes over the write lock acquired before
7063 
7064 	if (partition != NULL) {
7065 		// make sure that the partition is not busy
7066 		if (partition->IsBusy()) {
7067 			TRACE(("fs_mount(): Partition is busy.\n"));
7068 			return B_BUSY;
7069 		}
7070 
7071 		// if no FS name had been supplied, we get it from the partition
7072 		if (fsName == NULL) {
7073 			KDiskSystem* diskSystem = partition->DiskSystem();
7074 			if (!diskSystem) {
7075 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7076 					"recognize it.\n"));
7077 				return B_BAD_VALUE;
7078 			}
7079 
7080 			if (!diskSystem->IsFileSystem()) {
7081 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7082 					"partitioning system.\n"));
7083 				return B_BAD_VALUE;
7084 			}
7085 
7086 			// The disk system name will not change, and the KDiskSystem
7087 			// object will not go away while the disk device is locked (and
7088 			// the partition has a reference to it), so this is safe.
7089 			fsName = diskSystem->Name();
7090 		}
7091 	}
7092 
7093 	mount = new(std::nothrow) (struct ::fs_mount);
7094 	if (mount == NULL)
7095 		return B_NO_MEMORY;
7096 
7097 	mount->device_name = strdup(device);
7098 		// "device" can be NULL
7099 
7100 	status = mount->entry_cache.Init();
7101 	if (status != B_OK)
7102 		goto err1;
7103 
7104 	// initialize structure
7105 	mount->id = sNextMountID++;
7106 	mount->partition = NULL;
7107 	mount->root_vnode = NULL;
7108 	mount->covers_vnode = NULL;
7109 	mount->unmounting = false;
7110 	mount->owns_file_device = false;
7111 	mount->volume = NULL;
7112 
7113 	// build up the volume(s)
7114 	while (true) {
7115 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7116 		if (layerFSName == NULL) {
7117 			if (layer == 0) {
7118 				status = B_NO_MEMORY;
7119 				goto err1;
7120 			}
7121 
7122 			break;
7123 		}
7124 
7125 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7126 		if (volume == NULL) {
7127 			status = B_NO_MEMORY;
7128 			free(layerFSName);
7129 			goto err1;
7130 		}
7131 
7132 		volume->id = mount->id;
7133 		volume->partition = partition != NULL ? partition->ID() : -1;
7134 		volume->layer = layer++;
7135 		volume->private_volume = NULL;
7136 		volume->ops = NULL;
7137 		volume->sub_volume = NULL;
7138 		volume->super_volume = NULL;
7139 		volume->file_system = NULL;
7140 		volume->file_system_name = NULL;
7141 
7142 		volume->file_system_name = get_file_system_name(layerFSName);
7143 		if (volume->file_system_name == NULL) {
7144 			status = B_NO_MEMORY;
7145 			free(layerFSName);
7146 			free(volume);
7147 			goto err1;
7148 		}
7149 
7150 		volume->file_system = get_file_system(layerFSName);
7151 		if (volume->file_system == NULL) {
7152 			status = B_DEVICE_NOT_FOUND;
7153 			free(layerFSName);
7154 			free(volume->file_system_name);
7155 			free(volume);
7156 			goto err1;
7157 		}
7158 
7159 		if (mount->volume == NULL)
7160 			mount->volume = volume;
7161 		else {
7162 			volume->super_volume = mount->volume;
7163 			mount->volume->sub_volume = volume;
7164 			mount->volume = volume;
7165 		}
7166 	}
7167 
7168 	// insert mount struct into list before we call FS's mount() function
7169 	// so that vnodes can be created for this mount
7170 	mutex_lock(&sMountMutex);
7171 	hash_insert(sMountsTable, mount);
7172 	mutex_unlock(&sMountMutex);
7173 
7174 	ino_t rootID;
7175 
7176 	if (!sRoot) {
7177 		// we haven't mounted anything yet
7178 		if (strcmp(path, "/") != 0) {
7179 			status = B_ERROR;
7180 			goto err2;
7181 		}
7182 
7183 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7184 			args, &rootID);
7185 		if (status != 0)
7186 			goto err2;
7187 	} else {
7188 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7189 		if (status != B_OK)
7190 			goto err2;
7191 
7192 		// make sure covers_vnode is a directory
7193 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7194 			status = B_NOT_A_DIRECTORY;
7195 			goto err3;
7196 		}
7197 
7198 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7199 			// this is already a mount point
7200 			status = B_BUSY;
7201 			goto err3;
7202 		}
7203 
7204 		// mount it/them
7205 		fs_volume* volume = mount->volume;
7206 		while (volume) {
7207 			status = volume->file_system->mount(volume, device, flags, args,
7208 				&rootID);
7209 			if (status != B_OK) {
7210 				if (volume->sub_volume)
7211 					goto err4;
7212 				goto err3;
7213 			}
7214 
7215 			volume = volume->super_volume;
7216 		}
7217 
7218 		volume = mount->volume;
7219 		while (volume) {
7220 			if (volume->ops->all_layers_mounted != NULL)
7221 				volume->ops->all_layers_mounted(volume);
7222 			volume = volume->super_volume;
7223 		}
7224 	}
7225 
7226 	// the root node is supposed to be owned by the file system - it must
7227 	// exist at this point
7228 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7229 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7230 		panic("fs_mount: file system does not own its root node!\n");
7231 		status = B_ERROR;
7232 		goto err4;
7233 	}
7234 
7235 	// No race here, since fs_mount() is the only function changing
7236 	// covers_vnode (and holds sMountOpLock at that time).
7237 	rw_lock_write_lock(&sVnodeLock);
7238 	if (mount->covers_vnode)
7239 		mount->covers_vnode->covered_by = mount->root_vnode;
7240 	rw_lock_write_unlock(&sVnodeLock);
7241 
7242 	if (!sRoot) {
7243 		sRoot = mount->root_vnode;
7244 		mutex_lock(&sIOContextRootLock);
7245 		get_current_io_context(true)->root = sRoot;
7246 		mutex_unlock(&sIOContextRootLock);
7247 		inc_vnode_ref_count(sRoot);
7248 	}
7249 
7250 	// supply the partition (if any) with the mount cookie and mark it mounted
7251 	if (partition) {
7252 		partition->SetMountCookie(mount->volume->private_volume);
7253 		partition->SetVolumeID(mount->id);
7254 
7255 		// keep a partition reference as long as the partition is mounted
7256 		partitionRegistrar.Detach();
7257 		mount->partition = partition;
7258 		mount->owns_file_device = newlyCreatedFileDevice;
7259 		fileDeviceDeleter.id = -1;
7260 	}
7261 
7262 	notify_mount(mount->id,
7263 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7264 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7265 
7266 	return mount->id;
7267 
7268 err4:
7269 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7270 err3:
7271 	if (mount->covers_vnode != NULL)
7272 		put_vnode(mount->covers_vnode);
7273 err2:
7274 	mutex_lock(&sMountMutex);
7275 	hash_remove(sMountsTable, mount);
7276 	mutex_unlock(&sMountMutex);
7277 err1:
7278 	delete mount;
7279 
7280 	return status;
7281 }
7282 
7283 
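/*!	Unmounts the volume given either by \a path -- which must refer to
	its root directory -- or by \a mountID. The mount can only go away
	once none of its vnodes is in use anymore; with B_FORCE_UNMOUNT all
	open file descriptors are forcibly disconnected first. All vnodes are
	then marked busy and freed, and finally the FS's unmount() hook is
	called.
*/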
7284 static status_t
7285 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7286 {
7287 	struct fs_mount* mount;
7288 	status_t err;
7289 
7290 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d)\n", path, mountID,
7291 		kernel));
7292 
7293 	struct vnode* pathVnode = NULL;
7294 	if (path != NULL) {
7295 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7296 		if (err != B_OK)
7297 			return B_ENTRY_NOT_FOUND;
7298 	}
7299 
7300 	RecursiveLocker mountOpLocker(sMountOpLock);
7301 
7302 	// This lock is not strictly necessary, but it is taken in the KDEBUG
7303 	// case to keep the ASSERT in find_mount() working.
7304 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7305 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7306 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7307 	if (mount == NULL) {
7308 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7309 			pathVnode);
7310 	}
7311 
7312 	if (path != NULL) {
7313 		put_vnode(pathVnode);
7314 
7315 		if (mount->root_vnode != pathVnode) {
7316 			// not a mount point
7317 			return B_BAD_VALUE;
7318 		}
7319 	}
7320 
7321 	// if the volume is associated with a partition, lock the device of the
7322 	// partition as long as we are unmounting
7323 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7324 	KPartition* partition = mount->partition;
7325 	KDiskDevice* diskDevice = NULL;
7326 	if (partition != NULL) {
7327 		if (partition->Device() == NULL) {
7328 			dprintf("fs_unmount(): There is no device!\n");
7329 			return B_ERROR;
7330 		}
7331 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7332 		if (!diskDevice) {
7333 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7334 			return B_ERROR;
7335 		}
7336 	}
7337 	DeviceWriteLocker writeLocker(diskDevice, true);
7338 
7339 	// make sure that the partition is not busy
7340 	if (partition != NULL) {
7341 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7342 			TRACE(("fs_unmount(): Partition is busy.\n"));
7343 			return B_BUSY;
7344 		}
7345 	}
7346 
7347 	// grab the vnode master mutex to keep someone from creating
7348 	// a vnode while we're figuring out if we can continue
7349 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7350 
7351 	bool disconnectedDescriptors = false;
7352 
7353 	while (true) {
7354 		bool busy = false;
7355 
7356 		// cycle through the list of vnodes associated with this mount and
7357 		// make sure all of them are not busy or have refs on them
7358 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7359 		while (struct vnode* vnode = iterator.Next()) {
7360 			// The root vnode ref_count needs to be 1 here (the mount has a
7361 			// reference).
7362 			if (vnode->IsBusy()
7363 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7364 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7365 				// there are still vnodes in use on this mount, so we cannot
7366 				// unmount yet
7367 				busy = true;
7368 				break;
7369 			}
7370 		}
7371 
7372 		if (!busy)
7373 			break;
7374 
7375 		if ((flags & B_FORCE_UNMOUNT) == 0)
7376 			return B_BUSY;
7377 
7378 		if (disconnectedDescriptors) {
7379 			// wait a bit until the last access is finished, and then try again
7380 			vnodesWriteLocker.Unlock();
7381 			snooze(100000);
7382 			// TODO: if there is some kind of bug that prevents the ref counts
7383 			// from getting back to zero, this will fall into an endless loop...
7384 			vnodesWriteLocker.Lock();
7385 			continue;
7386 		}
7387 
7388 		// the file system is still busy - but we're forced to unmount it,
7389 		// so let's disconnect all open file descriptors
7390 
7391 		mount->unmounting = true;
7392 			// prevent new vnodes from being created
7393 
7394 		vnodesWriteLocker.Unlock();
7395 
7396 		disconnect_mount_or_vnode_fds(mount, NULL);
7397 		disconnectedDescriptors = true;
7398 
7399 		vnodesWriteLocker.Lock();
7400 	}
7401 
7402 	// we can safely continue, mark all of the vnodes busy and this mount
7403 	// structure in unmounting state
7404 	mount->unmounting = true;
7405 
7406 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7407 	while (struct vnode* vnode = iterator.Next()) {
7408 		vnode->SetBusy(true);
7409 		vnode_to_be_freed(vnode);
7410 	}
7411 
7412 	// The ref_count of the root node is 1 at this point; see above for why.
7413 	mount->root_vnode->ref_count--;
7414 	vnode_to_be_freed(mount->root_vnode);
7415 
7416 	mount->covers_vnode->covered_by = NULL;
7417 
7418 	vnodesWriteLocker.Unlock();
7419 
7420 	put_vnode(mount->covers_vnode);
7421 
7422 	// Free all vnodes associated with this mount.
7423 	// They will be removed from the mount list by free_vnode(), so
7424 	// we don't have to do that ourselves.
7425 	while (struct vnode* vnode = mount->vnodes.Head())
7426 		free_vnode(vnode, false);
7427 
7428 	// remove the mount structure from the hash table
7429 	mutex_lock(&sMountMutex);
7430 	hash_remove(sMountsTable, mount);
7431 	mutex_unlock(&sMountMutex);
7432 
7433 	mountOpLocker.Unlock();
7434 
7435 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7436 	notify_unmount(mount->id);
7437 
7438 	// dereference the partition and mark it unmounted
7439 	if (partition) {
7440 		partition->SetVolumeID(-1);
7441 		partition->SetMountCookie(NULL);
7442 
7443 		if (mount->owns_file_device)
7444 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7445 		partition->Unregister();
7446 	}
7447 
7448 	delete mount;
7449 	return B_OK;
7450 }
7451 
7452 
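/*!	Synchronizes the given volume in two steps: first the file cache of
	every live (non-busy, non-removed) vnode is written back, then the
	FS's own sync() hook is invoked. A marker vnode is kept in the
	mount's vnode list so that the iteration can be resumed after the
	locks have been dropped for the actual write-back.
*/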
7453 static status_t
7454 fs_sync(dev_t device)
7455 {
7456 	struct fs_mount* mount;
7457 	status_t status = get_mount(device, &mount);
7458 	if (status != B_OK)
7459 		return status;
7460 
7461 	struct vnode marker;
7462 	memset(&marker, 0, sizeof(marker));
7463 	marker.SetBusy(true);
7464 	marker.SetRemoved(true);
7465 
7466 	// First, synchronize all file caches
7467 
7468 	while (true) {
7469 		WriteLocker locker(sVnodeLock);
7470 			// Note: That's the easy way. Which is probably OK for sync(),
7471 			// since it's a relatively rare call and doesn't need to allow for
7472 			// a lot of concurrency. Using a read lock would be possible, but
7473 			// also more involved, since we'd have to lock the individual nodes
7474 			// and take care of the locking order, which we might not want to
7475 			// do while holding fs_mount::rlock.
7476 
7477 		// synchronize access to vnode list
7478 		recursive_lock_lock(&mount->rlock);
7479 
7480 		struct vnode* vnode;
7481 		if (!marker.IsRemoved()) {
7482 			vnode = mount->vnodes.GetNext(&marker);
7483 			mount->vnodes.Remove(&marker);
7484 			marker.SetRemoved(true);
7485 		} else
7486 			vnode = mount->vnodes.First();
7487 
7488 		while (vnode != NULL && (vnode->cache == NULL
7489 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7490 			// TODO: we could track writes (and writable mapped vnodes)
7491 			//	and have a simple flag that we could test for here
7492 			vnode = mount->vnodes.GetNext(vnode);
7493 		}
7494 
7495 		if (vnode != NULL) {
7496 			// insert marker vnode again
7497 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7498 			marker.SetRemoved(false);
7499 		}
7500 
7501 		recursive_lock_unlock(&mount->rlock);
7502 
7503 		if (vnode == NULL)
7504 			break;
7505 
7506 		vnode = lookup_vnode(mount->id, vnode->id);
7507 		if (vnode == NULL || vnode->IsBusy())
7508 			continue;
7509 
7510 		if (vnode->ref_count == 0) {
7511 			// this vnode has been unused before
7512 			vnode_used(vnode);
7513 		}
7514 		inc_vnode_ref_count(vnode);
7515 
7516 		locker.Unlock();
7517 
7518 		if (vnode->cache != NULL && !vnode->IsRemoved())
7519 			vnode->cache->WriteModified();
7520 
7521 		put_vnode(vnode);
7522 	}
7523 
7524 	// And then, let the file systems do their synchronizing work
7525 
7526 	if (HAS_FS_MOUNT_CALL(mount, sync))
7527 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7528 
7529 	put_mount(mount);
7530 	return status;
7531 }
7532 
7533 
7534 static status_t
7535 fs_read_info(dev_t device, struct fs_info* info)
7536 {
7537 	struct fs_mount* mount;
7538 	status_t status = get_mount(device, &mount);
7539 	if (status != B_OK)
7540 		return status;
7541 
7542 	memset(info, 0, sizeof(struct fs_info));
7543 
7544 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7545 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7546 
7547 	// fill in info the file system doesn't (have to) know about
7548 	if (status == B_OK) {
7549 		info->dev = mount->id;
7550 		info->root = mount->root_vnode->id;
7551 
7552 		fs_volume* volume = mount->volume;
7553 		while (volume->super_volume != NULL)
7554 			volume = volume->super_volume;
7555 
7556 		strlcpy(info->fsh_name, volume->file_system_name,
7557 			sizeof(info->fsh_name));
7558 		if (mount->device_name != NULL) {
7559 			strlcpy(info->device_name, mount->device_name,
7560 				sizeof(info->device_name));
7561 		}
7562 	}
7563 
7564 	// if the call is not supported by the file system, there are still
7565 	// the parts that we filled out ourselves
7566 
7567 	put_mount(mount);
7568 	return status;
7569 }
7570 
7571 
7572 static status_t
7573 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7574 {
7575 	struct fs_mount* mount;
7576 	status_t status = get_mount(device, &mount);
7577 	if (status != B_OK)
7578 		return status;
7579 
7580 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7581 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7582 	else
7583 		status = B_READ_ONLY_DEVICE;
7584 
7585 	put_mount(mount);
7586 	return status;
7587 }
7588 
7589 
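/*!	Iterates through the mounted volumes. *_cookie holds the next device
	ID to check and is advanced past IDs that are no longer mounted;
	since IDs are assigned sequentially, the devices are returned sorted
	and none is skipped when a previously visited one disappears.
	Returns B_BAD_VALUE once the end of the list has been reached.
*/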
7590 static dev_t
7591 fs_next_device(int32* _cookie)
7592 {
7593 	struct fs_mount* mount = NULL;
7594 	dev_t device = *_cookie;
7595 
7596 	mutex_lock(&sMountMutex);
7597 
7598 	// Since device IDs are assigned sequentially, this algorithm
7599 	// works well enough. It makes sure that the device list
7600 	// returned is sorted, and that no device is skipped when an
7601 	// already visited device gets unmounted.
7602 
7603 	while (device < sNextMountID) {
7604 		mount = find_mount(device++);
7605 		if (mount != NULL && mount->volume->private_volume != NULL)
7606 			break;
7607 	}
7608 
7609 	*_cookie = device;
7610 
7611 	if (mount != NULL)
7612 		device = mount->id;
7613 	else
7614 		device = B_BAD_VALUE;
7615 
7616 	mutex_unlock(&sMountMutex);
7617 
7618 	return device;
7619 }
7620 
7621 
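/*!	Kernel-internal convenience function: reads \a readBytes bytes at
	\a pos from the attribute \a attribute of the node \a fd refers to,
	by opening a temporary attribute descriptor and reading through
	_kern_read(). Note that the \a type parameter is currently unused.
	A purely illustrative call might look like:
		char mimeType[B_MIME_TYPE_LENGTH];
		fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0, mimeType,
			sizeof(mimeType));
*/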
7622 ssize_t
7623 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7624 	void *buffer, size_t readBytes)
7625 {
7626 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7627 	if (attrFD < 0)
7628 		return attrFD;
7629 
7630 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7631 
7632 	_kern_close(attrFD);
7633 
7634 	return bytesRead;
7635 }
7636 
7637 
7638 static status_t
7639 get_cwd(char* buffer, size_t size, bool kernel)
7640 {
7641 	// Get current working directory from io context
7642 	struct io_context* context = get_current_io_context(kernel);
7643 	status_t status;
7644 
7645 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7646 
7647 	mutex_lock(&context->io_mutex);
7648 
7649 	struct vnode* vnode = context->cwd;
7650 	if (vnode)
7651 		inc_vnode_ref_count(vnode);
7652 
7653 	mutex_unlock(&context->io_mutex);
7654 
7655 	if (vnode) {
7656 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7657 		put_vnode(vnode);
7658 	} else
7659 		status = B_ERROR;
7660 
7661 	return status;
7662 }
7663 
7664 
7665 static status_t
7666 set_cwd(int fd, char* path, bool kernel)
7667 {
7668 	struct io_context* context;
7669 	struct vnode* vnode = NULL;
7670 	struct vnode* oldDirectory;
7671 	status_t status;
7672 
7673 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7674 
7675 	// Get vnode for passed path, and bail if it failed
7676 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7677 	if (status < 0)
7678 		return status;
7679 
7680 	if (!S_ISDIR(vnode->Type())) {
7681 		// nope, can't cwd to here
7682 		status = B_NOT_A_DIRECTORY;
7683 		goto err;
7684 	}
7685 
7686 	// Get current io context and lock
7687 	context = get_current_io_context(kernel);
7688 	mutex_lock(&context->io_mutex);
7689 
7690 	// save the old current working directory first
7691 	oldDirectory = context->cwd;
7692 	context->cwd = vnode;
7693 
7694 	mutex_unlock(&context->io_mutex);
7695 
7696 	if (oldDirectory)
7697 		put_vnode(oldDirectory);
7698 
7699 	return B_NO_ERROR;
7700 
7701 err:
7702 	put_vnode(vnode);
7703 	return status;
7704 }
7705 
7706 
7707 //	#pragma mark - kernel mirrored syscalls
7708 
7709 
7710 dev_t
7711 _kern_mount(const char* path, const char* device, const char* fsName,
7712 	uint32 flags, const char* args, size_t argsLength)
7713 {
7714 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7715 	if (pathBuffer.InitCheck() != B_OK)
7716 		return B_NO_MEMORY;
7717 
7718 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7719 }
7720 
7721 
7722 status_t
7723 _kern_unmount(const char* path, uint32 flags)
7724 {
7725 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7726 	if (pathBuffer.InitCheck() != B_OK)
7727 		return B_NO_MEMORY;
7728 
7729 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7730 }
7731 
7732 
7733 status_t
7734 _kern_read_fs_info(dev_t device, struct fs_info* info)
7735 {
7736 	if (info == NULL)
7737 		return B_BAD_VALUE;
7738 
7739 	return fs_read_info(device, info);
7740 }
7741 
7742 
7743 status_t
7744 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7745 {
7746 	if (info == NULL)
7747 		return B_BAD_VALUE;
7748 
7749 	return fs_write_info(device, info, mask);
7750 }
7751 
7752 
7753 status_t
7754 _kern_sync(void)
7755 {
7756 	// Note: _kern_sync() is also called from _user_sync()
7757 	int32 cookie = 0;
7758 	dev_t device;
7759 	while ((device = next_dev(&cookie)) >= 0) {
7760 		status_t status = fs_sync(device);
7761 		if (status != B_OK && status != B_BAD_VALUE) {
7762 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7763 				strerror(status));
7764 		}
7765 	}
7766 
7767 	return B_OK;
7768 }
7769 
7770 
7771 dev_t
7772 _kern_next_device(int32* _cookie)
7773 {
7774 	return fs_next_device(_cookie);
7775 }
7776 
7777 
7778 status_t
7779 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7780 	size_t infoSize)
7781 {
7782 	if (infoSize != sizeof(fd_info))
7783 		return B_BAD_VALUE;
7784 
7785 	// get the team
7786 	Team* team = Team::Get(teamID);
7787 	if (team == NULL)
7788 		return B_BAD_TEAM_ID;
7789 	BReference<Team> teamReference(team, true);
7790 
7791 	// now that we have a team reference, its I/O context won't go away
7792 	io_context* context = team->io_context;
7793 	MutexLocker contextLocker(context->io_mutex);
7794 
7795 	uint32 slot = *_cookie;
7796 
7797 	struct file_descriptor* descriptor;
7798 	while (slot < context->table_size
7799 		&& (descriptor = context->fds[slot]) == NULL) {
7800 		slot++;
7801 	}
7802 
7803 	if (slot >= context->table_size)
7804 		return B_ENTRY_NOT_FOUND;
7805 
7806 	info->number = slot;
7807 	info->open_mode = descriptor->open_mode;
7808 
7809 	struct vnode* vnode = fd_vnode(descriptor);
7810 	if (vnode != NULL) {
7811 		info->device = vnode->device;
7812 		info->node = vnode->id;
7813 	} else if (descriptor->u.mount != NULL) {
7814 		info->device = descriptor->u.mount->id;
7815 		info->node = -1;
7816 	}
7817 
7818 	*_cookie = slot + 1;
7819 	return B_OK;
7820 }
7821 
7822 
7823 int
7824 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7825 	int perms)
7826 {
7827 	if ((openMode & O_CREAT) != 0) {
7828 		return file_create_entry_ref(device, inode, name, openMode, perms,
7829 			true);
7830 	}
7831 
7832 	return file_open_entry_ref(device, inode, name, openMode, true);
7833 }
7834 
7835 
7836 /*!	\brief Opens a node specified by a FD + path pair.
7837 
7838 	At least one of \a fd and \a path must be specified.
7839 	If only \a fd is given, the function opens the node identified by this
7840 	FD. If only a path is given, this path is opened. If both are given and
7841 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7842 	of the directory (!) identified by \a fd.
7843 
7844 	\param fd The FD. May be < 0.
7845 	\param path The absolute or relative path. May be \c NULL.
7846 	\param openMode The open mode.
7847 	\return A FD referring to the newly opened node, or an error code,
7848 			if an error occurs.
7849 */
7850 int
7851 _kern_open(int fd, const char* path, int openMode, int perms)
7852 {
7853 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7854 	if (pathBuffer.InitCheck() != B_OK)
7855 		return B_NO_MEMORY;
7856 
7857 	if (openMode & O_CREAT)
7858 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7859 
7860 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7861 }
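
/*!	Usage sketch (illustrative only; the paths are made up for the example):
	resolving a path relative to a directory FD, as described above.
	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	if (dirFD >= 0) {
		// "config" is resolved relative to the directory dirFD refers to
		int fd = _kern_open(dirFD, "config", O_RDONLY, 0);
		if (fd >= 0)
			_kern_close(fd);
		_kern_close(dirFD);
	}
	\endcode
*/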
7862 
7863 
7864 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7865 
7866 	The supplied name may be \c NULL, in which case the directory identified
7867 	by \a device and \a inode will be opened. Otherwise \a device and
7868 	\a inode identify the parent directory of the directory to be opened
7869 	and \a name its entry name.
7870 
7871 	\param device If \a name is specified the ID of the device the parent
7872 		   directory of the directory to be opened resides on, otherwise
7873 		   the device of the directory itself.
7874 	\param inode If \a name is specified the node ID of the parent
7875 		   directory of the directory to be opened, otherwise node ID of the
7876 		   directory itself.
7877 	\param name The entry name of the directory to be opened. If \c NULL,
7878 		   the \a device + \a inode pair identify the node to be opened.
7879 	\return The FD of the newly opened directory or an error code, if
7880 			something went wrong.
7881 */
7882 int
7883 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7884 {
7885 	return dir_open_entry_ref(device, inode, name, true);
7886 }
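
/*!	Illustrative example of the two addressing modes described above
	(\c device and \c inode are assumed to stem from an entry_ref or
	node_ref obtained elsewhere):
	\code
	// open the subdirectory "mail" of the directory (device, inode)
	int fd1 = _kern_open_dir_entry_ref(device, inode, "mail");
	// open the directory (device, inode) itself
	int fd2 = _kern_open_dir_entry_ref(device, inode, NULL);
	\endcode
*/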
7887 
7888 
7889 /*!	\brief Opens a directory specified by a FD + path pair.
7890 
7891 	At least one of \a fd and \a path must be specified.
7892 	If only \a fd is given, the function opens the directory identified by this
7893 	FD. If only a path is given, this path is opened. If both are given and
7894 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7895 	of the directory (!) identified by \a fd.
7896 
7897 	\param fd The FD. May be < 0.
7898 	\param path The absolute or relative path. May be \c NULL.
7899 	\return A FD referring to the newly opened directory, or an error code,
7900 			if an error occurs.
7901 */
7902 int
7903 _kern_open_dir(int fd, const char* path)
7904 {
7905 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7906 	if (pathBuffer.InitCheck() != B_OK)
7907 		return B_NO_MEMORY;
7908 
7909 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7910 }
7911 
7912 
7913 status_t
7914 _kern_fcntl(int fd, int op, uint32 argument)
7915 {
7916 	return common_fcntl(fd, op, argument, true);
7917 }
7918 
7919 
7920 status_t
7921 _kern_fsync(int fd)
7922 {
7923 	return common_sync(fd, true);
7924 }
7925 
7926 
7927 status_t
7928 _kern_lock_node(int fd)
7929 {
7930 	return common_lock_node(fd, true);
7931 }
7932 
7933 
7934 status_t
7935 _kern_unlock_node(int fd)
7936 {
7937 	return common_unlock_node(fd, true);
7938 }
7939 
7940 
7941 status_t
7942 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7943 	int perms)
7944 {
7945 	return dir_create_entry_ref(device, inode, name, perms, true);
7946 }
7947 
7948 
7949 /*!	\brief Creates a directory specified by a FD + path pair.
7950 
7951 	\a path must always be specified (it contains the name of the new directory
7952 	at least). If only a path is given, this path identifies the location at
7953 	which the directory shall be created. If both \a fd and \a path are given
7954 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
7955 	of the directory (!) identified by \a fd.
7956 
7957 	\param fd The FD. May be < 0.
7958 	\param path The absolute or relative path. Must not be \c NULL.
7959 	\param perms The access permissions the new directory shall have.
7960 	\return \c B_OK, if the directory has been created successfully, another
7961 			error code otherwise.
7962 */
7963 status_t
7964 _kern_create_dir(int fd, const char* path, int perms)
7965 {
7966 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7967 	if (pathBuffer.InitCheck() != B_OK)
7968 		return B_NO_MEMORY;
7969 
7970 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7971 }
7972 
7973 
7974 status_t
7975 _kern_remove_dir(int fd, const char* path)
7976 {
7977 	if (path) {
7978 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7979 		if (pathBuffer.InitCheck() != B_OK)
7980 			return B_NO_MEMORY;
7981 
7982 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7983 	}
7984 
7985 	return dir_remove(fd, NULL, true);
7986 }
7987 
7988 
7989 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7990 
7991 	At least one of \a fd and \a path must be specified.
7992 	If only \a fd is given, the symlink to be read is the node
7993 	identified by this FD. If only a path is given, this path identifies the
7994 	symlink to be read. If both are given and the path is absolute, \a fd is
7995 	ignored; a relative path is reckoned off of the directory (!) identified
7996 	by \a fd.
7997 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7998 	will still be updated to reflect the required buffer size.
7999 
8000 	\param fd The FD. May be < 0.
8001 	\param path The absolute or relative path. May be \c NULL.
8002 	\param buffer The buffer into which the contents of the symlink shall be
8003 		   written.
8004 	\param _bufferSize A pointer to the size of the supplied buffer.
8005 	\return The length of the link on success or an appropriate error code.
8006 */
8007 status_t
8008 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8009 {
8010 	if (path) {
8011 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8012 		if (pathBuffer.InitCheck() != B_OK)
8013 			return B_NO_MEMORY;
8014 
8015 		return common_read_link(fd, pathBuffer.LockBuffer(),
8016 			buffer, _bufferSize, true);
8017 	}
8018 
8019 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8020 }
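
/*!	Illustrative sketch of the B_BUFFER_OVERFLOW protocol documented above:
	since \a _bufferSize is updated even on failure, a caller can retry with
	a suitably sized buffer (allocation failure checks omitted for brevity).
	\code
	char small[1];
	size_t size = sizeof(small);
	status_t error = _kern_read_link(fd, NULL, small, &size);
	if (error == B_BUFFER_OVERFLOW) {
		// size now holds the required buffer size
		char* buffer = (char*)malloc(size);
		error = _kern_read_link(fd, NULL, buffer, &size);
		free(buffer);
	}
	\endcode
*/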
8021 
8022 
8023 /*!	\brief Creates a symlink specified by a FD + path pair.
8024 
8025 	\a path must always be specified (it contains the name of the new symlink
8026 	at least). If only a path is given, this path identifies the location at
8027 	which the symlink shall be created. If both \a fd and \a path are given and
8028 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8029 	of the directory (!) identified by \a fd.
8030 
8031 	\param fd The FD. May be < 0.
8032 	\param path The absolute or relative path. Must not be \c NULL.
8033 	\param mode The access permissions the new symlink shall have.
8034 	\return \c B_OK, if the symlink has been created successfully, another
8035 			error code otherwise.
8036 */
8037 status_t
8038 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8039 {
8040 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8041 	if (pathBuffer.InitCheck() != B_OK)
8042 		return B_NO_MEMORY;
8043 
8044 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8045 		toPath, mode, true);
8046 }
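
/*!	Illustrative example of the semantics documented above (\c dirFD is
	assumed to be an open directory FD): create a symlink "log" in that
	directory, pointing to an absolute target. As usual for symlinks, the
	target need not exist.
	\code
	status_t error = _kern_create_symlink(dirFD, "log", "/var/log",
		S_IRWXU | S_IRWXG | S_IRWXO);
	\endcode
*/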
8047 
8048 
8049 status_t
8050 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8051 	bool traverseLeafLink)
8052 {
8053 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8054 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8055 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8056 		return B_NO_MEMORY;
8057 
8058 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8059 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8060 }
8061 
8062 
8063 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8064 
8065 	\a path must always be specified (it contains at least the name of the entry
8066 	to be deleted). If only a path is given, this path identifies the entry
8067 	directly. If both \a fd and \a path are given and the path is absolute,
8068 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8069 	identified by \a fd.
8070 
8071 	\param fd The FD. May be < 0.
8072 	\param path The absolute or relative path. Must not be \c NULL.
8073 	\return \c B_OK, if the entry has been removed successfully, another
8074 			error code otherwise.
8075 */
8076 status_t
8077 _kern_unlink(int fd, const char* path)
8078 {
8079 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8080 	if (pathBuffer.InitCheck() != B_OK)
8081 		return B_NO_MEMORY;
8082 
8083 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8084 }
8085 
8086 
8087 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8088 		   by another FD + path pair.
8089 
8090 	\a oldPath and \a newPath must always be specified (they contain at least
8091 	the name of the entry). If only a path is given, this path identifies the
8092 	entry directly. If both a FD and a path are given and the path is absolute,
8093 	the FD is ignored; a relative path is reckoned off of the directory (!)
8094 	identified by the respective FD.
8095 
8096 	\param oldFD The FD of the old location. May be < 0.
8097 	\param oldPath The absolute or relative path of the old location. Must not
8098 		   be \c NULL.
8099 	\param newFD The FD of the new location. May be < 0.
8100 	\param newPath The absolute or relative path of the new location. Must not
8101 		   be \c NULL.
8102 	\return \c B_OK, if the entry has been moved successfully, another
8103 			error code otherwise.
8104 */
8105 status_t
8106 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8107 {
8108 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8109 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8110 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8111 		return B_NO_MEMORY;
8112 
8113 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8114 		newFD, newPathBuffer.LockBuffer(), true);
8115 }
8116 
8117 
8118 status_t
8119 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8120 {
8121 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8122 	if (pathBuffer.InitCheck() != B_OK)
8123 		return B_NO_MEMORY;
8124 
8125 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8126 		true);
8127 }
8128 
8129 
8130 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8131 
8132 	If only \a fd is given, the stat operation associated with the type
8133 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8134 	given, this path identifies the entry for whose node to retrieve the
8135 	stat data. If both \a fd and \a path are given and the path is absolute,
8136 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8137 	identified by \a fd and specifies the entry whose stat data shall be
8138 	retrieved.
8139 
8140 	\param fd The FD. May be < 0.
8141 	\param path The absolute or relative path. Must not be \c NULL.
8142 	\param traverseLeafLink If \a path is given, \c true specifies that the
8143 		   function shall not stick to symlinks, but traverse them.
8144 	\param stat The buffer the stat data shall be written into.
8145 	\param statSize The size of the supplied stat buffer.
8146 	\return \c B_OK, if the stat data have been read successfully, another
8147 			error code otherwise.
8148 */
8149 status_t
8150 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8151 	struct stat* stat, size_t statSize)
8152 {
8153 	struct stat completeStat;
8154 	struct stat* originalStat = NULL;
8155 	status_t status;
8156 
8157 	if (statSize > sizeof(struct stat))
8158 		return B_BAD_VALUE;
8159 
8160 	// this supports different stat extensions
8161 	if (statSize < sizeof(struct stat)) {
8162 		originalStat = stat;
8163 		stat = &completeStat;
8164 	}
8165 
8166 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8167 
8168 	if (status == B_OK && originalStat != NULL)
8169 		memcpy(originalStat, stat, statSize);
8170 
8171 	return status;
8172 }
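
/*!	A minimal sketch of the compatibility scheme above, assuming a legacy
	caller compiled against an older (smaller) \c struct stat:
	\code
	struct stat st;
	// a current caller simply passes the full structure size ...
	status_t error = _kern_read_stat(fd, NULL, false, &st, sizeof(st));
	// ... while a legacy binary passes its smaller sizeof(struct stat);
	// the kernel then fills in a complete stat internally and copies only
	// the first statSize bytes back
	\endcode
*/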
8173 
8174 
8175 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8176 
8177 	If only \a fd is given, the stat operation associated with the type
8178 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8179 	given, this path identifies the entry for whose node to write the
8180 	stat data. If both \a fd and \a path are given and the path is absolute,
8181 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8182 	identified by \a fd and specifies the entry whose stat data shall be
8183 	written.
8184 
8185 	\param fd The FD. May be < 0.
8186 	\param path The absolute or relative path. Must not be \c NULL.
8187 	\param traverseLeafLink If \a path is given, \c true specifies that the
8188 		   function shall not stick to symlinks, but traverse them.
8189 	\param stat The buffer containing the stat data to be written.
8190 	\param statSize The size of the supplied stat buffer.
8191 	\param statMask A mask specifying which parts of the stat data shall be
8192 		   written.
8193 	\return \c B_OK, if the stat data have been written successfully,
8194 			another error code otherwise.
8195 */
8196 status_t
8197 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8198 	const struct stat* stat, size_t statSize, int statMask)
8199 {
8200 	struct stat completeStat;
8201 
8202 	if (statSize > sizeof(struct stat))
8203 		return B_BAD_VALUE;
8204 
8205 	// this supports different stat extensions
8206 	if (statSize < sizeof(struct stat)) {
8207 		memset((uint8*)&completeStat + statSize, 0,
8208 			sizeof(struct stat) - statSize);
8209 		memcpy(&completeStat, stat, statSize);
8210 		stat = &completeStat;
8211 	}
8212 
8213 	status_t status;
8214 
8215 	if (path) {
8216 		// path given: write the stat of the node referred to by (fd, path)
8217 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8218 		if (pathBuffer.InitCheck() != B_OK)
8219 			return B_NO_MEMORY;
8220 
8221 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8222 			traverseLeafLink, stat, statMask, true);
8223 	} else {
8224 		// no path given: get the FD and use the FD operation
8225 		struct file_descriptor* descriptor
8226 			= get_fd(get_current_io_context(true), fd);
8227 		if (descriptor == NULL)
8228 			return B_FILE_ERROR;
8229 
8230 		if (descriptor->ops->fd_write_stat)
8231 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8232 		else
8233 			status = B_UNSUPPORTED;
8234 
8235 		put_fd(descriptor);
8236 	}
8237 
8238 	return status;
8239 }
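
/*!	Illustrative sketch: \a statMask selects the fields to be written, so a
	chmod()-style update only needs \c st_mode to be valid (B_STAT_MODE is
	one of the B_STAT_* mask flags from <NodeMonitor.h>).
	\code
	struct stat st;
	st.st_mode = S_IRUSR | S_IWUSR;
	status_t error = _kern_write_stat(fd, NULL, false, &st, sizeof(st),
		B_STAT_MODE);
	\endcode
*/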
8240 
8241 
8242 int
8243 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8244 {
8245 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8246 	if (pathBuffer.InitCheck() != B_OK)
8247 		return B_NO_MEMORY;
8248 
8249 	if (path != NULL)
8250 		pathBuffer.SetTo(path);
8251 
8252 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8253 		traverseLeafLink, true);
8254 }
8255 
8256 
8257 int
8258 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8259 	int openMode)
8260 {
8261 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8262 	if (pathBuffer.InitCheck() != B_OK)
8263 		return B_NO_MEMORY;
8264 
8265 	if ((openMode & O_CREAT) != 0) {
8266 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8267 			true);
8268 	}
8269 
8270 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8271 }
8272 
8273 
8274 status_t
8275 _kern_remove_attr(int fd, const char* name)
8276 {
8277 	return attr_remove(fd, name, true);
8278 }
8279 
8280 
8281 status_t
8282 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8283 	const char* toName)
8284 {
8285 	return attr_rename(fromFile, fromName, toFile, toName, true);
8286 }
8287 
8288 
8289 int
8290 _kern_open_index_dir(dev_t device)
8291 {
8292 	return index_dir_open(device, true);
8293 }
8294 
8295 
8296 status_t
8297 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8298 {
8299 	return index_create(device, name, type, flags, true);
8300 }
8301 
8302 
8303 status_t
8304 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8305 {
8306 	return index_name_read_stat(device, name, stat, true);
8307 }
8308 
8309 
8310 status_t
8311 _kern_remove_index(dev_t device, const char* name)
8312 {
8313 	return index_remove(device, name, true);
8314 }
8315 
8316 
8317 status_t
8318 _kern_getcwd(char* buffer, size_t size)
8319 {
8320 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8321 
8322 	// Call vfs to get current working directory
8323 	return get_cwd(buffer, size, true);
8324 }
8325 
8326 
8327 status_t
8328 _kern_setcwd(int fd, const char* path)
8329 {
8330 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8331 	if (pathBuffer.InitCheck() != B_OK)
8332 		return B_NO_MEMORY;
8333 
8334 	if (path != NULL)
8335 		pathBuffer.SetTo(path);
8336 
8337 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8338 }
8339 
8340 
8341 //	#pragma mark - userland syscalls
8342 
8343 
8344 dev_t
8345 _user_mount(const char* userPath, const char* userDevice,
8346 	const char* userFileSystem, uint32 flags, const char* userArgs,
8347 	size_t argsLength)
8348 {
8349 	char fileSystem[B_FILE_NAME_LENGTH];
8350 	KPath path, device;
8351 	char* args = NULL;
8352 	status_t status;
8353 
8354 	if (!IS_USER_ADDRESS(userPath)
8355 		|| !IS_USER_ADDRESS(userFileSystem)
8356 		|| !IS_USER_ADDRESS(userDevice))
8357 		return B_BAD_ADDRESS;
8358 
8359 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8360 		return B_NO_MEMORY;
8361 
8362 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8363 		return B_BAD_ADDRESS;
8364 
8365 	if (userFileSystem != NULL
8366 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8367 		return B_BAD_ADDRESS;
8368 
8369 	if (userDevice != NULL
8370 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8371 			< B_OK)
8372 		return B_BAD_ADDRESS;
8373 
8374 	if (userArgs != NULL && argsLength > 0) {
8375 		// this is a safety restriction
8376 		if (argsLength >= 65536)
8377 			return B_NAME_TOO_LONG;
8378 
8379 		args = (char*)malloc(argsLength + 1);
8380 		if (args == NULL)
8381 			return B_NO_MEMORY;
8382 
8383 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8384 			free(args);
8385 			return B_BAD_ADDRESS;
8386 		}
8387 	}
8388 	path.UnlockBuffer();
8389 	device.UnlockBuffer();
8390 
8391 	status = fs_mount(path.LockBuffer(),
8392 		userDevice != NULL ? device.Path() : NULL,
8393 		userFileSystem ? fileSystem : NULL, flags, args, false);
8394 
8395 	free(args);
8396 	return status;
8397 }
8398 
8399 
8400 status_t
8401 _user_unmount(const char* userPath, uint32 flags)
8402 {
8403 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8404 	if (pathBuffer.InitCheck() != B_OK)
8405 		return B_NO_MEMORY;
8406 
8407 	char* path = pathBuffer.LockBuffer();
8408 
8409 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8410 		return B_BAD_ADDRESS;
8411 
8412 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8413 }
8414 
8415 
8416 status_t
8417 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8418 {
8419 	struct fs_info info;
8420 	status_t status;
8421 
8422 	if (userInfo == NULL)
8423 		return B_BAD_VALUE;
8424 
8425 	if (!IS_USER_ADDRESS(userInfo))
8426 		return B_BAD_ADDRESS;
8427 
8428 	status = fs_read_info(device, &info);
8429 	if (status != B_OK)
8430 		return status;
8431 
8432 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8433 		return B_BAD_ADDRESS;
8434 
8435 	return B_OK;
8436 }
8437 
8438 
8439 status_t
8440 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8441 {
8442 	struct fs_info info;
8443 
8444 	if (userInfo == NULL)
8445 		return B_BAD_VALUE;
8446 
8447 	if (!IS_USER_ADDRESS(userInfo)
8448 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8449 		return B_BAD_ADDRESS;
8450 
8451 	return fs_write_info(device, &info, mask);
8452 }
8453 
8454 
8455 dev_t
8456 _user_next_device(int32* _userCookie)
8457 {
8458 	int32 cookie;
8459 	dev_t device;
8460 
8461 	if (!IS_USER_ADDRESS(_userCookie)
8462 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8463 		return B_BAD_ADDRESS;
8464 
8465 	device = fs_next_device(&cookie);
8466 
8467 	if (device >= B_OK) {
8468 		// update user cookie
8469 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8470 			return B_BAD_ADDRESS;
8471 	}
8472 
8473 	return device;
8474 }
8475 
8476 
8477 status_t
8478 _user_sync(void)
8479 {
8480 	return _kern_sync();
8481 }
8482 
8483 
8484 status_t
8485 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8486 	size_t infoSize)
8487 {
8488 	struct fd_info info;
8489 	uint32 cookie;
8490 
8491 	// only root can do this (or should root's group be enough?)
8492 	if (geteuid() != 0)
8493 		return B_NOT_ALLOWED;
8494 
8495 	if (infoSize != sizeof(fd_info))
8496 		return B_BAD_VALUE;
8497 
8498 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8499 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8500 		return B_BAD_ADDRESS;
8501 
8502 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8503 	if (status != B_OK)
8504 		return status;
8505 
8506 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8507 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8508 		return B_BAD_ADDRESS;
8509 
8510 	return status;
8511 }
8512 
8513 
8514 status_t
8515 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8516 	char* userPath, size_t pathLength)
8517 {
8518 	if (!IS_USER_ADDRESS(userPath))
8519 		return B_BAD_ADDRESS;
8520 
8521 	KPath path(B_PATH_NAME_LENGTH + 1);
8522 	if (path.InitCheck() != B_OK)
8523 		return B_NO_MEMORY;
8524 
8525 	// copy the leaf name onto the stack
8526 	char stackLeaf[B_FILE_NAME_LENGTH];
8527 	if (leaf) {
8528 		if (!IS_USER_ADDRESS(leaf))
8529 			return B_BAD_ADDRESS;
8530 
8531 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8532 		if (length < 0)
8533 			return length;
8534 		if (length >= B_FILE_NAME_LENGTH)
8535 			return B_NAME_TOO_LONG;
8536 
8537 		leaf = stackLeaf;
8538 	}
8539 
8540 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8541 		path.LockBuffer(), path.BufferSize());
8542 	if (status != B_OK)
8543 		return status;
8544 
8545 	path.UnlockBuffer();
8546 
8547 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8548 	if (length < 0)
8549 		return length;
8550 	if (length >= (int)pathLength)
8551 		return B_BUFFER_OVERFLOW;
8552 
8553 	return B_OK;
8554 }
8555 
8556 
8557 status_t
8558 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8559 {
8560 	if (userPath == NULL || buffer == NULL)
8561 		return B_BAD_VALUE;
8562 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8563 		return B_BAD_ADDRESS;
8564 
8565 	// copy path from userland
8566 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8567 	if (pathBuffer.InitCheck() != B_OK)
8568 		return B_NO_MEMORY;
8569 	char* path = pathBuffer.LockBuffer();
8570 
8571 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8572 		return B_BAD_ADDRESS;
8573 
8574 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8575 		false);
8576 	if (error != B_OK)
8577 		return error;
8578 
8579 	// copy back to userland
8580 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8581 	if (len < 0)
8582 		return len;
8583 	if (len >= B_PATH_NAME_LENGTH)
8584 		return B_BUFFER_OVERFLOW;
8585 
8586 	return B_OK;
8587 }
8588 
8589 
8590 int
8591 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8592 	int openMode, int perms)
8593 {
8594 	char name[B_FILE_NAME_LENGTH];
8595 
8596 	if (userName == NULL || device < 0 || inode < 0)
8597 		return B_BAD_VALUE;
8598 	if (!IS_USER_ADDRESS(userName)
8599 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8600 		return B_BAD_ADDRESS;
8601 
8602 	if ((openMode & O_CREAT) != 0) {
8603 		return file_create_entry_ref(device, inode, name, openMode, perms,
8604 			false);
8605 	}
8606 
8607 	return file_open_entry_ref(device, inode, name, openMode, false);
8608 }
8609 
8610 
8611 int
8612 _user_open(int fd, const char* userPath, int openMode, int perms)
8613 {
8614 	KPath path(B_PATH_NAME_LENGTH + 1);
8615 	if (path.InitCheck() != B_OK)
8616 		return B_NO_MEMORY;
8617 
8618 	char* buffer = path.LockBuffer();
8619 
8620 	if (!IS_USER_ADDRESS(userPath)
8621 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8622 		return B_BAD_ADDRESS;
8623 
8624 	if ((openMode & O_CREAT) != 0)
8625 		return file_create(fd, buffer, openMode, perms, false);
8626 
8627 	return file_open(fd, buffer, openMode, false);
8628 }
8629 
8630 
8631 int
8632 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8633 {
8634 	if (userName != NULL) {
8635 		char name[B_FILE_NAME_LENGTH];
8636 
8637 		if (!IS_USER_ADDRESS(userName)
8638 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8639 			return B_BAD_ADDRESS;
8640 
8641 		return dir_open_entry_ref(device, inode, name, false);
8642 	}
8643 	return dir_open_entry_ref(device, inode, NULL, false);
8644 }
8645 
8646 
8647 int
8648 _user_open_dir(int fd, const char* userPath)
8649 {
8650 	if (userPath == NULL)
8651 		return dir_open(fd, NULL, false);
8652 
8653 	KPath path(B_PATH_NAME_LENGTH + 1);
8654 	if (path.InitCheck() != B_OK)
8655 		return B_NO_MEMORY;
8656 
8657 	char* buffer = path.LockBuffer();
8658 
8659 	if (!IS_USER_ADDRESS(userPath)
8660 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8661 		return B_BAD_ADDRESS;
8662 
8663 	return dir_open(fd, buffer, false);
8664 }
8665 
8666 
8667 /*!	\brief Opens a directory's parent directory and returns the entry name
8668 		   of the former.
8669 
8670 	Aside from returning the directory's entry name, this method is
8671 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8672 	equivalent if \a userName is \c NULL.
8673 
8674 	If a name buffer is supplied and the name does not fit the buffer, the
8675 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8676 
8677 	\param fd A FD referring to a directory.
8678 	\param userName Buffer the directory's entry name shall be written into.
8679 		   May be \c NULL.
8680 	\param nameLength Size of the name buffer.
8681 	\return The file descriptor of the opened parent directory, if everything
8682 			went fine, an error code otherwise.
8683 */
8684 int
8685 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8686 {
8687 	bool kernel = false;
8688 
8689 	if (userName && !IS_USER_ADDRESS(userName))
8690 		return B_BAD_ADDRESS;
8691 
8692 	// open the parent dir
8693 	int parentFD = dir_open(fd, (char*)"..", kernel);
8694 	if (parentFD < 0)
8695 		return parentFD;
8696 	FDCloser fdCloser(parentFD, kernel);
8697 
8698 	if (userName) {
8699 		// get the vnodes
8700 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8701 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8702 		VNodePutter parentVNodePutter(parentVNode);
8703 		VNodePutter dirVNodePutter(dirVNode);
8704 		if (!parentVNode || !dirVNode)
8705 			return B_FILE_ERROR;
8706 
8707 		// get the vnode name
8708 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8709 		struct dirent* buffer = (struct dirent*)_buffer;
8710 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8711 			sizeof(_buffer), get_current_io_context(false));
8712 		if (status != B_OK)
8713 			return status;
8714 
8715 		// copy the name to the userland buffer
8716 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8717 		if (len < 0)
8718 			return len;
8719 		if (len >= (int)nameLength)
8720 			return B_BUFFER_OVERFLOW;
8721 	}
8722 
8723 	return fdCloser.Detach();
8724 }
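
/*!	Illustrative sketch of the contract documented above (sketch only; a
	real userland caller goes through the corresponding syscall stub and
	passes a userland buffer):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(fd, name, sizeof(name));
	if (parentFD >= 0) {
		// name holds the entry name of the directory referred to by fd;
		// parentFD refers to its parent directory
		_user_close(parentFD);
	}
	\endcode
*/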
8725 
8726 
8727 status_t
8728 _user_fcntl(int fd, int op, uint32 argument)
8729 {
8730 	status_t status = common_fcntl(fd, op, argument, false);
8731 	if (op == F_SETLKW)
8732 		syscall_restart_handle_post(status);
8733 
8734 	return status;
8735 }
8736 
8737 
8738 status_t
8739 _user_fsync(int fd)
8740 {
8741 	return common_sync(fd, false);
8742 }
8743 
8744 
8745 status_t
8746 _user_flock(int fd, int operation)
8747 {
8748 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8749 
8750 	// Check if the operation is valid
8751 	switch (operation & ~LOCK_NB) {
8752 		case LOCK_UN:
8753 		case LOCK_SH:
8754 		case LOCK_EX:
8755 			break;
8756 
8757 		default:
8758 			return B_BAD_VALUE;
8759 	}
8760 
8761 	struct file_descriptor* descriptor;
8762 	struct vnode* vnode;
8763 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8764 	if (descriptor == NULL)
8765 		return B_FILE_ERROR;
8766 
8767 	if (descriptor->type != FDTYPE_FILE) {
8768 		put_fd(descriptor);
8769 		return B_BAD_VALUE;
8770 	}
8771 
8772 	struct flock flock;
8773 	flock.l_start = 0;
8774 	flock.l_len = OFF_MAX;
8775 	flock.l_whence = 0;
8776 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8777 
8778 	status_t status;
8779 	if ((operation & LOCK_UN) != 0)
8780 		status = release_advisory_lock(vnode, &flock);
8781 	else {
8782 		status = acquire_advisory_lock(vnode,
8783 			thread_get_current_thread()->team->session_id, &flock,
8784 			(operation & LOCK_NB) == 0);
8785 	}
8786 
8787 	syscall_restart_handle_post(status);
8788 
8789 	put_fd(descriptor);
8790 	return status;
8791 }
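
/*!	The mapping performed above, summarized: a whole-file advisory lock is
	emulated via a \c flock record spanning [0, OFF_MAX].
	\code
	// LOCK_SH           -> F_RDLCK over the whole file, blocking
	// LOCK_EX | LOCK_NB -> F_WRLCK over the whole file, non-blocking
	// LOCK_UN           -> release the whole-file lock
	\endcode
*/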
8792 
8793 
8794 status_t
8795 _user_lock_node(int fd)
8796 {
8797 	return common_lock_node(fd, false);
8798 }
8799 
8800 
8801 status_t
8802 _user_unlock_node(int fd)
8803 {
8804 	return common_unlock_node(fd, false);
8805 }
8806 
8807 
8808 status_t
8809 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8810 	int perms)
8811 {
8812 	char name[B_FILE_NAME_LENGTH];
8813 	status_t status;
8814 
8815 	if (!IS_USER_ADDRESS(userName))
8816 		return B_BAD_ADDRESS;
8817 
8818 	status = user_strlcpy(name, userName, sizeof(name));
8819 	if (status < 0)
8820 		return status;
8821 
8822 	return dir_create_entry_ref(device, inode, name, perms, false);
8823 }
8824 
8825 
8826 status_t
8827 _user_create_dir(int fd, const char* userPath, int perms)
8828 {
8829 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8830 	if (pathBuffer.InitCheck() != B_OK)
8831 		return B_NO_MEMORY;
8832 
8833 	char* path = pathBuffer.LockBuffer();
8834 
8835 	if (!IS_USER_ADDRESS(userPath)
8836 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8837 		return B_BAD_ADDRESS;
8838 
8839 	return dir_create(fd, path, perms, false);
8840 }
8841 
8842 
8843 status_t
8844 _user_remove_dir(int fd, const char* userPath)
8845 {
8846 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8847 	if (pathBuffer.InitCheck() != B_OK)
8848 		return B_NO_MEMORY;
8849 
8850 	char* path = pathBuffer.LockBuffer();
8851 
8852 	if (userPath != NULL) {
8853 		if (!IS_USER_ADDRESS(userPath)
8854 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8855 			return B_BAD_ADDRESS;
8856 	}
8857 
8858 	return dir_remove(fd, userPath ? path : NULL, false);
8859 }
8860 
8861 
8862 status_t
8863 _user_read_link(int fd, const char* userPath, char* userBuffer,
8864 	size_t* userBufferSize)
8865 {
8866 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8867 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8868 		return B_NO_MEMORY;
8869 
8870 	size_t bufferSize;
8871 
8872 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8873 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8874 		return B_BAD_ADDRESS;
8875 
8876 	char* path = pathBuffer.LockBuffer();
8877 	char* buffer = linkBuffer.LockBuffer();
8878 
8879 	if (userPath) {
8880 		if (!IS_USER_ADDRESS(userPath)
8881 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8882 			return B_BAD_ADDRESS;
8883 
8884 		if (bufferSize > B_PATH_NAME_LENGTH)
8885 			bufferSize = B_PATH_NAME_LENGTH;
8886 	}
8887 
8888 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8889 		&bufferSize, false);
8890 
8891 	// we also update the bufferSize in case of errors
8892 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8893 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8894 		return B_BAD_ADDRESS;
8895 
8896 	if (status != B_OK)
8897 		return status;
8898 
8899 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8900 		return B_BAD_ADDRESS;
8901 
8902 	return B_OK;
8903 }
8904 
8905 
8906 status_t
8907 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8908 	int mode)
8909 {
8910 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8911 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8912 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8913 		return B_NO_MEMORY;
8914 
8915 	char* path = pathBuffer.LockBuffer();
8916 	char* toPath = toPathBuffer.LockBuffer();
8917 
8918 	if (!IS_USER_ADDRESS(userPath)
8919 		|| !IS_USER_ADDRESS(userToPath)
8920 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8921 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8922 		return B_BAD_ADDRESS;
8923 
8924 	return common_create_symlink(fd, path, toPath, mode, false);
8925 }
8926 
8927 
8928 status_t
8929 _user_create_link(int pathFD, const char* userPath, int toFD,
8930 	const char* userToPath, bool traverseLeafLink)
8931 {
8932 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8933 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8934 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8935 		return B_NO_MEMORY;
8936 
8937 	char* path = pathBuffer.LockBuffer();
8938 	char* toPath = toPathBuffer.LockBuffer();
8939 
8940 	if (!IS_USER_ADDRESS(userPath)
8941 		|| !IS_USER_ADDRESS(userToPath)
8942 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8943 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8944 		return B_BAD_ADDRESS;
8945 
8946 	status_t status = check_path(toPath);
8947 	if (status != B_OK)
8948 		return status;
8949 
8950 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
8951 		false);
8952 }
8953 
8954 
8955 status_t
8956 _user_unlink(int fd, const char* userPath)
8957 {
8958 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8959 	if (pathBuffer.InitCheck() != B_OK)
8960 		return B_NO_MEMORY;
8961 
8962 	char* path = pathBuffer.LockBuffer();
8963 
8964 	if (!IS_USER_ADDRESS(userPath)
8965 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8966 		return B_BAD_ADDRESS;
8967 
8968 	return common_unlink(fd, path, false);
8969 }
8970 
8971 
8972 status_t
8973 _user_rename(int oldFD, const char* userOldPath, int newFD,
8974 	const char* userNewPath)
8975 {
8976 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8977 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8978 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8979 		return B_NO_MEMORY;
8980 
8981 	char* oldPath = oldPathBuffer.LockBuffer();
8982 	char* newPath = newPathBuffer.LockBuffer();
8983 
8984 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8985 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8986 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8987 		return B_BAD_ADDRESS;
8988 
8989 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8990 }
8991 
8992 
8993 status_t
8994 _user_create_fifo(int fd, const char* userPath, mode_t perms)
8995 {
8996 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8997 	if (pathBuffer.InitCheck() != B_OK)
8998 		return B_NO_MEMORY;
8999 
9000 	char* path = pathBuffer.LockBuffer();
9001 
9002 	if (!IS_USER_ADDRESS(userPath)
9003 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9004 		return B_BAD_ADDRESS;
9005 	}
9006 
9007 	// split into directory vnode and filename path
9008 	char filename[B_FILE_NAME_LENGTH];
9009 	struct vnode* dir;
9010 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9011 	if (status != B_OK)
9012 		return status;
9013 
9014 	VNodePutter _(dir);
9015 
9016 	// the underlying FS needs to support creating FIFOs
9017 	if (!HAS_FS_CALL(dir, create_special_node))
9018 		return B_UNSUPPORTED;
9019 
9020 	// create the entry	-- the FIFO sub node is set up automatically
9021 	fs_vnode superVnode;
9022 	ino_t nodeID;
9023 	status = FS_CALL(dir, create_special_node, filename, NULL,
9024 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9025 
9026 	// create_special_node() acquired a reference for us that we don't need.
9027 	if (status == B_OK)
9028 		put_vnode(dir->mount->volume, nodeID);
9029 
9030 	return status;
9031 }
9032 
9033 
9034 status_t
9035 _user_create_pipe(int* userFDs)
9036 {
9037 	// rootfs should support creating FIFOs, but let's be sure
9038 	if (!HAS_FS_CALL(sRoot, create_special_node))
9039 		return B_UNSUPPORTED;
9040 
9041 	// create the node	-- the FIFO sub node is set up automatically
9042 	fs_vnode superVnode;
9043 	ino_t nodeID;
9044 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9045 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9046 	if (status != B_OK)
9047 		return status;
9048 
9049 	// We've got one reference to the node and need another one.
9050 	struct vnode* vnode;
9051 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9052 	if (status != B_OK) {
9053 		// that should not happen
9054 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9055 			sRoot->mount->id, nodeID);
9056 		return status;
9057 	}
9058 
9059 	// Everything looks good so far. Open two FDs, one for reading and one
9060 	// for writing.
9061 	int fds[2];
9062 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9063 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9064 
9065 	FDCloser closer0(fds[0], false);
9066 	FDCloser closer1(fds[1], false);
9067 
9068 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9069 
9070 	// copy FDs to userland
9071 	if (status == B_OK) {
9072 		if (!IS_USER_ADDRESS(userFDs)
9073 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9074 			status = B_BAD_ADDRESS;
9075 		}
9076 	}
9077 
9078 	// keep FDs, if everything went fine
9079 	if (status == B_OK) {
9080 		closer0.Detach();
9081 		closer1.Detach();
9082 	}
9083 
9084 	return status;
9085 }
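
/*!	Illustrative sketch of the resulting contract: on success the two FDs
	form a POSIX-style pipe, index 0 open for reading, index 1 for writing.
	In userland the POSIX pipe() wrapper is used rather than calling the
	syscall directly.
	\code
	int fds[2];
	if (pipe(fds) == 0) {
		write(fds[1], "x", 1);
		char c;
		read(fds[0], &c, 1);
	}
	\endcode
*/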
9086 
9087 
9088 status_t
9089 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9090 {
9091 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9092 	if (pathBuffer.InitCheck() != B_OK)
9093 		return B_NO_MEMORY;
9094 
9095 	char* path = pathBuffer.LockBuffer();
9096 
9097 	if (!IS_USER_ADDRESS(userPath)
9098 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9099 		return B_BAD_ADDRESS;
9100 
9101 	return common_access(fd, path, mode, effectiveUserGroup, false);
9102 }
9103 
9104 
9105 status_t
9106 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9107 	struct stat* userStat, size_t statSize)
9108 {
9109 	struct stat stat;
9110 	status_t status;
9111 
9112 	if (statSize > sizeof(struct stat))
9113 		return B_BAD_VALUE;
9114 
9115 	if (!IS_USER_ADDRESS(userStat))
9116 		return B_BAD_ADDRESS;
9117 
9118 	if (userPath) {
9119 		// path given: get the stat of the node referred to by (fd, path)
9120 		if (!IS_USER_ADDRESS(userPath))
9121 			return B_BAD_ADDRESS;
9122 
9123 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9124 		if (pathBuffer.InitCheck() != B_OK)
9125 			return B_NO_MEMORY;
9126 
9127 		char* path = pathBuffer.LockBuffer();
9128 
9129 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9130 		if (length < B_OK)
9131 			return length;
9132 		if (length >= B_PATH_NAME_LENGTH)
9133 			return B_NAME_TOO_LONG;
9134 
9135 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9136 	} else {
9137 		// no path given: get the FD and use the FD operation
9138 		struct file_descriptor* descriptor
9139 			= get_fd(get_current_io_context(false), fd);
9140 		if (descriptor == NULL)
9141 			return B_FILE_ERROR;
9142 
9143 		if (descriptor->ops->fd_read_stat)
9144 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9145 		else
9146 			status = B_UNSUPPORTED;
9147 
9148 		put_fd(descriptor);
9149 	}
9150 
9151 	if (status != B_OK)
9152 		return status;
9153 
9154 	return user_memcpy(userStat, &stat, statSize);
9155 }
9156 
9157 
9158 status_t
9159 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9160 	const struct stat* userStat, size_t statSize, int statMask)
9161 {
9162 	if (statSize > sizeof(struct stat))
9163 		return B_BAD_VALUE;
9164 
9165 	struct stat stat;
9166 
9167 	if (!IS_USER_ADDRESS(userStat)
9168 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9169 		return B_BAD_ADDRESS;
9170 
9171 	// clear additional stat fields
9172 	if (statSize < sizeof(struct stat))
9173 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9174 
9175 	status_t status;
9176 
9177 	if (userPath) {
9178 		// path given: write the stat of the node referred to by (fd, path)
9179 		if (!IS_USER_ADDRESS(userPath))
9180 			return B_BAD_ADDRESS;
9181 
9182 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9183 		if (pathBuffer.InitCheck() != B_OK)
9184 			return B_NO_MEMORY;
9185 
9186 		char* path = pathBuffer.LockBuffer();
9187 
9188 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9189 		if (length < B_OK)
9190 			return length;
9191 		if (length >= B_PATH_NAME_LENGTH)
9192 			return B_NAME_TOO_LONG;
9193 
9194 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9195 			statMask, false);
9196 	} else {
9197 		// no path given: get the FD and use the FD operation
9198 		struct file_descriptor* descriptor
9199 			= get_fd(get_current_io_context(false), fd);
9200 		if (descriptor == NULL)
9201 			return B_FILE_ERROR;
9202 
9203 		if (descriptor->ops->fd_write_stat) {
9204 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9205 				statMask);
9206 		} else
9207 			status = B_UNSUPPORTED;
9208 
9209 		put_fd(descriptor);
9210 	}
9211 
9212 	return status;
9213 }
9214 
9215 
9216 int
9217 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9218 {
9219 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9220 	if (pathBuffer.InitCheck() != B_OK)
9221 		return B_NO_MEMORY;
9222 
9223 	char* path = pathBuffer.LockBuffer();
9224 
9225 	if (userPath != NULL) {
9226 		if (!IS_USER_ADDRESS(userPath)
9227 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9228 			return B_BAD_ADDRESS;
9229 	}
9230 
9231 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9232 }
9233 
9234 
9235 ssize_t
9236 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9237 	size_t readBytes)
9238 {
9239 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9240 	if (attr < 0)
9241 		return attr;
9242 
9243 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9244 	_user_close(attr);
9245 
9246 	return bytes;
9247 }
9248 
9249 
9250 ssize_t
9251 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9252 	const void* buffer, size_t writeBytes)
9253 {
9254 	// Try to support the BeOS-typical truncation semantics as well as the
9255 	// position argument
9256 	int attr = attr_create(fd, NULL, attribute, type,
9257 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9258 	if (attr < 0)
9259 		return attr;
9260 
9261 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9262 	_user_close(attr);
9263 
9264 	return bytes;
9265 }
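
/*!	Illustrative consequence of the truncation rule above: a write at
	position 0 replaces the attribute value (O_TRUNC), while a write at a
	non-zero position preserves the existing contents.
	\code
	// replaces any previous value
	_user_write_attr(fd, "my:attr", B_STRING_TYPE, 0, "foo", 4);
	// overwrites at offset 4 without truncating first
	_user_write_attr(fd, "my:attr", B_STRING_TYPE, 4, "bar", 4);
	\endcode
*/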
9266 
9267 
9268 status_t
9269 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9270 {
9271 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9272 	if (attr < 0)
9273 		return attr;
9274 
9275 	struct file_descriptor* descriptor
9276 		= get_fd(get_current_io_context(false), attr);
9277 	if (descriptor == NULL) {
9278 		_user_close(attr);
9279 		return B_FILE_ERROR;
9280 	}
9281 
9282 	struct stat stat;
9283 	status_t status;
9284 	if (descriptor->ops->fd_read_stat)
9285 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9286 	else
9287 		status = B_UNSUPPORTED;
9288 
9289 	put_fd(descriptor);
9290 	_user_close(attr);
9291 
9292 	if (status == B_OK) {
9293 		attr_info info;
9294 		info.type = stat.st_type;
9295 		info.size = stat.st_size;
9296 
9297 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9298 			return B_BAD_ADDRESS;
9299 	}
9300 
9301 	return status;
9302 }
9303 
9304 
9305 int
9306 _user_open_attr(int fd, const char* userPath, const char* userName,
9307 	uint32 type, int openMode)
9308 {
9309 	char name[B_FILE_NAME_LENGTH];
9310 
9311 	if (!IS_USER_ADDRESS(userName)
9312 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9313 		return B_BAD_ADDRESS;
9314 
9315 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9316 	if (pathBuffer.InitCheck() != B_OK)
9317 		return B_NO_MEMORY;
9318 
9319 	char* path = pathBuffer.LockBuffer();
9320 
9321 	if (userPath != NULL) {
9322 		if (!IS_USER_ADDRESS(userPath)
9323 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9324 			return B_BAD_ADDRESS;
9325 	}
9326 
9327 	if ((openMode & O_CREAT) != 0) {
9328 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9329 			false);
9330 	}
9331 
9332 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9333 }
9334 
9335 
9336 status_t
9337 _user_remove_attr(int fd, const char* userName)
9338 {
9339 	char name[B_FILE_NAME_LENGTH];
9340 
9341 	if (!IS_USER_ADDRESS(userName)
9342 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9343 		return B_BAD_ADDRESS;
9344 
9345 	return attr_remove(fd, name, false);
9346 }
9347 
9348 
9349 status_t
9350 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9351 	const char* userToName)
9352 {
9353 	if (!IS_USER_ADDRESS(userFromName)
9354 		|| !IS_USER_ADDRESS(userToName))
9355 		return B_BAD_ADDRESS;
9356 
9357 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9358 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9359 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9360 		return B_NO_MEMORY;
9361 
9362 	char* fromName = fromNameBuffer.LockBuffer();
9363 	char* toName = toNameBuffer.LockBuffer();
9364 
9365 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9366 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9367 		return B_BAD_ADDRESS;
9368 
9369 	return attr_rename(fromFile, fromName, toFile, toName, false);
9370 }
9371 
9372 
9373 int
9374 _user_open_index_dir(dev_t device)
9375 {
9376 	return index_dir_open(device, false);
9377 }
9378 
9379 
9380 status_t
9381 _user_create_index(dev_t device, const char* userName, uint32 type,
9382 	uint32 flags)
9383 {
9384 	char name[B_FILE_NAME_LENGTH];
9385 
9386 	if (!IS_USER_ADDRESS(userName)
9387 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9388 		return B_BAD_ADDRESS;
9389 
9390 	return index_create(device, name, type, flags, false);
9391 }
9392 
9393 
9394 status_t
9395 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9396 {
9397 	char name[B_FILE_NAME_LENGTH];
9398 	struct stat stat;
9399 	status_t status;
9400 
9401 	if (!IS_USER_ADDRESS(userName)
9402 		|| !IS_USER_ADDRESS(userStat)
9403 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9404 		return B_BAD_ADDRESS;
9405 
9406 	status = index_name_read_stat(device, name, &stat, false);
9407 	if (status == B_OK) {
9408 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9409 			return B_BAD_ADDRESS;
9410 	}
9411 
9412 	return status;
9413 }
9414 
9415 
9416 status_t
9417 _user_remove_index(dev_t device, const char* userName)
9418 {
9419 	char name[B_FILE_NAME_LENGTH];
9420 
9421 	if (!IS_USER_ADDRESS(userName)
9422 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9423 		return B_BAD_ADDRESS;
9424 
9425 	return index_remove(device, name, false);
9426 }
9427 
9428 
9429 status_t
9430 _user_getcwd(char* userBuffer, size_t size)
9431 {
9432 	if (size == 0)
9433 		return B_BAD_VALUE;
9434 	if (!IS_USER_ADDRESS(userBuffer))
9435 		return B_BAD_ADDRESS;
9436 
9437 	if (size > kMaxPathLength)
9438 		size = kMaxPathLength;
9439 
9440 	KPath pathBuffer(size);
9441 	if (pathBuffer.InitCheck() != B_OK)
9442 		return B_NO_MEMORY;
9443 
9444 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9445 
9446 	char* path = pathBuffer.LockBuffer();
9447 
9448 	status_t status = get_cwd(path, size, false);
9449 	if (status != B_OK)
9450 		return status;
9451 
9452 	// Copy back the result
9453 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9454 		return B_BAD_ADDRESS;
9455 
9456 	return status;
9457 }
9458 
9459 
9460 status_t
9461 _user_setcwd(int fd, const char* userPath)
9462 {
9463 	TRACE(("user_setcwd: path = %p\n", userPath));
9464 
9465 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9466 	if (pathBuffer.InitCheck() != B_OK)
9467 		return B_NO_MEMORY;
9468 
9469 	char* path = pathBuffer.LockBuffer();
9470 
9471 	if (userPath != NULL) {
9472 		if (!IS_USER_ADDRESS(userPath)
9473 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9474 			return B_BAD_ADDRESS;
9475 	}
9476 
9477 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9478 }
9479 
9480 
9481 status_t
9482 _user_change_root(const char* userPath)
9483 {
9484 	// only root is allowed to chroot()
9485 	if (geteuid() != 0)
9486 		return B_NOT_ALLOWED;
9487 
9488 	// alloc path buffer
9489 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9490 	if (pathBuffer.InitCheck() != B_OK)
9491 		return B_NO_MEMORY;
9492 
9493 	// copy userland path to kernel
9494 	char* path = pathBuffer.LockBuffer();
9495 	if (userPath != NULL) {
9496 		if (!IS_USER_ADDRESS(userPath)
9497 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9498 			return B_BAD_ADDRESS;
9499 	}
9500 
9501 	// get the vnode
9502 	struct vnode* vnode;
9503 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9504 	if (status != B_OK)
9505 		return status;
9506 
9507 	// set the new root
9508 	struct io_context* context = get_current_io_context(false);
9509 	mutex_lock(&sIOContextRootLock);
9510 	struct vnode* oldRoot = context->root;
9511 	context->root = vnode;
9512 	mutex_unlock(&sIOContextRootLock);
9513 
9514 	put_vnode(oldRoot);
9515 
9516 	return B_OK;
9517 }
9518 
9519 
9520 int
9521 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9522 	uint32 flags, port_id port, int32 token)
9523 {
9524 	char* query;
9525 
9526 	if (device < 0 || userQuery == NULL || queryLength == 0)
9527 		return B_BAD_VALUE;
9528 
9529 	// this is a safety restriction
9530 	if (queryLength >= 65536)
9531 		return B_NAME_TOO_LONG;
9532 
9533 	query = (char*)malloc(queryLength + 1);
9534 	if (query == NULL)
9535 		return B_NO_MEMORY;
9536 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9537 		free(query);
9538 		return B_BAD_ADDRESS;
9539 	}
9540 
9541 	int fd = query_open(device, query, flags, port, token, false);
9542 
9543 	free(query);
9544 	return fd;
9545 }
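
/*!	Illustrative sketch (the query string and flags are assumptions for the
	example): opening a non-live query; \a port and \a token only matter for
	live queries (B_LIVE_QUERY set in \a flags).
	\code
	const char* query = "name==\"*.cpp\"";
	int fd = _user_open_query(device, query, strlen(query), 0, -1, -1);
	\endcode
*/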
9546 
9547 
9548 #include "vfs_request_io.cpp"
9549