xref: /haiku/src/system/kernel/fs/vfs.cpp (revision e0ef64750f3169cd634bb2f7a001e22488b05231)
1 /*
2  * Copyright 2005-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() - this is not depending
112 	// on PATH_MAX
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and covers_vnode fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is as soon
126 	as the mount is mounted and it is made sure it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount) (read) access
128 	to those fields is always safe, even without additional locking. Morever
129 	while mounted the mount holds a reference to the covers_vnode, and thus
130 	making the access path vnode->mount->covers_vnode->mount->... safe if a
131 	reference to vnode is held (note that for the root mount covers_vnode
132 	is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed to read/write access the sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save to the immutable fields (device, id,
234 	private_node, mount) to which only read-only access is allowed.
235 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
236 	well as the busy, removed, unused flags, and the vnode's type can also be
237 	write access when holding a read lock to sVnodeLock *and* having the vnode
238 	locked. Writing access to covered_by requires to write lock sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not have this lock held when calling create_sem(), as this
242 	might call vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
549 		status_t status, size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
581 			fFlags, fStatus, fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	iovec*			fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	size_t			fBytesRequested;
594 	status_t		fStatus;
595 	size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
603 		status_t status, size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
622 		status_t status, size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the gMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not, if an error occurs -- the function write
877 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
878 	error the lock is not not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now, will cause the next to last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is 1. this function must not be invoked when the
1072 	node is busy in the first place and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything when fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to the
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode and we rather don't free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 static void
1325 free_unused_vnodes()
1326 {
1327 	free_unused_vnodes(
1328 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1329 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1330 }
1331 
1332 
1333 static void
1334 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1335 {
1336 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1337 
1338 	free_unused_vnodes(level);
1339 }
1340 
1341 
1342 static inline void
1343 put_advisory_locking(struct advisory_locking* locking)
1344 {
1345 	release_sem(locking->lock);
1346 }
1347 
1348 
1349 /*!	Returns the advisory_locking object of the \a vnode in case it
1350 	has one, and locks it.
1351 	You have to call put_advisory_locking() when you're done with
1352 	it.
1353 	Note, you must not have the vnode mutex locked when calling
1354 	this function.
1355 */
1356 static struct advisory_locking*
1357 get_advisory_locking(struct vnode* vnode)
1358 {
1359 	rw_lock_read_lock(&sVnodeLock);
1360 	vnode->Lock();
1361 
1362 	struct advisory_locking* locking = vnode->advisory_locking;
1363 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1364 
1365 	vnode->Unlock();
1366 	rw_lock_read_unlock(&sVnodeLock);
1367 
1368 	if (lock >= 0)
1369 		lock = acquire_sem(lock);
1370 	if (lock < 0) {
1371 		// This means the locking has been deleted in the mean time
1372 		// or had never existed in the first place - otherwise, we
1373 		// would get the lock at some point.
1374 		return NULL;
1375 	}
1376 
1377 	return locking;
1378 }
1379 
1380 
1381 /*!	Creates a locked advisory_locking object, and attaches it to the
1382 	given \a vnode.
1383 	Returns B_OK in case of success - also if the vnode got such an
1384 	object from someone else in the mean time, you'll still get this
1385 	one locked then.
1386 */
1387 static status_t
1388 create_advisory_locking(struct vnode* vnode)
1389 {
1390 	if (vnode == NULL)
1391 		return B_FILE_ERROR;
1392 
1393 	ObjectDeleter<advisory_locking> lockingDeleter;
1394 	struct advisory_locking* locking = NULL;
1395 
1396 	while (get_advisory_locking(vnode) == NULL) {
1397 		// no locking object set on the vnode yet, create one
1398 		if (locking == NULL) {
1399 			locking = new(std::nothrow) advisory_locking;
1400 			if (locking == NULL)
1401 				return B_NO_MEMORY;
1402 			lockingDeleter.SetTo(locking);
1403 
1404 			locking->wait_sem = create_sem(0, "advisory lock");
1405 			if (locking->wait_sem < 0)
1406 				return locking->wait_sem;
1407 
1408 			locking->lock = create_sem(0, "advisory locking");
1409 			if (locking->lock < 0)
1410 				return locking->lock;
1411 		}
1412 
1413 		// set our newly created locking object
1414 		ReadLocker _(sVnodeLock);
1415 		AutoLocker<Vnode> nodeLocker(vnode);
1416 		if (vnode->advisory_locking == NULL) {
1417 			vnode->advisory_locking = locking;
1418 			lockingDeleter.Detach();
1419 			return B_OK;
1420 		}
1421 	}
1422 
1423 	// The vnode already had a locking object. That's just as well.
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 /*!	Retrieves the first lock that has been set by the current team.
1430 */
1431 static status_t
1432 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1433 {
1434 	struct advisory_locking* locking = get_advisory_locking(vnode);
1435 	if (locking == NULL)
1436 		return B_BAD_VALUE;
1437 
1438 	// TODO: this should probably get the flock by its file descriptor!
1439 	team_id team = team_get_current_team_id();
1440 	status_t status = B_BAD_VALUE;
1441 
1442 	LockList::Iterator iterator = locking->locks.GetIterator();
1443 	while (iterator.HasNext()) {
1444 		struct advisory_lock* lock = iterator.Next();
1445 
1446 		if (lock->team == team) {
1447 			flock->l_start = lock->start;
1448 			flock->l_len = lock->end - lock->start + 1;
1449 			status = B_OK;
1450 			break;
1451 		}
1452 	}
1453 
1454 	put_advisory_locking(locking);
1455 	return status;
1456 }
1457 
1458 
1459 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1460 	with the advisory_lock \a lock.
1461 */
1462 static bool
1463 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1464 {
1465 	if (flock == NULL)
1466 		return true;
1467 
1468 	return lock->start <= flock->l_start - 1 + flock->l_len
1469 		&& lock->end >= flock->l_start;
1470 }
1471 
1472 
1473 /*!	Removes the specified lock, or all locks of the calling team
1474 	if \a flock is NULL.
1475 */
1476 static status_t
1477 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1478 {
1479 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1480 
1481 	struct advisory_locking* locking = get_advisory_locking(vnode);
1482 	if (locking == NULL)
1483 		return B_OK;
1484 
1485 	// TODO: use the thread ID instead??
1486 	team_id team = team_get_current_team_id();
1487 	pid_t session = thread_get_current_thread()->team->session_id;
1488 
1489 	// find matching lock entries
1490 
1491 	LockList::Iterator iterator = locking->locks.GetIterator();
1492 	while (iterator.HasNext()) {
1493 		struct advisory_lock* lock = iterator.Next();
1494 		bool removeLock = false;
1495 
1496 		if (lock->session == session)
1497 			removeLock = true;
1498 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1499 			bool endsBeyond = false;
1500 			bool startsBefore = false;
1501 			if (flock != NULL) {
1502 				startsBefore = lock->start < flock->l_start;
1503 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1504 			}
1505 
1506 			if (!startsBefore && !endsBeyond) {
1507 				// lock is completely contained in flock
1508 				removeLock = true;
1509 			} else if (startsBefore && !endsBeyond) {
1510 				// cut the end of the lock
1511 				lock->end = flock->l_start - 1;
1512 			} else if (!startsBefore && endsBeyond) {
1513 				// cut the start of the lock
1514 				lock->start = flock->l_start + flock->l_len;
1515 			} else {
1516 				// divide the lock into two locks
1517 				struct advisory_lock* secondLock = new advisory_lock;
1518 				if (secondLock == NULL) {
1519 					// TODO: we should probably revert the locks we already
1520 					// changed... (ie. allocate upfront)
1521 					put_advisory_locking(locking);
1522 					return B_NO_MEMORY;
1523 				}
1524 
1525 				lock->end = flock->l_start - 1;
1526 
1527 				secondLock->team = lock->team;
1528 				secondLock->session = lock->session;
1529 				// values must already be normalized when getting here
1530 				secondLock->start = flock->l_start + flock->l_len;
1531 				secondLock->end = lock->end;
1532 				secondLock->shared = lock->shared;
1533 
1534 				locking->locks.Add(secondLock);
1535 			}
1536 		}
1537 
1538 		if (removeLock) {
1539 			// this lock is no longer used
1540 			iterator.Remove();
1541 			free(lock);
1542 		}
1543 	}
1544 
1545 	bool removeLocking = locking->locks.IsEmpty();
1546 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1547 
1548 	put_advisory_locking(locking);
1549 
1550 	if (removeLocking) {
1551 		// We can remove the whole advisory locking structure; it's no
1552 		// longer used
1553 		locking = get_advisory_locking(vnode);
1554 		if (locking != NULL) {
1555 			ReadLocker locker(sVnodeLock);
1556 			AutoLocker<Vnode> nodeLocker(vnode);
1557 
1558 			// the locking could have been changed in the mean time
1559 			if (locking->locks.IsEmpty()) {
1560 				vnode->advisory_locking = NULL;
1561 				nodeLocker.Unlock();
1562 				locker.Unlock();
1563 
1564 				// we've detached the locking from the vnode, so we can
1565 				// safely delete it
1566 				delete_sem(locking->lock);
1567 				delete_sem(locking->wait_sem);
1568 				delete locking;
1569 			} else {
1570 				// the locking is in use again
1571 				nodeLocker.Unlock();
1572 				locker.Unlock();
1573 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1574 			}
1575 		}
1576 	}
1577 
1578 	return B_OK;
1579 }
1580 
1581 
1582 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1583 	will wait for the lock to become available, if there are any collisions
1584 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1585 
1586 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1587 	BSD flock() semantics are used, that is, all children can unlock the file
1588 	in question (we even allow parents to remove the lock, though, but that
1589 	seems to be in line to what the BSD's are doing).
1590 */
1591 static status_t
1592 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1593 	bool wait)
1594 {
1595 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1596 		vnode, flock, wait ? "yes" : "no"));
1597 
1598 	bool shared = flock->l_type == F_RDLCK;
1599 	status_t status = B_OK;
1600 
1601 	// TODO: do deadlock detection!
1602 
1603 	struct advisory_locking* locking;
1604 	sem_id waitForLock;
1605 
1606 	while (true) {
1607 		// if this vnode has an advisory_locking structure attached,
1608 		// lock that one and search for any colliding file lock
1609 		status = create_advisory_locking(vnode);
1610 		if (status != B_OK)
1611 			return status;
1612 
1613 		locking = vnode->advisory_locking;
1614 		team_id team = team_get_current_team_id();
1615 		waitForLock = -1;
1616 
1617 		// test for collisions
1618 		LockList::Iterator iterator = locking->locks.GetIterator();
1619 		while (iterator.HasNext()) {
1620 			struct advisory_lock* lock = iterator.Next();
1621 
1622 			// TODO: locks from the same team might be joinable!
1623 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1624 				// locks do overlap
1625 				if (!shared || !lock->shared) {
1626 					// we need to wait
1627 					waitForLock = locking->wait_sem;
1628 					break;
1629 				}
1630 			}
1631 		}
1632 
1633 		if (waitForLock < 0)
1634 			break;
1635 
1636 		// We need to wait. Do that or fail now, if we've been asked not to.
1637 
1638 		if (!wait) {
1639 			put_advisory_locking(locking);
1640 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1641 		}
1642 
1643 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1644 			B_CAN_INTERRUPT, 0);
1645 		if (status != B_OK && status != B_BAD_SEM_ID)
1646 			return status;
1647 
1648 		// We have been notified, but we need to re-lock the locking object. So
1649 		// go another round...
1650 	}
1651 
1652 	// install new lock
1653 
1654 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1655 		sizeof(struct advisory_lock));
1656 	if (lock == NULL) {
1657 		if (waitForLock >= B_OK)
1658 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1659 		release_sem(locking->lock);
1660 		return B_NO_MEMORY;
1661 	}
1662 
1663 	lock->team = team_get_current_team_id();
1664 	lock->session = session;
1665 	// values must already be normalized when getting here
1666 	lock->start = flock->l_start;
1667 	lock->end = flock->l_start - 1 + flock->l_len;
1668 	lock->shared = shared;
1669 
1670 	locking->locks.Add(lock);
1671 	put_advisory_locking(locking);
1672 
1673 	return status;
1674 }
1675 
1676 
1677 /*!	Normalizes the \a flock structure to make it easier to compare the
1678 	structure with others. The l_start and l_len fields are set to absolute
1679 	values according to the l_whence field.
1680 */
1681 static status_t
1682 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1683 {
1684 	switch (flock->l_whence) {
1685 		case SEEK_SET:
1686 			break;
1687 		case SEEK_CUR:
1688 			flock->l_start += descriptor->pos;
1689 			break;
1690 		case SEEK_END:
1691 		{
1692 			struct vnode* vnode = descriptor->u.vnode;
1693 			struct stat stat;
1694 			status_t status;
1695 
1696 			if (!HAS_FS_CALL(vnode, read_stat))
1697 				return B_NOT_SUPPORTED;
1698 
1699 			status = FS_CALL(vnode, read_stat, &stat);
1700 			if (status != B_OK)
1701 				return status;
1702 
1703 			flock->l_start += stat.st_size;
1704 			break;
1705 		}
1706 		default:
1707 			return B_BAD_VALUE;
1708 	}
1709 
1710 	if (flock->l_start < 0)
1711 		flock->l_start = 0;
1712 	if (flock->l_len == 0)
1713 		flock->l_len = OFF_MAX;
1714 
1715 	// don't let the offset and length overflow
1716 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1717 		flock->l_len = OFF_MAX - flock->l_start;
1718 
1719 	if (flock->l_len < 0) {
1720 		// a negative length reverses the region
1721 		flock->l_start += flock->l_len;
1722 		flock->l_len = -flock->l_len;
1723 	}
1724 
1725 	return B_OK;
1726 }
1727 
1728 
1729 static void
1730 replace_vnode_if_disconnected(struct fs_mount* mount,
1731 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1732 	struct vnode* fallBack, bool lockRootLock)
1733 {
1734 	if (lockRootLock)
1735 		mutex_lock(&sIOContextRootLock);
1736 
1737 	struct vnode* obsoleteVnode = NULL;
1738 
1739 	if (vnode != NULL && vnode->mount == mount
1740 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1741 		obsoleteVnode = vnode;
1742 
1743 		if (vnode == mount->root_vnode) {
1744 			// redirect the vnode to the covered vnode
1745 			vnode = mount->covers_vnode;
1746 		} else
1747 			vnode = fallBack;
1748 
1749 		if (vnode != NULL)
1750 			inc_vnode_ref_count(vnode);
1751 	}
1752 
1753 	if (lockRootLock)
1754 		mutex_unlock(&sIOContextRootLock);
1755 
1756 	if (obsoleteVnode != NULL)
1757 		put_vnode(obsoleteVnode);
1758 }
1759 
1760 
1761 /*!	Disconnects all file descriptors that are associated with the
1762 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1763 	\a mount object.
1764 
1765 	Note, after you've called this function, there might still be ongoing
1766 	accesses - they won't be interrupted if they already happened before.
1767 	However, any subsequent access will fail.
1768 
1769 	This is not a cheap function and should be used with care and rarely.
1770 	TODO: there is currently no means to stop a blocking read/write!
1771 */
1772 void
1773 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1774 	struct vnode* vnodeToDisconnect)
1775 {
1776 	// iterate over all teams and peek into their file descriptors
1777 	int32 nextTeamID = 0;
1778 
1779 	while (true) {
1780 		struct io_context* context = NULL;
1781 		bool contextLocked = false;
1782 		struct team* team = NULL;
1783 		team_id lastTeamID;
1784 
1785 		cpu_status state = disable_interrupts();
1786 		SpinLocker teamsLock(gTeamSpinlock);
1787 
1788 		lastTeamID = peek_next_thread_id();
1789 		if (nextTeamID < lastTeamID) {
1790 			// get next valid team
1791 			while (nextTeamID < lastTeamID
1792 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1793 				nextTeamID++;
1794 			}
1795 
1796 			if (team) {
1797 				context = (io_context*)team->io_context;
1798 
1799 				// Some acrobatics to lock the context in a safe way
1800 				// (cf. _kern_get_next_fd_info() for details).
1801 				GRAB_THREAD_LOCK();
1802 				teamsLock.Unlock();
1803 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1804 					== B_OK;
1805 				RELEASE_THREAD_LOCK();
1806 
1807 				nextTeamID++;
1808 			}
1809 		}
1810 
1811 		teamsLock.Unlock();
1812 		restore_interrupts(state);
1813 
1814 		if (context == NULL)
1815 			break;
1816 
1817 		// we now have a context - since we couldn't lock it while having
1818 		// safe access to the team structure, we now need to lock the mutex
1819 		// manually
1820 
1821 		if (!contextLocked) {
1822 			// team seems to be gone, go over to the next team
1823 			continue;
1824 		}
1825 
1826 		// the team cannot be deleted completely while we're owning its
1827 		// io_context mutex, so we can safely play with it now
1828 
1829 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1830 			sRoot, true);
1831 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1832 			sRoot, false);
1833 
1834 		for (uint32 i = 0; i < context->table_size; i++) {
1835 			if (struct file_descriptor* descriptor = context->fds[i]) {
1836 				inc_fd_ref_count(descriptor);
1837 
1838 				// if this descriptor points at this mount, we
1839 				// need to disconnect it to be able to unmount
1840 				struct vnode* vnode = fd_vnode(descriptor);
1841 				if (vnodeToDisconnect != NULL) {
1842 					if (vnode == vnodeToDisconnect)
1843 						disconnect_fd(descriptor);
1844 				} else if ((vnode != NULL && vnode->mount == mount)
1845 					|| (vnode == NULL && descriptor->u.mount == mount))
1846 					disconnect_fd(descriptor);
1847 
1848 				put_fd(descriptor);
1849 			}
1850 		}
1851 
1852 		mutex_unlock(&context->io_mutex);
1853 	}
1854 }
1855 
1856 
1857 /*!	\brief Gets the root node of the current IO context.
1858 	If \a kernel is \c true, the kernel IO context will be used.
1859 	The caller obtains a reference to the returned node.
1860 */
1861 struct vnode*
1862 get_root_vnode(bool kernel)
1863 {
1864 	if (!kernel) {
1865 		// Get current working directory from io context
1866 		struct io_context* context = get_current_io_context(kernel);
1867 
1868 		mutex_lock(&sIOContextRootLock);
1869 
1870 		struct vnode* root = context->root;
1871 		if (root != NULL)
1872 			inc_vnode_ref_count(root);
1873 
1874 		mutex_unlock(&sIOContextRootLock);
1875 
1876 		if (root != NULL)
1877 			return root;
1878 
1879 		// That should never happen.
1880 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1881 			"root\n", team_get_current_team_id());
1882 	}
1883 
1884 	inc_vnode_ref_count(sRoot);
1885 	return sRoot;
1886 }
1887 
1888 
1889 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1890 		   by.
1891 
1892 	Given an arbitrary vnode, the function checks whether the node is covered
1893 	by the root of a volume. If it is, the function obtains a reference to the
1894 	volume root node and returns it.
1895 
1896 	\param vnode The vnode in question.
1897 	\return The volume root vnode the given vnode is covered by, if it is
1898 			indeed a mount point, or \c NULL otherwise.
1899 */
1900 static struct vnode*
1901 resolve_mount_point_to_volume_root(struct vnode* vnode)
1902 {
1903 	if (!vnode)
1904 		return NULL;
1905 
1906 	struct vnode* volumeRoot = NULL;
1907 
1908 	rw_lock_read_lock(&sVnodeLock);
1909 
1910 	if (vnode->covered_by) {
1911 		volumeRoot = vnode->covered_by;
1912 		inc_vnode_ref_count(volumeRoot);
1913 	}
1914 
1915 	rw_lock_read_unlock(&sVnodeLock);
1916 
1917 	return volumeRoot;
1918 }
1919 
1920 
1921 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1922 		   by.
1923 
1924 	Given an arbitrary vnode (identified by mount and node ID), the function
1925 	checks whether the node is covered by the root of a volume. If it is, the
1926 	function returns the mount and node ID of the volume root node. Otherwise
1927 	it simply returns the supplied mount and node ID.
1928 
1929 	In case of error (e.g. the supplied node could not be found) the variables
1930 	for storing the resolved mount and node ID remain untouched and an error
1931 	code is returned.
1932 
1933 	\param mountID The mount ID of the vnode in question.
1934 	\param nodeID The node ID of the vnode in question.
1935 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1936 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1937 	\return
1938 	- \c B_OK, if everything went fine,
1939 	- another error code, if something went wrong.
1940 */
1941 status_t
1942 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1943 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1944 {
1945 	// get the node
1946 	struct vnode* node;
1947 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1948 	if (error != B_OK)
1949 		return error;
1950 
1951 	// resolve the node
1952 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1953 	if (resolvedNode) {
1954 		put_vnode(node);
1955 		node = resolvedNode;
1956 	}
1957 
1958 	// set the return values
1959 	*resolvedMountID = node->device;
1960 	*resolvedNodeID = node->id;
1961 
1962 	put_vnode(node);
1963 
1964 	return B_OK;
1965 }
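
/*	Illustrative sketch (not part of the original source): resolving a
	directory that serves as a mount point to the root of the volume mounted
	on it. The IDs are made up for the example.

		dev_t resolvedDevice;
		ino_t resolvedNode;
		if (resolve_mount_point_to_volume_root(device, directoryID,
				&resolvedDevice, &resolvedNode) == B_OK) {
			// if (device, directoryID) was covered by a mounted volume,
			// (resolvedDevice, resolvedNode) now names that volume's root;
			// otherwise the supplied IDs are returned unchanged
		}
*/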
1966 
1967 
1968 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1969 
1970 	Given an arbitrary vnode, the function checks, whether the node is the
1971 	root of a volume. If it is (and if it is not "/"), the function obtains
1972 	a reference to the underlying mount point node and returns it.
1973 
1974 	\param vnode The vnode in question (caller must have a reference).
1975 	\return The mount point vnode the vnode covers, if it is indeed a volume
1976 			root and not "/", or \c NULL otherwise.
1977 */
1978 static struct vnode*
1979 resolve_volume_root_to_mount_point(struct vnode* vnode)
1980 {
1981 	if (!vnode)
1982 		return NULL;
1983 
1984 	struct vnode* mountPoint = NULL;
1985 
1986 	struct fs_mount* mount = vnode->mount;
1987 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1988 		mountPoint = mount->covers_vnode;
1989 		inc_vnode_ref_count(mountPoint);
1990 	}
1991 
1992 	return mountPoint;
1993 }
1994 
1995 
1996 /*!	\brief Gets the directory path and leaf name for a given path.
1997 
1998 	The supplied \a path is transformed to refer to the directory part of
1999 	the entry identified by the original path, and into the buffer \a filename
2000 	the leaf name of the original entry is written.
2001 	Neither the returned path nor the leaf name can be expected to be
2002 	canonical.
2003 
2004 	\param path The path to be analyzed. Must be able to store at least one
2005 		   additional character.
2006 	\param filename The buffer into which the leaf name will be written.
2007 		   Must be of size B_FILE_NAME_LENGTH at least.
2008 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2009 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2010 		   if the given path name is empty.
2011 */
2012 static status_t
2013 get_dir_path_and_leaf(char* path, char* filename)
2014 {
2015 	if (*path == '\0')
2016 		return B_ENTRY_NOT_FOUND;
2017 
2018 	char* last = strrchr(path, '/');
2019 		// '/' is not allowed in file names!
2020 
2021 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2022 
2023 	if (last == NULL) {
2024 		// this path is a single segment with no '/' in it
2025 		// e.g. "foo"
2026 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2027 			return B_NAME_TOO_LONG;
2028 
2029 		strcpy(path, ".");
2030 	} else {
2031 		last++;
2032 		if (last[0] == '\0') {
2033 			// special case: the path ends in one or more '/' - remove them
2034 			while (*--last == '/' && last != path);
2035 			last[1] = '\0';
2036 
2037 			if (last == path && last[0] == '/') {
2038 				// This path points to the root of the file system
2039 				strcpy(filename, ".");
2040 				return B_OK;
2041 			}
2042 			for (; last != path && *(last - 1) != '/'; last--);
2043 				// rewind to the start of the leaf before the '/'
2044 		}
2045 
2046 		// normal leaf: replace the leaf portion of the path with a '.'
2047 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2048 			return B_NAME_TOO_LONG;
2049 
2050 		last[0] = '.';
2051 		last[1] = '\0';
2052 	}
2053 	return B_OK;
2054 }
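
/*	Illustrative sketch (not part of the original source): how
	get_dir_path_and_leaf() transforms its buffer in place.

		char path[B_PATH_NAME_LENGTH] = "/boot/home/file.txt";
		char leaf[B_FILE_NAME_LENGTH];
		if (get_dir_path_and_leaf(path, leaf) == B_OK) {
			// path is now "/boot/home/.", leaf is "file.txt"
		}

	A path without any '/' ("foo") yields path "." and leaf "foo"; a path
	ending in slashes ("/boot/home/") yields path "/boot/." and leaf "home".
*/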
2055 
2056 
2057 static status_t
2058 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2059 	bool traverse, bool kernel, struct vnode** _vnode)
2060 {
2061 	char clonedName[B_FILE_NAME_LENGTH + 1];
2062 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2063 		return B_NAME_TOO_LONG;
2064 
2065 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2066 	struct vnode* directory;
2067 
2068 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2069 	if (status < 0)
2070 		return status;
2071 
2072 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2073 		_vnode, NULL);
2074 }
2075 
2076 
2077 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2078 	and returns the respective vnode.
2079 	On success a reference to the vnode is acquired for the caller.
2080 */
2081 static status_t
2082 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2083 {
2084 	ino_t id;
2085 
2086 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2087 		return get_vnode(dir->device, id, _vnode, true, false);
2088 
2089 	status_t status = FS_CALL(dir, lookup, name, &id);
2090 	if (status != B_OK)
2091 		return status;
2092 
2093 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2094 	// have a reference and just need to look the node up.
2095 	rw_lock_read_lock(&sVnodeLock);
2096 	*_vnode = lookup_vnode(dir->device, id);
2097 	rw_lock_read_unlock(&sVnodeLock);
2098 
2099 	if (*_vnode == NULL) {
2100 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2101 			"0x%Lx)\n", dir->device, id);
2102 		return B_ENTRY_NOT_FOUND;
2103 	}
2104 
2105 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2106 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2107 //		(*_vnode)->mount->id, (*_vnode)->id);
2108 
2109 	return B_OK;
2110 }
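
/*	Illustrative sketch (not part of the original source): the reference
	acquired by lookup_dir_entry() belongs to the caller. The entry name is
	an assumption.

		struct vnode* entry;
		if (lookup_dir_entry(dir, "config", &entry) == B_OK) {
			// the entry cache was consulted first; on a miss the FS's
			// lookup() hook resolved the name
			put_vnode(entry);
				// release the reference acquired for us
		}
*/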
2111 
2112 
2113 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2114 	\a path must not be NULL.
2115 	If it returns successfully, \a path contains the name of the last path
2116 	component. This function clobbers the buffer pointed to by \a path only
2117 	if it contains more than one component.
2118 	Note that this reduces the ref_count of the starting \a vnode, whether
2119 	it is successful or not!
2120 */
2121 static status_t
2122 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2123 	int count, struct io_context* ioContext, struct vnode** _vnode,
2124 	ino_t* _parentID)
2125 {
2126 	status_t status = B_OK;
2127 	ino_t lastParentID = vnode->id;
2128 
2129 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2130 
2131 	if (path == NULL) {
2132 		put_vnode(vnode);
2133 		return B_BAD_VALUE;
2134 	}
2135 
2136 	if (*path == '\0') {
2137 		put_vnode(vnode);
2138 		return B_ENTRY_NOT_FOUND;
2139 	}
2140 
2141 	while (true) {
2142 		struct vnode* nextVnode;
2143 		char* nextPath;
2144 
2145 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2146 			path));
2147 
2148 		// done?
2149 		if (path[0] == '\0')
2150 			break;
2151 
2152 		// walk to find the next path component ("path" will point to a single
2153 		// path component), and filter out multiple slashes
2154 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2155 				nextPath++);
2156 
2157 		if (*nextPath == '/') {
2158 			*nextPath = '\0';
2159 			do
2160 				nextPath++;
2161 			while (*nextPath == '/');
2162 		}
2163 
2164 		// See if the '..' is at the root of a mount and move to the covered
2165 		// vnode so we pass the '..' path to the underlying filesystem.
2166 		// Also prevent breaking the root of the IO context.
2167 		if (strcmp("..", path) == 0) {
2168 			if (vnode == ioContext->root) {
2169 				// Attempted prison break! Keep it contained.
2170 				path = nextPath;
2171 				continue;
2172 			} else if (vnode->mount->root_vnode == vnode
2173 				&& vnode->mount->covers_vnode) {
2174 				nextVnode = vnode->mount->covers_vnode;
2175 				inc_vnode_ref_count(nextVnode);
2176 				put_vnode(vnode);
2177 				vnode = nextVnode;
2178 			}
2179 		}
2180 
2181 		// check if vnode is really a directory
2182 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2183 			status = B_NOT_A_DIRECTORY;
2184 
2185 		// Check if we have the right to search the current directory vnode.
2186 		// If a file system doesn't have the access() function, we assume that
2187 		// searching a directory is always allowed
2188 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2189 			status = FS_CALL(vnode, access, X_OK);
2190 
2191 		// Tell the filesystem to get the vnode of this path component (if we
2192 		// got the permission from the call above)
2193 		if (status == B_OK)
2194 			status = lookup_dir_entry(vnode, path, &nextVnode);
2195 
2196 		if (status != B_OK) {
2197 			put_vnode(vnode);
2198 			return status;
2199 		}
2200 
2201 		// If the new node is a symbolic link, resolve it (if we've been told
2202 		// to do it)
2203 		if (S_ISLNK(nextVnode->Type())
2204 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2205 			size_t bufferSize;
2206 			char* buffer;
2207 
2208 			TRACE(("traverse link\n"));
2209 
2210 			// it's not exactly nice style using goto in this way, but hey,
2211 			// it works :-/
2212 			if (count + 1 > B_MAX_SYMLINKS) {
2213 				status = B_LINK_LIMIT;
2214 				goto resolve_link_error;
2215 			}
2216 
2217 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2218 			if (buffer == NULL) {
2219 				status = B_NO_MEMORY;
2220 				goto resolve_link_error;
2221 			}
2222 
2223 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2224 				bufferSize--;
2225 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2226 				// null-terminate
2227 				if (status >= 0)
2228 					buffer[bufferSize] = '\0';
2229 			} else
2230 				status = B_BAD_VALUE;
2231 
2232 			if (status != B_OK) {
2233 				free(buffer);
2234 
2235 		resolve_link_error:
2236 				put_vnode(vnode);
2237 				put_vnode(nextVnode);
2238 
2239 				return status;
2240 			}
2241 			put_vnode(nextVnode);
2242 
2243 			// Check if we start from the root directory or the current
2244 			// directory ("vnode" still points to that one).
2245 			// Cut off all leading slashes if it's the root directory
2246 			path = buffer;
2247 			bool absoluteSymlink = false;
2248 			if (path[0] == '/') {
2249 				// we don't need the old directory anymore
2250 				put_vnode(vnode);
2251 
2252 				while (*++path == '/')
2253 					;
2254 
2255 				mutex_lock(&sIOContextRootLock);
2256 				vnode = ioContext->root;
2257 				inc_vnode_ref_count(vnode);
2258 				mutex_unlock(&sIOContextRootLock);
2259 
2260 				absoluteSymlink = true;
2261 			}
2262 
2263 			inc_vnode_ref_count(vnode);
2264 				// balance the next recursion - we will decrement the
2265 				// ref_count of the vnode, no matter if we succeeded or not
2266 
2267 			if (absoluteSymlink && *path == '\0') {
2268 				// symlink was just "/"
2269 				nextVnode = vnode;
2270 			} else {
2271 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2272 					ioContext, &nextVnode, &lastParentID);
2273 			}
2274 
2275 			free(buffer);
2276 
2277 			if (status != B_OK) {
2278 				put_vnode(vnode);
2279 				return status;
2280 			}
2281 		} else
2282 			lastParentID = vnode->id;
2283 
2284 		// decrease the ref count on the old dir we just looked up into
2285 		put_vnode(vnode);
2286 
2287 		path = nextPath;
2288 		vnode = nextVnode;
2289 
2290 		// see if we hit a mount point
2291 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2292 		if (mountPoint) {
2293 			put_vnode(vnode);
2294 			vnode = mountPoint;
2295 		}
2296 	}
2297 
2298 	*_vnode = vnode;
2299 	if (_parentID)
2300 		*_parentID = lastParentID;
2301 
2302 	return B_OK;
2303 }
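
/*	Illustrative sketch (not part of the original source): the reference
	counting contract of vnode_path_to_vnode(). The starting vnode's
	reference is always consumed, so a caller that wants to keep its own
	reference must acquire an extra one first.

		char pathBuffer[B_PATH_NAME_LENGTH];
		strlcpy(pathBuffer, "bin/sh", sizeof(pathBuffer));

		inc_vnode_ref_count(startDir);
			// consumed by vnode_path_to_vnode(), success or failure

		struct vnode* target;
		if (vnode_path_to_vnode(startDir, pathBuffer, true, 0, ioContext,
				&target, NULL) == B_OK) {
			// we own a reference to "target"; pathBuffer holds the last
			// path component
			put_vnode(target);
		}
*/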
2304 
2305 
2306 static status_t
2307 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2308 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2309 {
2310 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2311 		get_current_io_context(kernel), _vnode, _parentID);
2312 }
2313 
2314 
2315 static status_t
2316 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2317 	ino_t* _parentID, bool kernel)
2318 {
2319 	struct vnode* start = NULL;
2320 
2321 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2322 
2323 	if (!path)
2324 		return B_BAD_VALUE;
2325 
2326 	if (*path == '\0')
2327 		return B_ENTRY_NOT_FOUND;
2328 
2329 	// figure out if we need to start at root or at cwd
2330 	if (*path == '/') {
2331 		if (sRoot == NULL) {
2332 			// we're a bit early, aren't we?
2333 			return B_ERROR;
2334 		}
2335 
2336 		while (*++path == '/')
2337 			;
2338 		start = get_root_vnode(kernel);
2339 
2340 		if (*path == '\0') {
2341 			*_vnode = start;
2342 			return B_OK;
2343 		}
2344 
2345 	} else {
2346 		struct io_context* context = get_current_io_context(kernel);
2347 
2348 		mutex_lock(&context->io_mutex);
2349 		start = context->cwd;
2350 		if (start != NULL)
2351 			inc_vnode_ref_count(start);
2352 		mutex_unlock(&context->io_mutex);
2353 
2354 		if (start == NULL)
2355 			return B_ERROR;
2356 	}
2357 
2358 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2359 		_parentID);
2360 }
2361 
2362 
2363 /*! Returns the vnode for the next-to-last segment of the path (i.e. the
2364 	directory), and returns the last path component in \a filename.
2365 	The path buffer must be able to store at least one additional character.
2366 */
2367 static status_t
2368 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2369 	bool kernel)
2370 {
2371 	status_t status = get_dir_path_and_leaf(path, filename);
2372 	if (status != B_OK)
2373 		return status;
2374 
2375 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2376 }
2377 
2378 
2379 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2380 		   to by a FD + path pair.
2381 
2382 	\a path must be given in either case. \a fd might be omitted, in which
2383 	case \a path is either an absolute path or one relative to the current
2384 	directory. If both are supplied and \a path is relative, it is reckoned
2385 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2386 	is ignored.
2387 
2388 	The caller has the responsibility to call put_vnode() on the returned
2389 	directory vnode.
2390 
2391 	\param fd The FD. May be < 0.
2392 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2393 	       is modified by this function. It must have at least room for a
2394 	       string one character longer than the path it contains.
2395 	\param _vnode A pointer to a variable the directory vnode shall be written
2396 		   into.
2397 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2398 		   the leaf name of the specified entry will be written.
2399 	\param kernel \c true, if invoked from inside the kernel, \c false if
2400 		   invoked from userland.
2401 	\return \c B_OK, if everything went fine, another error code otherwise.
2402 */
2403 static status_t
2404 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2405 	char* filename, bool kernel)
2406 {
2407 	if (!path)
2408 		return B_BAD_VALUE;
2409 	if (*path == '\0')
2410 		return B_ENTRY_NOT_FOUND;
2411 	if (fd < 0)
2412 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2413 
2414 	status_t status = get_dir_path_and_leaf(path, filename);
2415 	if (status != B_OK)
2416 		return status;
2417 
2418 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2419 }
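
/*	Illustrative sketch (not part of the original source): resolving an
	"FD + relative path" pair the way the *at()-style calls do. The FD value
	is an assumption.

		char path[B_PATH_NAME_LENGTH] = "subdir/entry";
		char leaf[B_FILE_NAME_LENGTH];
		struct vnode* directory;
		if (fd_and_path_to_dir_vnode(dirFD, path, &directory, leaf,
				true) == B_OK) {
			// "directory" refers to <dirFD>/subdir, "leaf" is "entry"
			put_vnode(directory);
		}
*/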
2420 
2421 
2422 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2423 		   to by a vnode + path pair.
2424 
2425 	\a path must be given in either case. \a vnode might be omitted, in which
2426 	case \a path is either an absolute path or one relative to the current
2427 	directory. If both are supplied and \a path is relative, it is reckoned
2428 	off of the directory referred to by \a vnode. If \a path is absolute,
2429 	\a vnode is ignored.
2430 
2431 	The caller has the responsibility to call put_vnode() on the returned
2432 	directory vnode.
2433 
2434 	\param vnode The vnode. May be \c NULL.
2435 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2436 	       is modified by this function. It must have at least room for a
2437 	       string one character longer than the path it contains.
2438 	\param _vnode A pointer to a variable the directory vnode shall be written
2439 		   into.
2440 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2441 		   the leaf name of the specified entry will be written.
2442 	\param kernel \c true, if invoked from inside the kernel, \c false if
2443 		   invoked from userland.
2444 	\return \c B_OK, if everything went fine, another error code otherwise.
2445 */
2446 static status_t
2447 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2448 	struct vnode** _vnode, char* filename, bool kernel)
2449 {
2450 	if (!path)
2451 		return B_BAD_VALUE;
2452 	if (*path == '\0')
2453 		return B_ENTRY_NOT_FOUND;
2454 	if (vnode == NULL || path[0] == '/')
2455 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2456 
2457 	status_t status = get_dir_path_and_leaf(path, filename);
2458 	if (status != B_OK)
2459 		return status;
2460 
2461 	inc_vnode_ref_count(vnode);
2462 		// vnode_path_to_vnode() always decrements the ref count
2463 
2464 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2465 }
2466 
2467 
2468 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2469 */
2470 static status_t
2471 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2472 	size_t bufferSize, struct io_context* ioContext)
2473 {
2474 	if (bufferSize < sizeof(struct dirent))
2475 		return B_BAD_VALUE;
2476 
2477 	// See if vnode is the root of a mount and move to the covered
2478 	// vnode so we get the underlying file system
2479 	VNodePutter vnodePutter;
2480 	if (vnode->mount->root_vnode == vnode
2481 		&& vnode->mount->covers_vnode != NULL) {
2482 		vnode = vnode->mount->covers_vnode;
2483 		inc_vnode_ref_count(vnode);
2484 		vnodePutter.SetTo(vnode);
2485 	}
2486 
2487 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 		// The FS supports getting the name of a vnode.
2489 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 			return B_OK;
2492 	}
2493 
2494 	// The FS doesn't support getting the name of a vnode. So we search the
2495 	// parent directory for the vnode, if the caller let us.
2496 
2497 	if (parent == NULL)
2498 		return B_NOT_SUPPORTED;
2499 
2500 	void* cookie;
2501 
2502 	status_t status = FS_CALL(parent, open_dir, &cookie);
2503 	if (status >= B_OK) {
2504 		while (true) {
2505 			uint32 num = 1;
2506 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2507 				&num);
2508 			if (status != B_OK)
2509 				break;
2510 			if (num == 0) {
2511 				status = B_ENTRY_NOT_FOUND;
2512 				break;
2513 			}
2514 
2515 			if (vnode->id == buffer->d_ino) {
2516 				// found correct entry!
2517 				break;
2518 			}
2519 		}
2520 
2521 		FS_CALL(parent, close_dir, cookie);
2522 		FS_CALL(parent, free_dir_cookie, cookie);
			// the cookie was created by open_dir() on "parent", so it must
			// be closed and freed there, too
2523 	}
2524 	return status;
2525 }
2526 
2527 
2528 static status_t
2529 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2530 	size_t nameSize, bool kernel)
2531 {
2532 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2533 	struct dirent* dirent = (struct dirent*)buffer;
2534 
2535 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2536 		get_current_io_context(kernel));
2537 	if (status != B_OK)
2538 		return status;
2539 
2540 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2541 		return B_BUFFER_OVERFLOW;
2542 
2543 	return B_OK;
2544 }
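
/*	Illustrative sketch (not part of the original source): asking for a
	node's name; the parent directory serves as fallback for file systems
	that don't implement the get_vnode_name() hook.

		char name[B_FILE_NAME_LENGTH];
		if (get_vnode_name(vnode, parent, name, sizeof(name), true) == B_OK) {
			// "name" holds the leaf name; passing parent == NULL forbids
			// the (potentially expensive) directory iteration fallback
		}
*/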
2545 
2546 
2547 /*!	Gets the full path to a given directory vnode.
2548 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2549 	file system doesn't support this call, it will fall back to iterating
2550 	through the parent directory to get the name of the child.
2551 
2552 	To protect against circular loops, it supports a maximum tree depth
2553 	of 256 levels.
2554 
2555 	Note that the path may no longer be correct by the time this function
2556 	returns! It doesn't use any locking to ensure the path stays valid, as
2557 	paths aren't safe anyway: the path to a file can change at any time.
2558 
2559 	It might be a good idea, though, to check in the calling function
2560 	whether the returned path exists (it's not done here for efficiency).
2561 */
2562 static status_t
2563 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2564 	bool kernel)
2565 {
2566 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2567 
2568 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2569 		return B_BAD_VALUE;
2570 
2571 	if (!S_ISDIR(vnode->Type()))
2572 		return B_NOT_A_DIRECTORY;
2573 
2574 	char* path = buffer;
2575 	int32 insert = bufferSize;
2576 	int32 maxLevel = 256;
2577 	int32 length;
2578 	status_t status;
2579 	struct io_context* ioContext = get_current_io_context(kernel);
2580 
2581 	// we don't use get_vnode() here because this call is more
2582 	// efficient and does all we need from get_vnode()
2583 	inc_vnode_ref_count(vnode);
2584 
2585 	if (vnode != ioContext->root) {
2586 		// we haven't hit the IO context root, so
2587 		// resolve a volume root to its mount point
2588 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2589 		if (mountPoint) {
2590 			put_vnode(vnode);
2591 			vnode = mountPoint;
2592 		}
2593 	}
2594 
2595 	path[--insert] = '\0';
2596 		// the path is filled right to left
2597 
2598 	while (true) {
2599 		// the name buffer is also used for fs_read_dir()
2600 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2601 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2602 		struct vnode* parentVnode;
2603 		ino_t parentID;
2604 
2605 		// lookup the parent vnode
2606 		if (vnode == ioContext->root) {
2607 			// we hit the IO context root
2608 			parentVnode = vnode;
2609 			inc_vnode_ref_count(vnode);
2610 		} else {
2611 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2612 			if (status != B_OK)
2613 				goto out;
2614 		}
2615 
2616 		// get the node's name
2617 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2618 			sizeof(nameBuffer), ioContext);
2619 
2620 		if (vnode != ioContext->root) {
2621 			// we haven't hit the IO context root, so
2622 			// resolve a volume root to its mount point
2623 			struct vnode* mountPoint
2624 				= resolve_volume_root_to_mount_point(parentVnode);
2625 			if (mountPoint) {
2626 				put_vnode(parentVnode);
2627 				parentVnode = mountPoint;
2628 				parentID = parentVnode->id;
2629 			}
2630 		}
2631 
2632 		bool hitRoot = (parentVnode == vnode);
2633 
2634 		// release the current vnode, we only need its parent from now on
2635 		put_vnode(vnode);
2636 		vnode = parentVnode;
2637 
2638 		if (status != B_OK)
2639 			goto out;
2640 
2641 		if (hitRoot) {
2642 			// we have reached "/", which means we have constructed the full
2643 			// path
2644 			break;
2645 		}
2646 
2647 		// TODO: add an explicit check for loops in about 10 levels to do
2648 		// real loop detection
2649 
2650 		// don't go deeper than 'maxLevel' to prevent circular loops
2651 		if (maxLevel-- < 0) {
2652 			status = B_LINK_LIMIT;
2653 			goto out;
2654 		}
2655 
2656 		// add the name in front of the current path
2657 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2658 		length = strlen(name);
2659 		insert -= length;
2660 		if (insert <= 0) {
2661 			status = B_RESULT_NOT_REPRESENTABLE;
2662 			goto out;
2663 		}
2664 		memcpy(path + insert, name, length);
2665 		path[--insert] = '/';
2666 	}
2667 
2668 	// the root dir will result in an empty path: fix it
2669 	if (path[insert] == '\0')
2670 		path[--insert] = '/';
2671 
2672 	TRACE(("  path is: %s\n", path + insert));
2673 
2674 	// move the path to the start of the buffer
2675 	length = bufferSize - insert;
2676 	memmove(buffer, path + insert, length);
2677 
2678 out:
2679 	put_vnode(vnode);
2680 	return status;
2681 }
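
/*	Illustrative sketch (not part of the original source): dir_vnode_to_path()
	acquires and releases its own references, so the caller just needs to
	hold one across the call.

		char path[B_PATH_NAME_LENGTH];
		if (dir_vnode_to_path(dirVnode, path, sizeof(path), true) == B_OK) {
			// "path" now holds an absolute path such as "/boot/home" -
			// which may already be stale, see the note above
		}
*/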
2682 
2683 
2684 /*!	Checks the length of every path component, and adds a '.'
2685 	if the path ends in a slash.
2686 	The given path buffer must be able to store at least one
2687 	additional character.
2688 */
2689 static status_t
2690 check_path(char* to)
2691 {
2692 	int32 length = 0;
2693 
2694 	// check length of every path component
2695 
2696 	while (*to) {
2697 		char* begin;
2698 		if (*to == '/')
2699 			to++, length++;
2700 
2701 		begin = to;
2702 		while (*to != '/' && *to)
2703 			to++, length++;
2704 
2705 		if (to - begin > B_FILE_NAME_LENGTH)
2706 			return B_NAME_TOO_LONG;
2707 	}
2708 
2709 	if (length == 0)
2710 		return B_ENTRY_NOT_FOUND;
2711 
2712 	// complete path if there is a slash at the end
2713 
2714 	if (*(to - 1) == '/') {
2715 		if (length > B_PATH_NAME_LENGTH - 2)
2716 			return B_NAME_TOO_LONG;
2717 
2718 		to[0] = '.';
2719 		to[1] = '\0';
2720 	}
2721 
2722 	return B_OK;
2723 }
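
/*	Illustrative sketch (not part of the original source): how check_path()
	completes a path that ends in a slash.

		char path[B_PATH_NAME_LENGTH] = "/boot/home/";
		if (check_path(path) == B_OK) {
			// path is now "/boot/home/."; a component longer than
			// B_FILE_NAME_LENGTH would have yielded B_NAME_TOO_LONG
		}
*/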
2724 
2725 
2726 static struct file_descriptor*
2727 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2728 {
2729 	struct file_descriptor* descriptor
2730 		= get_fd(get_current_io_context(kernel), fd);
2731 	if (descriptor == NULL)
2732 		return NULL;
2733 
2734 	struct vnode* vnode = fd_vnode(descriptor);
2735 	if (vnode == NULL) {
2736 		put_fd(descriptor);
2737 		return NULL;
2738 	}
2739 
2740 	// ToDo: when we can close a file descriptor at any point, investigate
2741 	//	if this is still valid to do (accessing the vnode without ref_count
2742 	//	or locking)
2743 	*_vnode = vnode;
2744 	return descriptor;
2745 }
2746 
2747 
2748 static struct vnode*
2749 get_vnode_from_fd(int fd, bool kernel)
2750 {
2751 	struct file_descriptor* descriptor;
2752 	struct vnode* vnode;
2753 
2754 	descriptor = get_fd(get_current_io_context(kernel), fd);
2755 	if (descriptor == NULL)
2756 		return NULL;
2757 
2758 	vnode = fd_vnode(descriptor);
2759 	if (vnode != NULL)
2760 		inc_vnode_ref_count(vnode);
2761 
2762 	put_fd(descriptor);
2763 	return vnode;
2764 }
2765 
2766 
2767 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2768 	only the path will be considered. In this case, the \a path must not be
2769 	NULL.
2770 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2771 	and should be NULL for files.
2772 */
2773 static status_t
2774 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2775 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2776 {
2777 	if (fd < 0 && !path)
2778 		return B_BAD_VALUE;
2779 
2780 	if (path != NULL && *path == '\0')
2781 		return B_ENTRY_NOT_FOUND;
2782 
2783 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2784 		// no FD or absolute path
2785 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2786 	}
2787 
2788 	// FD only, or FD + relative path
2789 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2790 	if (!vnode)
2791 		return B_FILE_ERROR;
2792 
2793 	if (path != NULL) {
2794 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2795 			_vnode, _parentID);
2796 	}
2797 
2798 	// there is no relative path to take into account
2799 
2800 	*_vnode = vnode;
2801 	if (_parentID)
2802 		*_parentID = -1;
2803 
2804 	return B_OK;
2805 }
2806 
2807 
2808 static int
2809 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2810 	void* cookie, int openMode, bool kernel)
2811 {
2812 	struct file_descriptor* descriptor;
2813 	int fd;
2814 
2815 	// If the vnode is locked, we don't allow creating a new file/directory
2816 	// file_descriptor for it
2817 	if (vnode && vnode->mandatory_locked_by != NULL
2818 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2819 		return B_BUSY;
2820 
2821 	descriptor = alloc_fd();
2822 	if (!descriptor)
2823 		return B_NO_MEMORY;
2824 
2825 	if (vnode)
2826 		descriptor->u.vnode = vnode;
2827 	else
2828 		descriptor->u.mount = mount;
2829 	descriptor->cookie = cookie;
2830 
2831 	switch (type) {
2832 		// vnode types
2833 		case FDTYPE_FILE:
2834 			descriptor->ops = &sFileOps;
2835 			break;
2836 		case FDTYPE_DIR:
2837 			descriptor->ops = &sDirectoryOps;
2838 			break;
2839 		case FDTYPE_ATTR:
2840 			descriptor->ops = &sAttributeOps;
2841 			break;
2842 		case FDTYPE_ATTR_DIR:
2843 			descriptor->ops = &sAttributeDirectoryOps;
2844 			break;
2845 
2846 		// mount types
2847 		case FDTYPE_INDEX_DIR:
2848 			descriptor->ops = &sIndexDirectoryOps;
2849 			break;
2850 		case FDTYPE_QUERY:
2851 			descriptor->ops = &sQueryOps;
2852 			break;
2853 
2854 		default:
2855 			panic("get_new_fd() called with unknown type %d\n", type);
2856 			break;
2857 	}
2858 	descriptor->type = type;
2859 	descriptor->open_mode = openMode;
2860 
2861 	io_context* context = get_current_io_context(kernel);
2862 	fd = new_fd(context, descriptor);
2863 	if (fd < 0) {
2864 		free(descriptor);
2865 		return B_NO_MORE_FDS;
2866 	}
2867 
2868 	mutex_lock(&context->io_mutex);
2869 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2870 	mutex_unlock(&context->io_mutex);
2871 
2872 	return fd;
2873 }
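
/*	Illustrative sketch (not part of the original source): how an open()
	path might hand a freshly opened vnode to its caller. The error cleanup
	shown is an assumption.

		int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
			kernel);
		if (fd < 0) {
			// no free FD slot or no memory - undo the open
			FS_CALL(vnode, close, cookie);
			FS_CALL(vnode, free_cookie, cookie);
			put_vnode(vnode);
		}
		return fd;
*/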
2874 
2875 
2876 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2877 	vfs_normalize_path(). See there for more documentation.
2878 */
2879 static status_t
2880 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2881 {
2882 	VNodePutter dirPutter;
2883 	struct vnode* dir = NULL;
2884 	status_t error;
2885 
2886 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2887 		// get dir vnode + leaf name
2888 		struct vnode* nextDir;
2889 		char leaf[B_FILE_NAME_LENGTH];
2890 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2891 		if (error != B_OK)
2892 			return error;
2893 
2894 		dir = nextDir;
2895 		strcpy(path, leaf);
2896 		dirPutter.SetTo(dir);
2897 
2898 		// get file vnode, if we shall resolve links
2899 		bool fileExists = false;
2900 		struct vnode* fileVnode;
2901 		VNodePutter fileVnodePutter;
2902 		if (traverseLink) {
2903 			inc_vnode_ref_count(dir);
2904 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2905 					NULL) == B_OK) {
2906 				fileVnodePutter.SetTo(fileVnode);
2907 				fileExists = true;
2908 			}
2909 		}
2910 
2911 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2912 			// we're done -- construct the path
2913 			bool hasLeaf = true;
2914 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2915 				// special cases "." and ".." -- get the dir, forget the leaf
2916 				inc_vnode_ref_count(dir);
2917 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2918 					&nextDir, NULL);
2919 				if (error != B_OK)
2920 					return error;
2921 				dir = nextDir;
2922 				dirPutter.SetTo(dir);
2923 				hasLeaf = false;
2924 			}
2925 
2926 			// get the directory path
2927 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2928 			if (error != B_OK)
2929 				return error;
2930 
2931 			// append the leaf name
2932 			if (hasLeaf) {
2933 				// insert a directory separator if this is not the file system
2934 				// root
2935 				if ((strcmp(path, "/") != 0
2936 					&& strlcat(path, "/", pathSize) >= pathSize)
2937 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2938 					return B_NAME_TOO_LONG;
2939 				}
2940 			}
2941 
2942 			return B_OK;
2943 		}
2944 
2945 		// read link
2946 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2947 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2948 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2949 			if (error != B_OK)
2950 				return error;
2951 			path[bufferSize] = '\0';
2952 		} else
2953 			return B_BAD_VALUE;
2954 	}
2955 
2956 	return B_LINK_LIMIT;
2957 }
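
/*	Illustrative sketch (not part of the original source): in-place
	normalization. The input is made up, and the result depends on the
	actual hierarchy, since ".." and symlinks are resolved against real
	vnodes.

		char path[B_PATH_NAME_LENGTH];
		strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));
		if (normalize_path(path, sizeof(path), true, true) == B_OK) {
			// path would now read "/boot/home/Desktop"
		}
*/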
2958 
2959 
2960 #ifdef ADD_DEBUGGER_COMMANDS
2961 
2962 
2963 static void
2964 _dump_advisory_locking(advisory_locking* locking)
2965 {
2966 	if (locking == NULL)
2967 		return;
2968 
2969 	kprintf("   lock:        %ld", locking->lock);
2970 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2971 
2972 	int32 index = 0;
2973 	LockList::Iterator iterator = locking->locks.GetIterator();
2974 	while (iterator.HasNext()) {
2975 		struct advisory_lock* lock = iterator.Next();
2976 
2977 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2978 		kprintf("        start:  %Ld\n", lock->start);
2979 		kprintf("        end:    %Ld\n", lock->end);
2980 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2981 	}
2982 }
2983 
2984 
2985 static void
2986 _dump_mount(struct fs_mount* mount)
2987 {
2988 	kprintf("MOUNT: %p\n", mount);
2989 	kprintf(" id:            %ld\n", mount->id);
2990 	kprintf(" device_name:   %s\n", mount->device_name);
2991 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2992 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2993 	kprintf(" partition:     %p\n", mount->partition);
2994 	kprintf(" lock:          %p\n", &mount->rlock);
2995 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2996 		mount->owns_file_device ? " owns_file_device" : "");
2997 
2998 	fs_volume* volume = mount->volume;
2999 	while (volume != NULL) {
3000 		kprintf(" volume %p:\n", volume);
3001 		kprintf("  layer:            %ld\n", volume->layer);
3002 		kprintf("  private_volume:   %p\n", volume->private_volume);
3003 		kprintf("  ops:              %p\n", volume->ops);
3004 		kprintf("  file_system:      %p\n", volume->file_system);
3005 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3006 		volume = volume->super_volume;
3007 	}
3008 
3009 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3010 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3011 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3012 	set_debug_variable("_partition", (addr_t)mount->partition);
3013 }
3014 
3015 
3016 static bool
3017 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3018 	const char* name)
3019 {
3020 	bool insertSlash = buffer[bufferSize] != '\0';
3021 	size_t nameLength = strlen(name);
3022 
3023 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3024 		return false;
3025 
3026 	if (insertSlash)
3027 		buffer[--bufferSize] = '/';
3028 
3029 	bufferSize -= nameLength;
3030 	memcpy(buffer + bufferSize, name, nameLength);
3031 
3032 	return true;
3033 }
3034 
3035 
3036 static bool
3037 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3038 	ino_t nodeID)
3039 {
3040 	if (bufferSize == 0)
3041 		return false;
3042 
3043 	bool insertSlash = buffer[bufferSize] != '\0';
3044 	if (insertSlash)
3045 		buffer[--bufferSize] = '/';
3046 
3047 	size_t size = snprintf(buffer, bufferSize,
3048 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3049 	if (size > bufferSize) {
3050 		if (insertSlash)
3051 			bufferSize++;
3052 		return false;
3053 	}
3054 
3055 	if (size < bufferSize)
3056 		memmove(buffer + bufferSize - size, buffer, size);
3057 
3058 	bufferSize -= size;
3059 	return true;
3060 }
3061 
3062 
3063 static char*
3064 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3065 	bool& _truncated)
3066 {
3067 	// null-terminate the path
3068 	buffer[--bufferSize] = '\0';
3069 
3070 	while (true) {
3071 		while (vnode->mount->root_vnode == vnode
3072 				&& vnode->mount->covers_vnode != NULL) {
3073 			vnode = vnode->mount->covers_vnode;
3074 		}
3075 
3076 		if (vnode == sRoot) {
3077 			_truncated = bufferSize == 0;
3078 			if (!_truncated)
3079 				buffer[--bufferSize] = '/';
3080 			return buffer + bufferSize;
3081 		}
3082 
3083 		// resolve the name
3084 		ino_t dirID;
3085 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3086 			vnode->id, dirID);
3087 		if (name == NULL) {
3088 			// Failed to resolve the name -- prepend "<dev,node>/".
3089 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3090 				vnode->mount->id, vnode->id);
3091 			return buffer + bufferSize;
3092 		}
3093 
3094 		// prepend the name
3095 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3096 			_truncated = true;
3097 			return buffer + bufferSize;
3098 		}
3099 
3100 		// resolve the directory node
3101 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3102 		if (nextVnode == NULL) {
3103 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3104 				vnode->mount->id, dirID);
3105 			return buffer + bufferSize;
3106 		}
3107 
3108 		vnode = nextVnode;
3109 	}
3110 }
3111 
3112 
3113 static void
3114 _dump_vnode(struct vnode* vnode, bool printPath)
3115 {
3116 	kprintf("VNODE: %p\n", vnode);
3117 	kprintf(" device:        %ld\n", vnode->device);
3118 	kprintf(" id:            %Ld\n", vnode->id);
3119 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3120 	kprintf(" private_node:  %p\n", vnode->private_node);
3121 	kprintf(" mount:         %p\n", vnode->mount);
3122 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3123 	kprintf(" cache:         %p\n", vnode->cache);
3124 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3125 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3126 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3127 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3128 
3129 	_dump_advisory_locking(vnode->advisory_locking);
3130 
3131 	if (printPath) {
3132 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3133 		if (buffer != NULL) {
3134 			bool truncated;
3135 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3136 				B_PATH_NAME_LENGTH, truncated);
3137 			if (path != NULL) {
3138 				kprintf(" path:          ");
3139 				if (truncated)
3140 					kputs("<truncated>/");
3141 				kputs(path);
3142 				kputs("\n");
3143 			} else
3144 				kprintf("Failed to resolve vnode path.\n");
3145 
3146 			debug_free(buffer);
3147 		} else
3148 			kprintf("Failed to allocate memory for constructing the path.\n");
3149 	}
3150 
3151 	set_debug_variable("_node", (addr_t)vnode->private_node);
3152 	set_debug_variable("_mount", (addr_t)vnode->mount);
3153 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3154 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3155 }
3156 
3157 
3158 static int
3159 dump_mount(int argc, char** argv)
3160 {
3161 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3162 		kprintf("usage: %s [id|address]\n", argv[0]);
3163 		return 0;
3164 	}
3165 
3166 	uint32 id = parse_expression(argv[1]);
3167 	struct fs_mount* mount = NULL;
3168 
3169 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3170 	if (mount == NULL) {
3171 		if (IS_USER_ADDRESS(id)) {
3172 			kprintf("fs_mount not found\n");
3173 			return 0;
3174 		}
3175 		mount = (fs_mount*)id;
3176 	}
3177 
3178 	_dump_mount(mount);
3179 	return 0;
3180 }
3181 
3182 
3183 static int
3184 dump_mounts(int argc, char** argv)
3185 {
3186 	if (argc != 1) {
3187 		kprintf("usage: %s\n", argv[0]);
3188 		return 0;
3189 	}
3190 
3191 	kprintf("address     id root       covers     cookie     fs_name\n");
3192 
3193 	struct hash_iterator iterator;
3194 	struct fs_mount* mount;
3195 
3196 	hash_open(sMountsTable, &iterator);
3197 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3198 			!= NULL) {
3199 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3200 			mount->covers_vnode, mount->volume->private_volume,
3201 			mount->volume->file_system_name);
3202 
3203 		fs_volume* volume = mount->volume;
3204 		while (volume->super_volume != NULL) {
3205 			volume = volume->super_volume;
3206 			kprintf("                                     %p %s\n",
3207 				volume->private_volume, volume->file_system_name);
3208 		}
3209 	}
3210 
3211 	hash_close(sMountsTable, &iterator, false);
3212 	return 0;
3213 }
3214 
3215 
3216 static int
3217 dump_vnode(int argc, char** argv)
3218 {
3219 	bool printPath = false;
3220 	int argi = 1;
3221 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3222 		printPath = true;
3223 		argi++;
3224 	}
3225 
3226 	if (argi >= argc || argi + 2 < argc) {
3227 		print_debugger_command_usage(argv[0]);
3228 		return 0;
3229 	}
3230 
3231 	struct vnode* vnode = NULL;
3232 
3233 	if (argi + 1 == argc) {
3234 		vnode = (struct vnode*)parse_expression(argv[argi]);
3235 		if (IS_USER_ADDRESS(vnode)) {
3236 			kprintf("invalid vnode address\n");
3237 			return 0;
3238 		}
3239 		_dump_vnode(vnode, printPath);
3240 		return 0;
3241 	}
3242 
3243 	struct hash_iterator iterator;
3244 	dev_t device = parse_expression(argv[argi]);
3245 	ino_t id = parse_expression(argv[argi + 1]);
3246 
3247 	hash_open(sVnodeTable, &iterator);
3248 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3249 		if (vnode->id != id || vnode->device != device)
3250 			continue;
3251 
3252 		_dump_vnode(vnode, printPath);
3253 	}
3254 
3255 	hash_close(sVnodeTable, &iterator, false);
3256 	return 0;
3257 }
3258 
3259 
3260 static int
3261 dump_vnodes(int argc, char** argv)
3262 {
3263 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3264 		kprintf("usage: %s [device]\n", argv[0]);
3265 		return 0;
3266 	}
3267 
3268 	// restrict dumped nodes to a certain device if requested
3269 	dev_t device = parse_expression(argv[1]);
3270 
3271 	struct hash_iterator iterator;
3272 	struct vnode* vnode;
3273 
3274 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3275 		"flags\n");
3276 
3277 	hash_open(sVnodeTable, &iterator);
3278 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3279 		if (vnode->device != device)
3280 			continue;
3281 
3282 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3283 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3284 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3285 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3286 	}
3287 
3288 	hash_close(sVnodeTable, &iterator, false);
3289 	return 0;
3290 }
3291 
3292 
3293 static int
3294 dump_vnode_caches(int argc, char** argv)
3295 {
3296 	struct hash_iterator iterator;
3297 	struct vnode* vnode;
3298 
3299 	if (argc > 2 || (argc > 1 && !strcmp(argv[1], "--help"))) {
3300 		kprintf("usage: %s [device]\n", argv[0]);
3301 		return 0;
3302 	}
3303 
3304 	// restrict dumped nodes to a certain device if requested
3305 	dev_t device = -1;
3306 	if (argc > 1)
3307 		device = parse_expression(argv[1]);
3308 
3309 	kprintf("address    dev     inode cache          size   pages\n");
3310 
3311 	hash_open(sVnodeTable, &iterator);
3312 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3313 		if (vnode->cache == NULL)
3314 			continue;
3315 		if (device != -1 && vnode->device != device)
3316 			continue;
3317 
3318 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3319 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3320 				/ B_PAGE_SIZE, vnode->cache->page_count);
3321 	}
3322 
3323 	hash_close(sVnodeTable, &iterator, false);
3324 	return 0;
3325 }
3326 
3327 
3328 int
3329 dump_io_context(int argc, char** argv)
3330 {
3331 	if (argc > 2 || (argc > 1 && !strcmp(argv[1], "--help"))) {
3332 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3333 		return 0;
3334 	}
3335 
3336 	struct io_context* context = NULL;
3337 
3338 	if (argc > 1) {
3339 		uint32 num = parse_expression(argv[1]);
3340 		if (IS_KERNEL_ADDRESS(num))
3341 			context = (struct io_context*)num;
3342 		else {
3343 			struct team* team = team_get_team_struct_locked(num);
3344 			if (team == NULL) {
3345 				kprintf("could not find team with ID %ld\n", num);
3346 				return 0;
3347 			}
3348 			context = (struct io_context*)team->io_context;
3349 		}
3350 	} else
3351 		context = get_current_io_context(true);
3352 
3353 	kprintf("I/O CONTEXT: %p\n", context);
3354 	kprintf(" root vnode:\t%p\n", context->root);
3355 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3356 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3357 	kprintf(" max fds:\t%lu\n", context->table_size);
3358 
3359 	if (context->num_used_fds)
3360 		kprintf("   no.  type         ops  ref  open  mode         pos"
3361 			"      cookie\n");
3362 
3363 	for (uint32 i = 0; i < context->table_size; i++) {
3364 		struct file_descriptor* fd = context->fds[i];
3365 		if (fd == NULL)
3366 			continue;
3367 
3368 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3369 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3370 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3371 			fd->pos, fd->cookie,
3372 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3373 				? "mount" : "vnode",
3374 			fd->u.vnode);
3375 	}
3376 
3377 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3378 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3379 
3380 	set_debug_variable("_cwd", (addr_t)context->cwd);
3381 
3382 	return 0;
3383 }
3384 
3385 
3386 int
3387 dump_vnode_usage(int argc, char** argv)
3388 {
3389 	if (argc != 1) {
3390 		kprintf("usage: %s\n", argv[0]);
3391 		return 0;
3392 	}
3393 
3394 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3395 		kMaxUnusedVnodes);
3396 
3397 	struct hash_iterator iterator;
3398 	hash_open(sVnodeTable, &iterator);
3399 
3400 	uint32 count = 0;
3401 	struct vnode* vnode;
3402 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3403 		count++;
3404 	}
3405 
3406 	hash_close(sVnodeTable, &iterator, false);
3407 
3408 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3409 	return 0;
3410 }
3411 
3412 #endif	// ADD_DEBUGGER_COMMANDS
3413 
3414 /*!	Clears an iovec array of physical pages.
3415 	Returns in \a _bytes the number of bytes successfully cleared.
3416 */
3417 static status_t
3418 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3419 {
3420 	size_t bytes = *_bytes;
3421 	size_t index = 0;
3422 
3423 	while (bytes > 0) {
3424 		size_t length = min_c(vecs[index].iov_len, bytes);
3425 
3426 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3427 			length);
3428 		if (status != B_OK) {
3429 			*_bytes -= bytes;
3430 			return status;
3431 		}
3432 
3433 		bytes -= length;
		index++;
			// advance to the next vec, or the same one would be cleared
			// over and over again
3434 	}
3435 
3436 	return B_OK;
3437 }
3438 
3439 
3440 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3441 	and calls the file system hooks to read/write the request to disk.
3442 */
3443 static status_t
3444 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3445 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3446 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3447 	bool doWrite)
3448 {
3449 	if (fileVecCount == 0) {
3450 		// There are no file vecs at this offset, so we're obviously trying
3451 		// to access the file outside of its bounds
3452 		return B_BAD_VALUE;
3453 	}
3454 
3455 	size_t numBytes = *_numBytes;
3456 	uint32 fileVecIndex;
3457 	size_t vecOffset = *_vecOffset;
3458 	uint32 vecIndex = *_vecIndex;
3459 	status_t status;
3460 	size_t size;
3461 
3462 	if (!doWrite && vecOffset == 0) {
3463 		// now directly read the data from the device
3464 		// the first file_io_vec can be read directly
3465 
3466 		if (fileVecs[0].length < numBytes)
3467 			size = fileVecs[0].length;
3468 		else
3469 			size = numBytes;
3470 
3471 		if (fileVecs[0].offset >= 0) {
3472 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3473 				&vecs[vecIndex], vecCount - vecIndex, &size);
3474 		} else {
3475 			// sparse read
3476 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3477 		}
3478 		if (status != B_OK)
3479 			return status;
3480 
3481 		// TODO: this is a work-around for buggy device drivers!
3482 		//	When our own drivers honour the length, we can:
3483 		//	a) also use this direct I/O for writes (otherwise, it would
3484 		//	   overwrite precious data)
3485 		//	b) panic if the term below is true (at least for writes)
3486 		if (size > fileVecs[0].length) {
3487 			//dprintf("warning: device driver %p doesn't respect total length "
3488 			//	"in read_pages() call!\n", ref->device);
3489 			size = fileVecs[0].length;
3490 		}
3491 
3492 		ASSERT(size <= fileVecs[0].length);
3493 
3494 		// If the file portion was contiguous, we're already done now
3495 		if (size == numBytes)
3496 			return B_OK;
3497 
3498 		// if we reached the end of the file, we can return as well
3499 		if (size != fileVecs[0].length) {
3500 			*_numBytes = size;
3501 			return B_OK;
3502 		}
3503 
3504 		fileVecIndex = 1;
3505 
3506 		// first, find out where we have to continue in our iovecs
3507 		for (; vecIndex < vecCount; vecIndex++) {
3508 			if (size < vecs[vecIndex].iov_len)
3509 				break;
3510 
3511 			size -= vecs[vecIndex].iov_len;
3512 		}
3513 
3514 		vecOffset = size;
3515 	} else {
3516 		fileVecIndex = 0;
3517 		size = 0;
3518 	}
3519 
3520 	// Too bad, let's process the rest of the file_io_vecs
3521 
3522 	size_t totalSize = size;
3523 	size_t bytesLeft = numBytes - size;
3524 
3525 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3526 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3527 		off_t fileOffset = fileVec.offset;
3528 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3529 
3530 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3531 
3532 		// process the complete fileVec
3533 		while (fileLeft > 0) {
3534 			iovec tempVecs[MAX_TEMP_IO_VECS];
3535 			uint32 tempCount = 0;
3536 
3537 			// size tracks how much of what is left of the current fileVec
3538 			// (fileLeft) has been assigned to tempVecs
3539 			size = 0;
3540 
3541 			// assign what is left of the current fileVec to the tempVecs
3542 			for (size = 0; size < fileLeft && vecIndex < vecCount
3543 					&& tempCount < MAX_TEMP_IO_VECS;) {
3544 				// try to satisfy one iovec per iteration (or as much as
3545 				// possible)
3546 
3547 				// bytes left of the current iovec
3548 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3549 				if (vecLeft == 0) {
3550 					vecOffset = 0;
3551 					vecIndex++;
3552 					continue;
3553 				}
3554 
3555 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3556 					vecIndex, vecOffset, size));
3557 
3558 				// actually available bytes
3559 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3560 
3561 				tempVecs[tempCount].iov_base
3562 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3563 				tempVecs[tempCount].iov_len = tempVecSize;
3564 				tempCount++;
3565 
3566 				size += tempVecSize;
3567 				vecOffset += tempVecSize;
3568 			}
3569 
3570 			size_t bytes = size;
3571 
3572 			if (fileOffset == -1) {
3573 				if (doWrite) {
3574 					panic("sparse write attempt: vnode %p", vnode);
3575 					status = B_IO_ERROR;
3576 				} else {
3577 					// sparse read
3578 					status = zero_pages(tempVecs, tempCount, &bytes);
3579 				}
3580 			} else if (doWrite) {
3581 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3582 					tempVecs, tempCount, &bytes);
3583 			} else {
3584 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3585 					tempVecs, tempCount, &bytes);
3586 			}
3587 			if (status != B_OK)
3588 				return status;
3589 
3590 			totalSize += bytes;
3591 			bytesLeft -= size;
3592 			if (fileOffset >= 0)
3593 				fileOffset += size;
3594 			fileLeft -= size;
3595 			//dprintf("-> file left = %Lu\n", fileLeft);
3596 
3597 			if (size != bytes || vecIndex >= vecCount) {
3598 				// there are no more bytes or iovecs, let's bail out
3599 				*_numBytes = totalSize;
3600 				return B_OK;
3601 			}
3602 		}
3603 	}
3604 
3605 	*_vecIndex = vecIndex;
3606 	*_vecOffset = vecOffset;
3607 	*_numBytes = totalSize;
3608 	return B_OK;
3609 }
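
/*	Worked example (illustrative only, made-up numbers): a 3072 byte read
	where the file is laid out in two extents with a sparse hole in between:

		fileVecs = { { offset 4096, length 1024 },
					 { offset   -1, length 1024 },		// sparse
					 { offset 9216, length 1024 } }

	With vecOffset == 0, the first extent is read directly via read_pages().
	The remaining extents are chopped into tempVecs that fit the iovec space
	left: the sparse extent is satisfied by zero_pages(), the last one by
	another read_pages() call. On a short transfer the function bails out
	early and *_numBytes reports the bytes actually transferred.
*/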
3610 
3611 
3612 //	#pragma mark - public API for file systems
3613 
3614 
3615 extern "C" status_t
3616 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3617 	fs_vnode_ops* ops)
3618 {
3619 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3620 		volume, volume->id, vnodeID, privateNode));
3621 
3622 	if (privateNode == NULL)
3623 		return B_BAD_VALUE;
3624 
3625 	// create the node
3626 	bool nodeCreated;
3627 	struct vnode* vnode;
3628 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3629 		nodeCreated);
3630 	if (status != B_OK)
3631 		return status;
3632 
3633 	WriteLocker nodeLocker(sVnodeLock, true);
3634 		// create_new_vnode_and_lock() has locked for us
3635 
3636 	// file system integrity check:
3637 	// test if the vnode already exists and bail out if this is the case!
3638 	if (!nodeCreated) {
3639 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3640 			volume->id, vnodeID, privateNode, vnode->private_node);
3641 		return B_ERROR;
3642 	}
3643 
3644 	vnode->private_node = privateNode;
3645 	vnode->ops = ops;
3646 	vnode->SetUnpublished(true);
3647 
3648 	TRACE(("returns: %s\n", strerror(status)));
3649 
3650 	return status;
3651 }
3652 
3653 
3654 extern "C" status_t
3655 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3656 	fs_vnode_ops* ops, int type, uint32 flags)
3657 {
3658 	FUNCTION(("publish_vnode()\n"));
3659 
3660 	WriteLocker locker(sVnodeLock);
3661 
3662 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3663 
3664 	bool nodeCreated = false;
3665 	if (vnode == NULL) {
3666 		if (privateNode == NULL)
3667 			return B_BAD_VALUE;
3668 
3669 		// create the node
3670 		locker.Unlock();
3671 			// create_new_vnode_and_lock() will re-lock for us on success
3672 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3673 			nodeCreated);
3674 		if (status != B_OK)
3675 			return status;
3676 
3677 		locker.SetTo(sVnodeLock, true);
3678 	}
3679 
3680 	if (nodeCreated) {
3681 		vnode->private_node = privateNode;
3682 		vnode->ops = ops;
3683 		vnode->SetUnpublished(true);
3684 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3685 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3686 		// already known, but not published
3687 	} else
3688 		return B_BAD_VALUE;
3689 
3690 	bool publishSpecialSubNode = false;
3691 
3692 	vnode->SetType(type);
3693 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3694 	publishSpecialSubNode = is_special_node_type(type)
3695 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3696 
3697 	status_t status = B_OK;
3698 
3699 	// create sub vnodes, if necessary
3700 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3701 		locker.Unlock();
3702 
3703 		fs_volume* subVolume = volume;
3704 		if (volume->sub_volume != NULL) {
3705 			while (status == B_OK && subVolume->sub_volume != NULL) {
3706 				subVolume = subVolume->sub_volume;
3707 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3708 					vnode);
3709 			}
3710 		}
3711 
3712 		if (status == B_OK && publishSpecialSubNode)
3713 			status = create_special_sub_node(vnode, flags);
3714 
3715 		if (status != B_OK) {
3716 			// error -- clean up the created sub vnodes
3717 			while (subVolume->super_volume != volume) {
3718 				subVolume = subVolume->super_volume;
3719 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3720 			}
3721 		}
3722 
3723 		if (status == B_OK) {
3724 			ReadLocker vnodesReadLocker(sVnodeLock);
3725 			AutoLocker<Vnode> nodeLocker(vnode);
3726 			vnode->SetBusy(false);
3727 			vnode->SetUnpublished(false);
3728 		} else {
3729 			locker.Lock();
3730 			hash_remove(sVnodeTable, vnode);
3731 			remove_vnode_from_mount_list(vnode, vnode->mount);
3732 			free(vnode);
3733 		}
3734 	} else {
3735 		// we still hold the write lock -- mark the node unbusy and published
3736 		vnode->SetBusy(false);
3737 		vnode->SetUnpublished(false);
3738 	}
3739 
3740 	TRACE(("returns: %s\n", strerror(status)));
3741 
3742 	return status;
3743 }
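
/* A minimal sketch of the intended new_vnode()/publish_vnode() sequence as
 * seen from a file system. The my_fs_create_node() function and
 * gMyFsVnodeOps are hypothetical placeholders; error handling is reduced to
 * the essentials.
 */
#if 0
extern fs_vnode_ops gMyFsVnodeOps;

static status_t
my_fs_create_node(fs_volume* volume, void* privateNode, ino_t nodeID)
{
	// announce the node to the VFS; it starts out busy and unpublished
	status_t status = new_vnode(volume, nodeID, privateNode, &gMyFsVnodeOps);
	if (status != B_OK)
		return status;

	// make the node available for lookup; on failure it must be removed again
	status = publish_vnode(volume, nodeID, privateNode, &gMyFsVnodeOps,
		S_IFREG, 0);
	if (status != B_OK)
		remove_vnode(volume, nodeID);

	return status;
}
#endif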
3744 
3745 
3746 extern "C" status_t
3747 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	if (volume == NULL)
3752 		return B_BAD_VALUE;
3753 
3754 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3755 	if (status != B_OK)
3756 		return status;
3757 
3758 	// If this is a layered FS, we need to get the node cookie for the requested
3759 	// layer.
3760 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3761 		fs_vnode resolvedNode;
3762 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3763 			&resolvedNode);
3764 		if (status != B_OK) {
3765 			panic("get_vnode(): Failed to get super node for vnode %p, "
3766 				"volume: %p", vnode, volume);
3767 			put_vnode(vnode);
3768 			return status;
3769 		}
3770 
3771 		if (_privateNode != NULL)
3772 			*_privateNode = resolvedNode.private_node;
3773 	} else if (_privateNode != NULL)
3774 		*_privateNode = vnode->private_node;
3775 
3776 	return B_OK;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3782 {
3783 	struct vnode* vnode;
3784 
3785 	rw_lock_read_lock(&sVnodeLock);
3786 	vnode = lookup_vnode(volume->id, vnodeID);
3787 	rw_lock_read_unlock(&sVnodeLock);
3788 
3789 	if (vnode == NULL)
3790 		return B_BAD_VALUE;
3791 
3792 	inc_vnode_ref_count(vnode);
3793 	return B_OK;
3794 }
3795 
3796 
3797 extern "C" status_t
3798 put_vnode(fs_volume* volume, ino_t vnodeID)
3799 {
3800 	struct vnode* vnode;
3801 
3802 	rw_lock_read_lock(&sVnodeLock);
3803 	vnode = lookup_vnode(volume->id, vnodeID);
3804 	rw_lock_read_unlock(&sVnodeLock);
3805 
3806 	if (vnode == NULL)
3807 		return B_BAD_VALUE;
3808 
3809 	dec_vnode_ref_count(vnode, false, true);
3810 	return B_OK;
3811 }
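
/* Sketch of the get_vnode()/put_vnode() reference pattern from FS code:
 * every successful get_vnode() must be balanced by a put_vnode(), and the
 * node cannot disappear in between. my_fs_use_node() is hypothetical.
 */
#if 0
static status_t
my_fs_use_node(fs_volume* volume, ino_t id)
{
	void* privateNode;
	status_t status = get_vnode(volume, id, &privateNode);
	if (status != B_OK)
		return status;

	// ... work with privateNode while holding the reference ...

	put_vnode(volume, id);
	return B_OK;
}
#endif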
3812 
3813 
3814 extern "C" status_t
3815 remove_vnode(fs_volume* volume, ino_t vnodeID)
3816 {
3817 	ReadLocker locker(sVnodeLock);
3818 
3819 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3820 	if (vnode == NULL)
3821 		return B_ENTRY_NOT_FOUND;
3822 
3823 	if (vnode->covered_by != NULL) {
3824 		// this vnode is in use
3825 		return B_BUSY;
3826 	}
3827 
3828 	vnode->Lock();
3829 
3830 	vnode->SetRemoved(true);
3831 	bool removeUnpublished = false;
3832 
3833 	if (vnode->IsUnpublished()) {
3834 		// prepare the vnode for deletion
3835 		removeUnpublished = true;
3836 		vnode->SetBusy(true);
3837 	}
3838 
3839 	vnode->Unlock();
3840 	locker.Unlock();
3841 
3842 	if (removeUnpublished) {
3843 		// If the vnode hasn't been published yet, we delete it here
3844 		atomic_add(&vnode->ref_count, -1);
3845 		free_vnode(vnode, true);
3846 	}
3847 
3848 	return B_OK;
3849 }
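
/* Sketch of the usual remove_vnode() usage in an FS unlink hook: the vnode
 * is only marked for deletion here; the FS performs the actual deletion when
 * its own remove hook runs after the last reference is gone. The my_fs_*
 * helpers are hypothetical.
 */
#if 0
status_t my_fs_remove_entry(fs_volume* volume, void* dir, const char* name,
	ino_t* _id);
void my_fs_restore_entry(fs_volume* volume, void* dir, const char* name,
	ino_t id);

static status_t
my_fs_unlink(fs_volume* volume, void* dir, const char* name)
{
	ino_t id;
	status_t status = my_fs_remove_entry(volume, dir, name, &id);
	if (status != B_OK)
		return status;

	// mark the node removed; roll the directory entry back on failure
	status = remove_vnode(volume, id);
	if (status != B_OK)
		my_fs_restore_entry(volume, dir, name, id);

	return status;
}
#endif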
3850 
3851 
3852 extern "C" status_t
3853 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3854 {
3855 	struct vnode* vnode;
3856 
3857 	rw_lock_read_lock(&sVnodeLock);
3858 
3859 	vnode = lookup_vnode(volume->id, vnodeID);
3860 	if (vnode) {
3861 		AutoLocker<Vnode> nodeLocker(vnode);
3862 		vnode->SetRemoved(false);
3863 	}
3864 
3865 	rw_lock_read_unlock(&sVnodeLock);
3866 	return B_OK;
3867 }
3868 
3869 
3870 extern "C" status_t
3871 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3872 {
3873 	ReadLocker _(sVnodeLock);
3874 
3875 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3876 		if (_removed != NULL)
3877 			*_removed = vnode->IsRemoved();
3878 		return B_OK;
3879 	}
3880 
3881 	return B_BAD_VALUE;
3882 }
3883 
3884 
3885 extern "C" fs_volume*
3886 volume_for_vnode(fs_vnode* _vnode)
3887 {
3888 	if (_vnode == NULL)
3889 		return NULL;
3890 
3891 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3892 	return vnode->mount->volume;
3893 }
3894 
3895 
3896 #if 0
3897 extern "C" status_t
3898 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3899 	size_t* _numBytes)
3900 {
3901 	struct file_descriptor* descriptor;
3902 	struct vnode* vnode;
3903 
3904 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3905 	if (descriptor == NULL)
3906 		return B_FILE_ERROR;
3907 
3908 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3909 		count, 0, _numBytes);
3910 
3911 	put_fd(descriptor);
3912 	return status;
3913 }
3914 
3915 
3916 extern "C" status_t
3917 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3918 	size_t* _numBytes)
3919 {
3920 	struct file_descriptor* descriptor;
3921 	struct vnode* vnode;
3922 
3923 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3924 	if (descriptor == NULL)
3925 		return B_FILE_ERROR;
3926 
3927 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3928 		count, 0, _numBytes);
3929 
3930 	put_fd(descriptor);
3931 	return status;
3932 }
3933 #endif
3934 
3935 
3936 extern "C" status_t
3937 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3938 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3939 	size_t* _bytes)
3940 {
3941 	struct file_descriptor* descriptor;
3942 	struct vnode* vnode;
3943 
3944 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3945 	if (descriptor == NULL)
3946 		return B_FILE_ERROR;
3947 
3948 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3949 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3950 		false);
3951 
3952 	put_fd(descriptor);
3953 	return status;
3954 }
3955 
3956 
3957 extern "C" status_t
3958 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3959 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3960 	size_t* _bytes)
3961 {
3962 	struct file_descriptor* descriptor;
3963 	struct vnode* vnode;
3964 
3965 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3966 	if (descriptor == NULL)
3967 		return B_FILE_ERROR;
3968 
3969 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3970 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3971 		true);
3972 
3973 	put_fd(descriptor);
3974 	return status;
3975 }
3976 
3977 
3978 extern "C" status_t
3979 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3980 {
3981 	// look up the mount -- the caller is required to make sure that the
3982 	// mount won't go away
3983 	MutexLocker locker(sMountMutex);
3984 	struct fs_mount* mount = find_mount(mountID);
3985 	if (mount == NULL)
3986 		return B_BAD_VALUE;
3987 	locker.Unlock();
3988 
3989 	return mount->entry_cache.Add(dirID, name, nodeID);
3990 }
3991 
3992 
3993 extern "C" status_t
3994 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3995 {
3996 	// look up the mount -- the caller is required to make sure that the
3997 	// mount won't go away
3998 	MutexLocker locker(sMountMutex);
3999 	struct fs_mount* mount = find_mount(mountID);
4000 	if (mount == NULL)
4001 		return B_BAD_VALUE;
4002 	locker.Unlock();
4003 
4004 	return mount->entry_cache.Remove(dirID, name);
4005 }
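
/* Sketch of keeping the entry cache coherent from an FS rename hook: the
 * stale entry is dropped and the new one added. The helper below is
 * hypothetical.
 */
#if 0
static void
my_fs_entry_moved(dev_t device, ino_t fromDir, const char* fromName,
	ino_t toDir, const char* toName, ino_t nodeID)
{
	entry_cache_remove(device, fromDir, fromName);
	entry_cache_add(device, toDir, toName, nodeID);
}
#endif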
4006 
4007 
4008 //	#pragma mark - private VFS API
4009 //	Functions the VFS exports for other parts of the kernel
4010 
4011 
4012 /*! Acquires another reference to the vnode that has to be released
4013 	by calling vfs_put_vnode().
4014 */
4015 void
4016 vfs_acquire_vnode(struct vnode* vnode)
4017 {
4018 	inc_vnode_ref_count(vnode);
4019 }
4020 
4021 
4022 /*! This is currently called from file_cache_create() only.
4023 	It's probably a temporary solution as long as devfs requires that
4024 	fs_read_pages()/fs_write_pages() are called with the standard
4025 	open cookie and not with a device cookie.
4026 	If that's done differently, remove this call; it has no other
4027 	purpose.
4028 */
4029 extern "C" status_t
4030 vfs_get_cookie_from_fd(int fd, void** _cookie)
4031 {
4032 	struct file_descriptor* descriptor;
4033 
4034 	descriptor = get_fd(get_current_io_context(true), fd);
4035 	if (descriptor == NULL)
4036 		return B_FILE_ERROR;
4037 
4038 	*_cookie = descriptor->cookie;
4039 	return B_OK;
4040 }
4041 
4042 
4043 extern "C" status_t
4044 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4045 {
4046 	*vnode = get_vnode_from_fd(fd, kernel);
4047 
4048 	if (*vnode == NULL)
4049 		return B_FILE_ERROR;
4050 
4051 	return B_NO_ERROR;
4052 }
4053 
4054 
4055 extern "C" status_t
4056 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4057 {
4058 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4059 		path, kernel));
4060 
4061 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4062 	if (pathBuffer.InitCheck() != B_OK)
4063 		return B_NO_MEMORY;
4064 
4065 	char* buffer = pathBuffer.LockBuffer();
4066 	strlcpy(buffer, path, pathBuffer.BufferSize());
4067 
4068 	struct vnode* vnode;
4069 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4070 	if (status != B_OK)
4071 		return status;
4072 
4073 	*_vnode = vnode;
4074 	return B_OK;
4075 }
4076 
4077 
4078 extern "C" status_t
4079 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4080 {
4081 	struct vnode* vnode;
4082 
4083 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4084 	if (status != B_OK)
4085 		return status;
4086 
4087 	*_vnode = vnode;
4088 	return B_OK;
4089 }
4090 
4091 
4092 extern "C" status_t
4093 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4094 	const char* name, struct vnode** _vnode)
4095 {
4096 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4097 }
4098 
4099 
4100 extern "C" void
4101 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4102 {
4103 	*_mountID = vnode->device;
4104 	*_vnodeID = vnode->id;
4105 }
4106 
4107 
4108 /*!
4109 	Calls fs_open() on the given vnode and returns a new
4110 	file descriptor for it
4111 */
4112 int
4113 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4114 {
4115 	return open_vnode(vnode, openMode, kernel);
4116 }
4117 
4118 
4119 /*!	Looks up a vnode with the given mount and vnode ID.
4120 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4121 	to the node.
4122 	It's currently only used by file_cache_create().
4123 */
4124 extern "C" status_t
4125 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4126 {
4127 	rw_lock_read_lock(&sVnodeLock);
4128 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4129 	rw_lock_read_unlock(&sVnodeLock);
4130 
4131 	if (vnode == NULL)
4132 		return B_ERROR;
4133 
4134 	*_vnode = vnode;
4135 	return B_OK;
4136 }
4137 
4138 
4139 extern "C" status_t
4140 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4141 	bool traverseLeafLink, bool kernel, void** _node)
4142 {
4143 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4144 		volume, path, kernel));
4145 
4146 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4147 	if (pathBuffer.InitCheck() != B_OK)
4148 		return B_NO_MEMORY;
4149 
4150 	fs_mount* mount;
4151 	status_t status = get_mount(volume->id, &mount);
4152 	if (status != B_OK)
4153 		return status;
4154 
4155 	char* buffer = pathBuffer.LockBuffer();
4156 	strlcpy(buffer, path, pathBuffer.BufferSize());
4157 
4158 	struct vnode* vnode = mount->root_vnode;
4159 
4160 	if (buffer[0] == '/')
4161 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4162 	else {
4163 		inc_vnode_ref_count(vnode);
4164 			// vnode_path_to_vnode() releases a reference to the starting vnode
4165 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4166 			kernel, &vnode, NULL);
4167 	}
4168 
4169 	put_mount(mount);
4170 
4171 	if (status != B_OK)
4172 		return status;
4173 
4174 	if (vnode->device != volume->id) {
4175 		// wrong mount ID -- must not gain access to foreign file system nodes
4176 		put_vnode(vnode);
4177 		return B_BAD_VALUE;
4178 	}
4179 
4180 	// Use get_vnode() to resolve the cookie for the right layer.
4181 	status = get_vnode(volume, vnode->id, _node);
4182 	put_vnode(vnode);
4183 
4184 	return status;
4185 }
4186 
4187 
4188 status_t
4189 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4190 	struct stat* stat, bool kernel)
4191 {
4192 	status_t status;
4193 
4194 	if (path) {
4195 		// path given: get the stat of the node referred to by (fd, path)
4196 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4197 		if (pathBuffer.InitCheck() != B_OK)
4198 			return B_NO_MEMORY;
4199 
4200 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4201 			traverseLeafLink, stat, kernel);
4202 	} else {
4203 		// no path given: get the FD and use the FD operation
4204 		struct file_descriptor* descriptor
4205 			= get_fd(get_current_io_context(kernel), fd);
4206 		if (descriptor == NULL)
4207 			return B_FILE_ERROR;
4208 
4209 		if (descriptor->ops->fd_read_stat)
4210 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4211 		else
4212 			status = B_NOT_SUPPORTED;
4213 
4214 		put_fd(descriptor);
4215 	}
4216 
4217 	return status;
4218 }
4219 
4220 
4221 /*!	Finds the full path to the file that contains the module \a moduleName,
4222 	puts it into \a pathBuffer, and returns B_OK for success.
4223 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4224 	\c B_ENTRY_NOT_FOUND if no file could be found.
4225 	\a pathBuffer is clobbered in any case and must not be relied on if this
4226 	function returns unsuccessfully.
4227 	\a basePath and \a pathBuffer must not point to the same space.
4228 */
4229 status_t
4230 vfs_get_module_path(const char* basePath, const char* moduleName,
4231 	char* pathBuffer, size_t bufferSize)
4232 {
4233 	struct vnode* dir;
4234 	struct vnode* file;
4235 	status_t status;
4236 	size_t length;
4237 	char* path;
4238 
4239 	if (bufferSize == 0
4240 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4241 		return B_BUFFER_OVERFLOW;
4242 
4243 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4244 	if (status != B_OK)
4245 		return status;
4246 
4247 	// the path buffer has been clobbered by the above call
4248 	length = strlcpy(pathBuffer, basePath, bufferSize);
4249 	if (pathBuffer[length - 1] != '/')
4250 		pathBuffer[length++] = '/';
4251 
4252 	path = pathBuffer + length;
4253 	bufferSize -= length;
4254 
4255 	while (moduleName) {
4256 		char* nextPath = strchr(moduleName, '/');
4257 		if (nextPath == NULL)
4258 			length = strlen(moduleName);
4259 		else {
4260 			length = nextPath - moduleName;
4261 			nextPath++;
4262 		}
4263 
4264 		if (length + 1 >= bufferSize) {
4265 			status = B_BUFFER_OVERFLOW;
4266 			goto err;
4267 		}
4268 
4269 		memcpy(path, moduleName, length);
4270 		path[length] = '\0';
4271 		moduleName = nextPath;
4272 
4273 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4274 		if (status != B_OK) {
4275 			// vnode_path_to_vnode() has already released the reference to dir
4276 			return status;
4277 		}
4278 
4279 		if (S_ISDIR(file->Type())) {
4280 			// go to the next directory
4281 			path[length] = '/';
4282 			path[length + 1] = '\0';
4283 			path += length + 1;
4284 			bufferSize -= length + 1;
4285 
4286 			dir = file;
4287 		} else if (S_ISREG(file->Type())) {
4288 			// it's a file so it should be what we've searched for
4289 			put_vnode(file);
4290 
4291 			return B_OK;
4292 		} else {
4293 			TRACE(("vfs_get_module_path(): something is strange here: "
4294 				"0x%08lx...\n", file->Type()));
4295 			status = B_ERROR;
4296 			dir = file;
4297 			goto err;
4298 		}
4299 	}
4300 
4301 	// if we got here, the moduleName just pointed to a directory, not to
4302 	// a real module - what should we do in this case?
4303 	status = B_ENTRY_NOT_FOUND;
4304 
4305 err:
4306 	put_vnode(dir);
4307 	return status;
4308 }
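
/* Usage sketch for vfs_get_module_path(): resolving a module name below a
 * base directory into the path of the file that contains the module. The
 * concrete base path, module name, and helper name are only illustrative.
 */
#if 0
static status_t
find_module_file(char* pathBuffer, size_t bufferSize)
{
	return vfs_get_module_path("/boot/system/add-ons/kernel",
		"bus_managers/pci/v1", pathBuffer, bufferSize);
}
#endif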
4309 
4310 
4311 /*!	\brief Normalizes a given path.
4312 
4313 	The path must refer to an existing or non-existing entry in an existing
4314 	directory, that is, after chopping off the leaf component, the remaining
4315 	path must refer to an existing directory.
4316 
4317 	The returned path will be canonical in that it will be absolute, will not
4318 	contain any "." or ".." components or duplicate occurrences of '/'s,
4319 	and none of the directory components will be symbolic links.
4320 
4321 	Any two paths referring to the same entry will result in the same
4322 	normalized path (well, that is pretty much the definition of `normalized',
4323 	isn't it :-).
4324 
4325 	\param path The path to be normalized.
4326 	\param buffer The buffer into which the normalized path will be written.
4327 		   May be the same one as \a path.
4328 	\param bufferSize The size of \a buffer.
4329 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4330 	\param kernel \c true, if the IO context of the kernel shall be used,
4331 		   otherwise that of the team this thread belongs to. Only relevant,
4332 		   if the path is relative (to get the CWD).
4333 	\return \c B_OK if everything went fine, another error code otherwise.
4334 */
4335 status_t
4336 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4337 	bool traverseLink, bool kernel)
4338 {
4339 	if (!path || !buffer || bufferSize < 1)
4340 		return B_BAD_VALUE;
4341 
4342 	if (path != buffer) {
4343 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4344 			return B_BUFFER_OVERFLOW;
4345 	}
4346 
4347 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4348 }
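
/* Usage sketch for vfs_normalize_path(): since the buffer may serve as both
 * input and output, a path can be normalized in place. The helper name is
 * hypothetical.
 */
#if 0
static status_t
normalize_in_place(char* buffer, size_t bufferSize)
{
	// e.g. "/boot/./system//lib" becomes "/boot/system/lib"
	return vfs_normalize_path(buffer, buffer, bufferSize, true, true);
}
#endif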
4349 
4350 
4351 /*!	\brief Creates a special node in the file system.
4352 
4353 	The caller gets a reference to the newly created node (which is passed
4354 	back through \a _createdVnode) and is responsible for releasing it.
4355 
4356 	\param path The path where to create the entry for the node. Can be \c NULL,
4357 		in which case the node is created without an entry in the root FS -- it
4358 		will automatically be deleted when the last reference has been released.
4359 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4360 		the target file system will just create the node with its standard
4361 		operations. Depending on the type of the node a subnode might be created
4362 		automatically, though.
4363 	\param mode The type and permissions for the node to be created.
4364 	\param flags Flags to be passed to the creating FS.
4365 	\param kernel \c true, if called in the kernel context (relevant only if
4366 		\a path is not \c NULL and not absolute).
4367 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4368 		file system creating the node, with the private data pointer and
4369 		operations for the super node. Can be \c NULL.
4370 	\param _createdVnode Pointer to pre-allocated storage where to store the
4371 		pointer to the newly created node.
4372 	\return \c B_OK, if everything went fine, another error code otherwise.
4373 */
4374 status_t
4375 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4376 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4377 	struct vnode** _createdVnode)
4378 {
4379 	struct vnode* dirNode;
4380 	char _leaf[B_FILE_NAME_LENGTH];
4381 	char* leaf = NULL;
4382 
4383 	if (path) {
4384 		// We've got a path. Get the dir vnode and the leaf name.
4385 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4386 		if (tmpPathBuffer.InitCheck() != B_OK)
4387 			return B_NO_MEMORY;
4388 
4389 		char* tmpPath = tmpPathBuffer.LockBuffer();
4390 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4391 			return B_NAME_TOO_LONG;
4392 
4393 		// get the dir vnode and the leaf name
4394 		leaf = _leaf;
4395 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4396 		if (error != B_OK)
4397 			return error;
4398 	} else {
4399 		// No path. Create the node in the root FS.
4400 		dirNode = sRoot;
4401 		inc_vnode_ref_count(dirNode);
4402 	}
4403 
4404 	VNodePutter _(dirNode);
4405 
4406 	// check support for creating special nodes
4407 	if (!HAS_FS_CALL(dirNode, create_special_node))
4408 		return B_UNSUPPORTED;
4409 
4410 	// create the node
4411 	fs_vnode superVnode;
4412 	ino_t nodeID;
4413 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4414 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4415 	if (status != B_OK)
4416 		return status;
4417 
4418 	// look up the node
4419 	rw_lock_read_lock(&sVnodeLock);
4420 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4421 	rw_lock_read_unlock(&sVnodeLock);
4422 
4423 	if (*_createdVnode == NULL) {
4424 		panic("vfs_create_special_node(): lookup of node failed");
4425 		return B_ERROR;
4426 	}
4427 
4428 	return B_OK;
4429 }
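
/* Sketch of creating an unnamed special node: with a NULL path the node gets
 * no entry in the root FS and goes away with its last reference, which the
 * caller must eventually drop via vfs_put_vnode(). The FIFO mode and helper
 * name are just examples.
 */
#if 0
static status_t
create_anonymous_fifo(struct vnode** _vnode)
{
	return vfs_create_special_node(NULL, NULL, S_IFIFO | 0600, 0, true, NULL,
		_vnode);
}
#endif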
4430 
4431 
4432 extern "C" void
4433 vfs_put_vnode(struct vnode* vnode)
4434 {
4435 	put_vnode(vnode);
4436 }
4437 
4438 
4439 extern "C" status_t
4440 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4441 {
4442 	// Get current working directory from io context
4443 	struct io_context* context = get_current_io_context(false);
4444 	status_t status = B_OK;
4445 
4446 	mutex_lock(&context->io_mutex);
4447 
4448 	if (context->cwd != NULL) {
4449 		*_mountID = context->cwd->device;
4450 		*_vnodeID = context->cwd->id;
4451 	} else
4452 		status = B_ERROR;
4453 
4454 	mutex_unlock(&context->io_mutex);
4455 	return status;
4456 }
4457 
4458 
4459 status_t
4460 vfs_unmount(dev_t mountID, uint32 flags)
4461 {
4462 	return fs_unmount(NULL, mountID, flags, true);
4463 }
4464 
4465 
4466 extern "C" status_t
4467 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4468 {
4469 	struct vnode* vnode;
4470 
4471 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4472 	if (status != B_OK)
4473 		return status;
4474 
4475 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4476 	put_vnode(vnode);
4477 	return B_OK;
4478 }
4479 
4480 
4481 extern "C" void
4482 vfs_free_unused_vnodes(int32 level)
4483 {
4484 	vnode_low_resource_handler(NULL,
4485 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4486 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4487 		level);
4488 }
4489 
4490 
4491 extern "C" bool
4492 vfs_can_page(struct vnode* vnode, void* cookie)
4493 {
4494 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4495 
4496 	if (HAS_FS_CALL(vnode, can_page))
4497 		return FS_CALL(vnode, can_page, cookie);
4498 	return false;
4499 }
4500 
4501 
4502 extern "C" status_t
4503 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4504 	const generic_io_vec* vecs, size_t count, uint32 flags,
4505 	generic_size_t* _numBytes)
4506 {
4507 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4508 		pos));
4509 
4510 #if VFS_PAGES_IO_TRACING
4511 	generic_size_t bytesRequested = *_numBytes;
4512 #endif
4513 
4514 	IORequest request;
4515 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4516 	if (status == B_OK) {
4517 		status = vfs_vnode_io(vnode, cookie, &request);
4518 		if (status == B_OK)
4519 			status = request.Wait();
4520 		*_numBytes = request.TransferredBytes();
4521 	}
4522 
4523 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4524 		status, *_numBytes));
4525 
4526 	return status;
4527 }
4528 
4529 
4530 extern "C" status_t
4531 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4532 	const generic_io_vec* vecs, size_t count, uint32 flags,
4533 	generic_size_t* _numBytes)
4534 {
4535 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4536 		pos));
4537 
4538 #if VFS_PAGES_IO_TRACING
4539 	generic_size_t bytesRequested = *_numBytes;
4540 #endif
4541 
4542 	IORequest request;
4543 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4544 	if (status == B_OK) {
4545 		status = vfs_vnode_io(vnode, cookie, &request);
4546 		if (status == B_OK)
4547 			status = request.Wait();
4548 		*_numBytes = request.TransferredBytes();
4549 	}
4550 
4551 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4552 		status, *_numBytes));
4553 
4554 	return status;
4555 }
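
/* Sketch of a single-vector vfs_read_pages() call; read_one_page() and its
 * parameters are hypothetical. On return, length holds the number of bytes
 * actually transferred.
 */
#if 0
static status_t
read_one_page(struct vnode* vnode, void* cookie, off_t pos, void* buffer)
{
	generic_io_vec vec;
	vec.base = (generic_addr_t)(addr_t)buffer;
	vec.length = B_PAGE_SIZE;

	generic_size_t length = vec.length;
	return vfs_read_pages(vnode, cookie, pos, &vec, 1, 0, &length);
}
#endif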
4556 
4557 
4558 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4559 	will be created if \a allocate is \c true.
4560 	On success, a reference to the returned cache is acquired for the
4561 	caller.
4562 */
4563 extern "C" status_t
4564 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4565 {
4566 	if (vnode->cache != NULL) {
4567 		vnode->cache->AcquireRef();
4568 		*_cache = vnode->cache;
4569 		return B_OK;
4570 	}
4571 
4572 	rw_lock_read_lock(&sVnodeLock);
4573 	vnode->Lock();
4574 
4575 	status_t status = B_OK;
4576 
4577 	// The cache could have been created in the meantime
4578 	if (vnode->cache == NULL) {
4579 		if (allocate) {
4580 			// TODO: actually the vnode needs to be busy already here, or
4581 			//	else this won't work...
4582 			bool wasBusy = vnode->IsBusy();
4583 			vnode->SetBusy(true);
4584 
4585 			vnode->Unlock();
4586 			rw_lock_read_unlock(&sVnodeLock);
4587 
4588 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4589 
4590 			rw_lock_read_lock(&sVnodeLock);
4591 			vnode->Lock();
4592 			vnode->SetBusy(wasBusy);
4593 		} else
4594 			status = B_BAD_VALUE;
4595 	}
4596 
4597 	vnode->Unlock();
4598 	rw_lock_read_unlock(&sVnodeLock);
4599 
4600 	if (status == B_OK) {
4601 		vnode->cache->AcquireRef();
4602 		*_cache = vnode->cache;
4603 	}
4604 
4605 	return status;
4606 }
4607 
4608 
4609 status_t
4610 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4611 	file_io_vec* vecs, size_t* _count)
4612 {
4613 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4614 		vnode, vecs, offset, size));
4615 
4616 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4617 }
4618 
4619 
4620 status_t
4621 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4622 {
4623 	status_t status = FS_CALL(vnode, read_stat, stat);
4624 
4625 	// fill in the st_dev and st_ino fields
4626 	if (status == B_OK) {
4627 		stat->st_dev = vnode->device;
4628 		stat->st_ino = vnode->id;
4629 		stat->st_rdev = -1;
4630 	}
4631 
4632 	return status;
4633 }
4634 
4635 
4636 status_t
4637 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4638 {
4639 	struct vnode* vnode;
4640 	status_t status = get_vnode(device, inode, &vnode, true, false);
4641 	if (status != B_OK)
4642 		return status;
4643 
4644 	status = FS_CALL(vnode, read_stat, stat);
4645 
4646 	// fill in the st_dev and st_ino fields
4647 	if (status == B_OK) {
4648 		stat->st_dev = vnode->device;
4649 		stat->st_ino = vnode->id;
4650 		stat->st_rdev = -1;
4651 	}
4652 
4653 	put_vnode(vnode);
4654 	return status;
4655 }
4656 
4657 
4658 status_t
4659 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4660 {
4661 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4662 }
4663 
4664 
4665 status_t
4666 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4667 	char* path, size_t pathLength)
4668 {
4669 	struct vnode* vnode;
4670 	status_t status;
4671 
4672 	// filter invalid leaf names
4673 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4674 		return B_BAD_VALUE;
4675 
4676 	// get the vnode matching the dir's node_ref
4677 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4678 		// special cases "." and "..": we can directly get the vnode of the
4679 		// referenced directory
4680 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4681 		leaf = NULL;
4682 	} else
4683 		status = get_vnode(device, inode, &vnode, true, false);
4684 	if (status != B_OK)
4685 		return status;
4686 
4687 	// get the directory path
4688 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4689 	put_vnode(vnode);
4690 		// we don't need the vnode anymore
4691 	if (status != B_OK)
4692 		return status;
4693 
4694 	// append the leaf name
4695 	if (leaf) {
4696 		// insert a directory separator if this is not the file system root
4697 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4698 				>= pathLength)
4699 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4700 			return B_NAME_TOO_LONG;
4701 		}
4702 	}
4703 
4704 	return B_OK;
4705 }
4706 
4707 
4708 /*!	If the given descriptor locked its vnode, that lock will be released. */
4709 void
4710 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4711 {
4712 	struct vnode* vnode = fd_vnode(descriptor);
4713 
4714 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4715 		vnode->mandatory_locked_by = NULL;
4716 }
4717 
4718 
4719 /*!	Closes all file descriptors of the specified I/O context that
4720 	have the O_CLOEXEC flag set.
4721 */
4722 void
4723 vfs_exec_io_context(io_context* context)
4724 {
4725 	uint32 i;
4726 
4727 	for (i = 0; i < context->table_size; i++) {
4728 		mutex_lock(&context->io_mutex);
4729 
4730 		struct file_descriptor* descriptor = context->fds[i];
4731 		bool remove = false;
4732 
4733 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4734 			context->fds[i] = NULL;
4735 			context->num_used_fds--;
4736 
4737 			remove = true;
4738 		}
4739 
4740 		mutex_unlock(&context->io_mutex);
4741 
4742 		if (remove) {
4743 			close_fd(descriptor);
4744 			put_fd(descriptor);
4745 		}
4746 	}
4747 }
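
/* Userland counterpart of the loop above: a descriptor opened with O_CLOEXEC
 * (or later marked FD_CLOEXEC) is closed automatically when the team calls
 * one of the exec*() functions. The helper name is hypothetical.
 */
#if 0
#include <fcntl.h>

static int
open_private_fd(const char* path)
{
	// this descriptor does not survive an exec*()
	return open(path, O_RDONLY | O_CLOEXEC);
}
#endif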
4748 
4749 
4750 /*! Sets up a new io_context structure, and inherits the properties
4751 	of the parent io_context if one is given.
4752 */
4753 io_context*
4754 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4755 {
4756 	io_context* context = (io_context*)malloc(sizeof(io_context));
4757 	if (context == NULL)
4758 		return NULL;
4759 
4760 	TIOC(NewIOContext(context, parentContext));
4761 
4762 	memset(context, 0, sizeof(io_context));
4763 	context->ref_count = 1;
4764 
4765 	MutexLocker parentLocker;
4766 
4767 	size_t tableSize;
4768 	if (parentContext) {
4769 		parentLocker.SetTo(parentContext->io_mutex, false);
4770 		tableSize = parentContext->table_size;
4771 	} else
4772 		tableSize = DEFAULT_FD_TABLE_SIZE;
4773 
4774 	// allocate space for FDs, select infos, and the close-on-exec bitmap
4775 	context->fds = (file_descriptor**)malloc(
4776 		sizeof(struct file_descriptor*) * tableSize
4777 		+ sizeof(struct select_sync*) * tableSize
4778 		+ (tableSize + 7) / 8);
4779 	if (context->fds == NULL) {
4780 		free(context);
4781 		return NULL;
4782 	}
4783 
4784 	context->select_infos = (select_info**)(context->fds + tableSize);
4785 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4786 
4787 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4788 		+ sizeof(struct select_sync*) * tableSize
4789 		+ (tableSize + 7) / 8);
4790 
4791 	mutex_init(&context->io_mutex, "I/O context");
4792 
4793 	// Copy all parent file descriptors
4794 
4795 	if (parentContext) {
4796 		size_t i;
4797 
4798 		mutex_lock(&sIOContextRootLock);
4799 		context->root = parentContext->root;
4800 		if (context->root)
4801 			inc_vnode_ref_count(context->root);
4802 		mutex_unlock(&sIOContextRootLock);
4803 
4804 		context->cwd = parentContext->cwd;
4805 		if (context->cwd)
4806 			inc_vnode_ref_count(context->cwd);
4807 
4808 		for (i = 0; i < tableSize; i++) {
4809 			struct file_descriptor* descriptor = parentContext->fds[i];
4810 
4811 			if (descriptor != NULL) {
4812 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4813 				if (closeOnExec && purgeCloseOnExec)
4814 					continue;
4815 
4816 				TFD(InheritFD(context, i, descriptor, parentContext));
4817 
4818 				context->fds[i] = descriptor;
4819 				context->num_used_fds++;
4820 				atomic_add(&descriptor->ref_count, 1);
4821 				atomic_add(&descriptor->open_count, 1);
4822 
4823 				if (closeOnExec)
4824 					fd_set_close_on_exec(context, i, true);
4825 			}
4826 		}
4827 
4828 		parentLocker.Unlock();
4829 	} else {
4830 		context->root = sRoot;
4831 		context->cwd = sRoot;
4832 
4833 		if (context->root)
4834 			inc_vnode_ref_count(context->root);
4835 
4836 		if (context->cwd)
4837 			inc_vnode_ref_count(context->cwd);
4838 	}
4839 
4840 	context->table_size = tableSize;
4841 
4842 	list_init(&context->node_monitors);
4843 	context->max_monitors = DEFAULT_NODE_MONITORS;
4844 
4845 	return context;
4846 }
4847 
4848 
4849 static status_t
4850 vfs_free_io_context(io_context* context)
4851 {
4852 	uint32 i;
4853 
4854 	TIOC(FreeIOContext(context));
4855 
4856 	if (context->root)
4857 		put_vnode(context->root);
4858 
4859 	if (context->cwd)
4860 		put_vnode(context->cwd);
4861 
4862 	mutex_lock(&context->io_mutex);
4863 
4864 	for (i = 0; i < context->table_size; i++) {
4865 		if (struct file_descriptor* descriptor = context->fds[i]) {
4866 			close_fd(descriptor);
4867 			put_fd(descriptor);
4868 		}
4869 	}
4870 
4871 	mutex_destroy(&context->io_mutex);
4872 
4873 	remove_node_monitors(context);
4874 	free(context->fds);
4875 	free(context);
4876 
4877 	return B_OK;
4878 }
4879 
4880 
4881 void
4882 vfs_get_io_context(io_context* context)
4883 {
4884 	atomic_add(&context->ref_count, 1);
4885 }
4886 
4887 
4888 void
4889 vfs_put_io_context(io_context* context)
4890 {
4891 	if (atomic_add(&context->ref_count, -1) == 1)
4892 		vfs_free_io_context(context);
4893 }
4894 
4895 
4896 static status_t
4897 vfs_resize_fd_table(struct io_context* context, const int newSize)
4898 {
4899 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4900 		return B_BAD_VALUE;
4901 
4902 	TIOC(ResizeIOContext(context, newSize));
4903 
4904 	MutexLocker _(context->io_mutex);
4905 
4906 	int oldSize = context->table_size;
4907 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4908 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4909 
4910 	// If the tables shrink, make sure none of the fds being dropped are in use.
4911 	if (newSize < oldSize) {
4912 		for (int i = oldSize; i-- > newSize;) {
4913 			if (context->fds[i])
4914 				return B_BUSY;
4915 		}
4916 	}
4917 
4918 	// store pointers to the old tables
4919 	file_descriptor** oldFDs = context->fds;
4920 	select_info** oldSelectInfos = context->select_infos;
4921 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4922 
4923 	// allocate new tables
4924 	file_descriptor** newFDs = (file_descriptor**)malloc(
4925 		sizeof(struct file_descriptor*) * newSize
4926 		+ sizeof(struct select_sync*) * newSize
4927 		+ newCloseOnExitBitmapSize);
4928 	if (newFDs == NULL)
4929 		return B_NO_MEMORY;
4930 
4931 	context->fds = newFDs;
4932 	context->select_infos = (select_info**)(context->fds + newSize);
4933 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4934 	context->table_size = newSize;
4935 
4936 	// copy entries from old tables
4937 	int toCopy = min_c(oldSize, newSize);
4938 
4939 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4940 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4941 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4942 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4943 
4944 	// clear additional entries, if the tables grow
4945 	if (newSize > oldSize) {
4946 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4947 		memset(context->select_infos + oldSize, 0,
4948 			sizeof(void*) * (newSize - oldSize));
4949 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4950 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4951 	}
4952 
4953 	free(oldFDs);
4954 
4955 	return B_OK;
4956 }
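
/* Sketch of the combined FD table allocation used above: a single chunk
 * holds the FD pointers, the select info pointers, and the close-on-exec
 * bitmap with one bit per FD, rounded up to whole bytes. The helper name is
 * hypothetical.
 */
#if 0
static size_t
fd_table_allocation_size(size_t tableSize)
{
	return sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8;
}
#endif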
4957 
4958 
4959 static status_t
4960 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4961 {
4962 	int	status = B_OK;
4963 
4964 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4965 		return B_BAD_VALUE;
4966 
4967 	mutex_lock(&context->io_mutex);
4968 
4969 	if ((size_t)newSize < context->num_monitors) {
4970 		status = B_BUSY;
4971 		goto out;
4972 	}
4973 	context->max_monitors = newSize;
4974 
4975 out:
4976 	mutex_unlock(&context->io_mutex);
4977 	return status;
4978 }
4979 
4980 
4981 int
4982 vfs_getrlimit(int resource, struct rlimit* rlp)
4983 {
4984 	if (!rlp)
4985 		return B_BAD_ADDRESS;
4986 
4987 	switch (resource) {
4988 		case RLIMIT_NOFILE:
4989 		{
4990 			struct io_context* context = get_current_io_context(false);
4991 			MutexLocker _(context->io_mutex);
4992 
4993 			rlp->rlim_cur = context->table_size;
4994 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4995 			return 0;
4996 		}
4997 
4998 		case RLIMIT_NOVMON:
4999 		{
5000 			struct io_context* context = get_current_io_context(false);
5001 			MutexLocker _(context->io_mutex);
5002 
5003 			rlp->rlim_cur = context->max_monitors;
5004 			rlp->rlim_max = MAX_NODE_MONITORS;
5005 			return 0;
5006 		}
5007 
5008 		default:
5009 			return B_BAD_VALUE;
5010 	}
5011 }
5012 
5013 
5014 int
5015 vfs_setrlimit(int resource, const struct rlimit* rlp)
5016 {
5017 	if (!rlp)
5018 		return B_BAD_ADDRESS;
5019 
5020 	switch (resource) {
5021 		case RLIMIT_NOFILE:
5022 			/* TODO: check getuid() */
5023 			if (rlp->rlim_max != RLIM_SAVED_MAX
5024 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5025 				return B_NOT_ALLOWED;
5026 
5027 			return vfs_resize_fd_table(get_current_io_context(false),
5028 				rlp->rlim_cur);
5029 
5030 		case RLIMIT_NOVMON:
5031 			/* TODO: check getuid() */
5032 			if (rlp->rlim_max != RLIM_SAVED_MAX
5033 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5034 				return B_NOT_ALLOWED;
5035 
5036 			return vfs_resize_monitor_table(get_current_io_context(false),
5037 				rlp->rlim_cur);
5038 
5039 		default:
5040 			return B_BAD_VALUE;
5041 	}
5042 }
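
/* Userland-level sketch of the limits backed by vfs_getrlimit() and
 * vfs_setrlimit(): RLIMIT_NOFILE maps to the FD table size, RLIMIT_NOVMON
 * to the node monitor limit. The helper name is hypothetical.
 */
#if 0
#include <sys/resource.h>

static int
raise_fd_limit(rlim_t wanted)
{
	struct rlimit limit;
	if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
		return -1;

	limit.rlim_cur = wanted;
	return setrlimit(RLIMIT_NOFILE, &limit);
}
#endif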
5043 
5044 
5045 status_t
5046 vfs_init(kernel_args* args)
5047 {
5048 	vnode::StaticInit();
5049 
5050 	struct vnode dummyVnode;
5051 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5052 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5053 	if (sVnodeTable == NULL)
5054 		panic("vfs_init: error creating vnode hash table\n");
5055 
5056 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5057 
5058 	struct fs_mount dummyMount;
5059 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5060 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5061 	if (sMountsTable == NULL)
5062 		panic("vfs_init: error creating mounts hash table\n");
5063 
5064 	node_monitor_init();
5065 
5066 	sRoot = NULL;
5067 
5068 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5069 
5070 	if (block_cache_init() != B_OK)
5071 		return B_ERROR;
5072 
5073 #ifdef ADD_DEBUGGER_COMMANDS
5074 	// add some debugger commands
5075 	add_debugger_command_etc("vnode", &dump_vnode,
5076 		"Print info about the specified vnode",
5077 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5078 		"Prints information about the vnode specified by address <vnode> or\n"
5079 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5080 		"constructed and printed. It might not be possible to construct a\n"
5081 		"complete path, though.\n",
5082 		0);
5083 	add_debugger_command("vnodes", &dump_vnodes,
5084 		"list all vnodes (from the specified device)");
5085 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5086 		"list all vnode caches");
5087 	add_debugger_command("mount", &dump_mount,
5088 		"info about the specified fs_mount");
5089 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5090 	add_debugger_command("io_context", &dump_io_context,
5091 		"info about the I/O context");
5092 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5093 		"info about vnode usage");
5094 #endif
5095 
5096 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5097 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5098 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5099 		0);
5100 
5101 	file_map_init();
5102 
5103 	return file_cache_init();
5104 }
5105 
5106 
5107 //	#pragma mark - fd_ops implementations
5108 
5109 
5110 /*!
5111 	Calls fs_open() on the given vnode and returns a new
5112 	file descriptor for it
5113 */
5114 static int
5115 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5116 {
5117 	void* cookie;
5118 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5119 	if (status != B_OK)
5120 		return status;
5121 
5122 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5123 	if (fd < 0) {
5124 		FS_CALL(vnode, close, cookie);
5125 		FS_CALL(vnode, free_cookie, cookie);
5126 	}
5127 	return fd;
5128 }
5129 
5130 
5131 /*!
5132 	Creates the entry in the given directory (or opens an existing one,
5133 	unless O_EXCL is given) and returns a new file descriptor for it
5134 */
5135 static int
5136 create_vnode(struct vnode* directory, const char* name, int openMode,
5137 	int perms, bool kernel)
5138 {
5139 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5140 	status_t status = B_ERROR;
5141 	struct vnode* vnode;
5142 	void* cookie;
5143 	ino_t newID;
5144 
5145 	// This is somewhat tricky: If the entry already exists, the FS responsible
5146 	// for the directory might not necessarily also be the one responsible for
5147 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5148 	// we can actually never call the create() hook without O_EXCL. Instead we
5149 	// try to look the entry up first. If it already exists, we just open the
5150 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5151 	// introduces a race condition, since someone else might have created the
5152 	// entry in the meantime. We hope the respective FS returns the correct
5153 	// error code in that case and then retry (up to 3 times).
5154 
5155 	for (int i = 0; i < 3 && status != B_OK; i++) {
5156 		// look the node up
5157 		status = lookup_dir_entry(directory, name, &vnode);
5158 		if (status == B_OK) {
5159 			VNodePutter putter(vnode);
5160 
5161 			if ((openMode & O_EXCL) != 0)
5162 				return B_FILE_EXISTS;
5163 
5164 			// If the node is a symlink, we have to follow it, unless
5165 			// O_NOTRAVERSE is set.
5166 			if (S_ISLNK(vnode->Type()) && traverse) {
5167 				putter.Put();
5168 				char clonedName[B_FILE_NAME_LENGTH + 1];
5169 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5170 						>= B_FILE_NAME_LENGTH) {
5171 					return B_NAME_TOO_LONG;
5172 				}
5173 
5174 				inc_vnode_ref_count(directory);
5175 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5176 					kernel, &vnode, NULL);
5177 				if (status != B_OK)
5178 					return status;
5179 
5180 				putter.SetTo(vnode);
5181 			}
5182 
5183 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5184 				put_vnode(vnode);
5185 				return B_LINK_LIMIT;
5186 			}
5187 
5188 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5189 			// on success keep the vnode reference for the FD
5190 			if (fd >= 0)
5191 				putter.Detach();
5192 
5193 			return fd;
5194 		}
5195 
5196 		// it doesn't exist yet -- try to create it
5197 
5198 		if (!HAS_FS_CALL(directory, create))
5199 			return B_READ_ONLY_DEVICE;
5200 
5201 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5202 			&cookie, &newID);
5203 		if (status != B_OK
5204 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5205 			return status;
5206 		}
5207 	}
5208 
5209 	if (status != B_OK)
5210 		return status;
5211 
5212 	// the node has been created successfully
5213 
5214 	rw_lock_read_lock(&sVnodeLock);
5215 	vnode = lookup_vnode(directory->device, newID);
5216 	rw_lock_read_unlock(&sVnodeLock);
5217 
5218 	if (vnode == NULL) {
5219 		panic("vfs: fs_create() returned success but there is no vnode, "
5220 			"mount ID %ld!\n", directory->device);
5221 		return B_BAD_VALUE;
5222 	}
5223 
5224 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5225 	if (fd >= 0)
5226 		return fd;
5227 
5228 	status = fd;
5229 
5230 	// something went wrong, clean up
5231 
5232 	FS_CALL(vnode, close, cookie);
5233 	FS_CALL(vnode, free_cookie, cookie);
5234 	put_vnode(vnode);
5235 
5236 	FS_CALL(directory, unlink, name);
5237 
5238 	return status;
5239 }
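
/* The userland-visible effect of the lookup/create loop above: O_CREAT alone
 * opens an existing entry, while O_CREAT | O_EXCL fails with B_FILE_EXISTS
 * (EEXIST) if the entry already exists. The helper name is hypothetical.
 */
#if 0
#include <fcntl.h>

static int
open_or_create(const char* path)
{
	// opens the file if present, creates it otherwise
	return open(path, O_RDWR | O_CREAT, 0644);
}
#endif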
5240 
5241 
5242 /*! Calls fs open_dir() on the given vnode and returns a new
5243 	file descriptor for it
5244 */
5245 static int
5246 open_dir_vnode(struct vnode* vnode, bool kernel)
5247 {
5248 	void* cookie;
5249 	int status;
5250 
5251 	status = FS_CALL(vnode, open_dir, &cookie);
5252 	if (status != B_OK)
5253 		return status;
5254 
5255 	// directory is opened, create a fd
5256 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5257 	if (status >= 0)
5258 		return status;
5259 
5260 	FS_CALL(vnode, close_dir, cookie);
5261 	FS_CALL(vnode, free_dir_cookie, cookie);
5262 
5263 	return status;
5264 }
5265 
5266 
5267 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5268 	file descriptor for it.
5269 	Used by attr_dir_open() and attr_dir_open_fd().
5270 */
5271 static int
5272 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5273 {
5274 	void* cookie;
5275 	int status;
5276 
5277 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5278 		return B_NOT_SUPPORTED;
5279 
5280 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5281 	if (status != B_OK)
5282 		return status;
5283 
5284 	// directory is opened, create a fd
5285 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5286 	if (status >= 0)
5287 		return status;
5288 
5289 	FS_CALL(vnode, close_attr_dir, cookie);
5290 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5291 
5292 	return status;
5293 }
5294 
5295 
5296 static int
5297 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5298 	int openMode, int perms, bool kernel)
5299 {
5300 	struct vnode* directory;
5301 	int status;
5302 
5303 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5304 		"kernel %d\n", name, openMode, perms, kernel));
5305 
5306 	// get directory to put the new file in
5307 	status = get_vnode(mountID, directoryID, &directory, true, false);
5308 	if (status != B_OK)
5309 		return status;
5310 
5311 	status = create_vnode(directory, name, openMode, perms, kernel);
5312 	put_vnode(directory);
5313 
5314 	return status;
5315 }
5316 
5317 
5318 static int
5319 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5320 {
5321 	char name[B_FILE_NAME_LENGTH];
5322 	struct vnode* directory;
5323 	int status;
5324 
5325 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5326 		openMode, perms, kernel));
5327 
5328 	// get directory to put the new file in
5329 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5330 	if (status < 0)
5331 		return status;
5332 
5333 	status = create_vnode(directory, name, openMode, perms, kernel);
5334 
5335 	put_vnode(directory);
5336 	return status;
5337 }
5338 
5339 
5340 static int
5341 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5342 	int openMode, bool kernel)
5343 {
5344 	if (name == NULL || *name == '\0')
5345 		return B_BAD_VALUE;
5346 
5347 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5348 		mountID, directoryID, name, openMode));
5349 
5350 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5351 
5352 	// get the vnode matching the entry_ref
5353 	struct vnode* vnode;
5354 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5355 		kernel, &vnode);
5356 	if (status != B_OK)
5357 		return status;
5358 
5359 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5360 		put_vnode(vnode);
5361 		return B_LINK_LIMIT;
5362 	}
5363 
5364 	int newFD = open_vnode(vnode, openMode, kernel);
5365 	if (newFD >= 0) {
5366 		// The vnode reference has been transferred to the FD
5367 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5368 			directoryID, vnode->id, name);
5369 	} else
5370 		put_vnode(vnode);
5371 
5372 	return newFD;
5373 }
5374 
5375 
5376 static int
5377 file_open(int fd, char* path, int openMode, bool kernel)
5378 {
5379 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5380 
5381 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5382 		fd, path, openMode, kernel));
5383 
5384 	// get the vnode matching the vnode + path combination
5385 	struct vnode* vnode;
5386 	ino_t parentID;
5387 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5388 		&parentID, kernel);
5389 	if (status != B_OK)
5390 		return status;
5391 
5392 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5393 		put_vnode(vnode);
5394 		return B_LINK_LIMIT;
5395 	}
5396 
5397 	// open the vnode
5398 	int newFD = open_vnode(vnode, openMode, kernel);
5399 	if (newFD >= 0) {
5400 		// The vnode reference has been transferred to the FD
5401 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5402 			vnode->device, parentID, vnode->id, NULL);
5403 	} else
5404 		put_vnode(vnode);
5405 
5406 	return newFD;
5407 }
5408 
5409 
5410 static status_t
5411 file_close(struct file_descriptor* descriptor)
5412 {
5413 	struct vnode* vnode = descriptor->u.vnode;
5414 	status_t status = B_OK;
5415 
5416 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5417 
5418 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5419 		vnode->id);
5420 	if (HAS_FS_CALL(vnode, close)) {
5421 		status = FS_CALL(vnode, close, descriptor->cookie);
5422 	}
5423 
5424 	if (status == B_OK) {
5425 		// remove all outstanding locks for this team
5426 		release_advisory_lock(vnode, NULL);
5427 	}
5428 	return status;
5429 }
5430 
5431 
5432 static void
5433 file_free_fd(struct file_descriptor* descriptor)
5434 {
5435 	struct vnode* vnode = descriptor->u.vnode;
5436 
5437 	if (vnode != NULL) {
5438 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5439 		put_vnode(vnode);
5440 	}
5441 }
5442 
5443 
5444 static status_t
5445 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5446 	size_t* length)
5447 {
5448 	struct vnode* vnode = descriptor->u.vnode;
5449 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5450 		*length));
5451 
5452 	if (S_ISDIR(vnode->Type()))
5453 		return B_IS_A_DIRECTORY;
5454 
5455 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5456 }
5457 
5458 
5459 static status_t
5460 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5461 	size_t* length)
5462 {
5463 	struct vnode* vnode = descriptor->u.vnode;
5464 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5465 
5466 	if (S_ISDIR(vnode->Type()))
5467 		return B_IS_A_DIRECTORY;
5468 	if (!HAS_FS_CALL(vnode, write))
5469 		return B_READ_ONLY_DEVICE;
5470 
5471 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5472 }
5473 
5474 
5475 static off_t
5476 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5477 {
5478 	struct vnode* vnode = descriptor->u.vnode;
5479 	off_t offset;
5480 
5481 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5482 
5483 	// some kinds of files are not seekable
5484 	switch (vnode->Type() & S_IFMT) {
5485 		case S_IFIFO:
5486 		case S_IFSOCK:
5487 			return ESPIPE;
5488 
5489 		// The Open Group Base Specs single out only pipes, FIFOs, and
5490 		// sockets as non-seekable, so we allow seeking all other file types.
5491 		case S_IFREG:
5492 		case S_IFBLK:
5493 		case S_IFDIR:
5494 		case S_IFLNK:
5495 		case S_IFCHR:
5496 			break;
5497 	}
5498 
5499 	switch (seekType) {
5500 		case SEEK_SET:
5501 			offset = 0;
5502 			break;
5503 		case SEEK_CUR:
5504 			offset = descriptor->pos;
5505 			break;
5506 		case SEEK_END:
5507 		{
5508 			// stat() the node
5509 			if (!HAS_FS_CALL(vnode, read_stat))
5510 				return B_NOT_SUPPORTED;
5511 
5512 			struct stat stat;
5513 			status_t status = FS_CALL(vnode, read_stat, &stat);
5514 			if (status != B_OK)
5515 				return status;
5516 
5517 			offset = stat.st_size;
5518 			break;
5519 		}
5520 		default:
5521 			return B_BAD_VALUE;
5522 	}
5523 
5524 	// assumes off_t is 64 bits wide
5525 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5526 		return B_BUFFER_OVERFLOW;
5527 
5528 	pos += offset;
5529 	if (pos < 0)
5530 		return B_BAD_VALUE;
5531 
5532 	return descriptor->pos = pos;
5533 }
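
/* Sketch of the position check performed above for SEEK_CUR/SEEK_END: the
 * new position base + pos must neither overflow an off_t (assumed to be
 * 64 bits wide) nor become negative. The helper name is hypothetical.
 */
#if 0
static bool
seek_position_valid(off_t base, off_t pos)
{
	if (base > 0 && LONGLONG_MAX - base < pos)
		return false;

	return base + pos >= 0;
}
#endif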
5534 
5535 
5536 static status_t
5537 file_select(struct file_descriptor* descriptor, uint8 event,
5538 	struct selectsync* sync)
5539 {
5540 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5541 
5542 	struct vnode* vnode = descriptor->u.vnode;
5543 
5544 	// If the FS has no select() hook, notify select() now.
5545 	if (!HAS_FS_CALL(vnode, select))
5546 		return notify_select_event(sync, event);
5547 
5548 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5549 }
5550 
5551 
5552 static status_t
5553 file_deselect(struct file_descriptor* descriptor, uint8 event,
5554 	struct selectsync* sync)
5555 {
5556 	struct vnode* vnode = descriptor->u.vnode;
5557 
5558 	if (!HAS_FS_CALL(vnode, deselect))
5559 		return B_OK;
5560 
5561 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5562 }
5563 
5564 
5565 static status_t
5566 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5567 	bool kernel)
5568 {
5569 	struct vnode* vnode;
5570 	status_t status;
5571 
5572 	if (name == NULL || *name == '\0')
5573 		return B_BAD_VALUE;
5574 
5575 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5576 		"perms = %d)\n", mountID, parentID, name, perms));
5577 
5578 	status = get_vnode(mountID, parentID, &vnode, true, false);
5579 	if (status != B_OK)
5580 		return status;
5581 
5582 	if (HAS_FS_CALL(vnode, create_dir))
5583 		status = FS_CALL(vnode, create_dir, name, perms);
5584 	else
5585 		status = B_READ_ONLY_DEVICE;
5586 
5587 	put_vnode(vnode);
5588 	return status;
5589 }
5590 
5591 
5592 static status_t
5593 dir_create(int fd, char* path, int perms, bool kernel)
5594 {
5595 	char filename[B_FILE_NAME_LENGTH];
5596 	struct vnode* vnode;
5597 	status_t status;
5598 
5599 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5600 		kernel));
5601 
5602 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5603 	if (status < 0)
5604 		return status;
5605 
5606 	if (HAS_FS_CALL(vnode, create_dir)) {
5607 		status = FS_CALL(vnode, create_dir, filename, perms);
5608 	} else
5609 		status = B_READ_ONLY_DEVICE;
5610 
5611 	put_vnode(vnode);
5612 	return status;
5613 }
5614 
5615 
5616 static int
5617 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5618 {
5619 	struct vnode* vnode;
5620 	int status;
5621 
5622 	FUNCTION(("dir_open_entry_ref()\n"));
5623 
5624 	if (name && *name == '\0')
5625 		return B_BAD_VALUE;
5626 
5627 	// get the vnode matching the entry_ref/node_ref
5628 	if (name) {
5629 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5630 			&vnode);
5631 	} else
5632 		status = get_vnode(mountID, parentID, &vnode, true, false);
5633 	if (status != B_OK)
5634 		return status;
5635 
5636 	int newFD = open_dir_vnode(vnode, kernel);
5637 	if (newFD >= 0) {
5638 		// The vnode reference has been transferred to the FD
5639 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5640 			vnode->id, name);
5641 	} else
5642 		put_vnode(vnode);
5643 
5644 	return newFD;
5645 }
5646 
5647 
5648 static int
5649 dir_open(int fd, char* path, bool kernel)
5650 {
5651 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5652 		kernel));
5653 
5654 	// get the vnode matching the vnode + path combination
5655 	struct vnode* vnode = NULL;
5656 	ino_t parentID;
5657 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5658 		kernel);
5659 	if (status != B_OK)
5660 		return status;
5661 
5662 	// open the dir
5663 	int newFD = open_dir_vnode(vnode, kernel);
5664 	if (newFD >= 0) {
5665 		// The vnode reference has been transferred to the FD
5666 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5667 			parentID, vnode->id, NULL);
5668 	} else
5669 		put_vnode(vnode);
5670 
5671 	return newFD;
5672 }
5673 
5674 
5675 static status_t
5676 dir_close(struct file_descriptor* descriptor)
5677 {
5678 	struct vnode* vnode = descriptor->u.vnode;
5679 
5680 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5681 
5682 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5683 		vnode->id);
5684 	if (HAS_FS_CALL(vnode, close_dir))
5685 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5686 
5687 	return B_OK;
5688 }
5689 
5690 
5691 static void
5692 dir_free_fd(struct file_descriptor* descriptor)
5693 {
5694 	struct vnode* vnode = descriptor->u.vnode;
5695 
5696 	if (vnode != NULL) {
5697 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5698 		put_vnode(vnode);
5699 	}
5700 }
5701 
5702 
5703 static status_t
5704 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5705 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5706 {
5707 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5708 		bufferSize, _count);
5709 }
5710 
5711 
5712 static status_t
5713 fix_dirent(struct vnode* parent, struct dirent* entry,
5714 	struct io_context* ioContext)
5715 {
5716 	// set d_pdev and d_pino
5717 	entry->d_pdev = parent->device;
5718 	entry->d_pino = parent->id;
5719 
5720 	// If this is the ".." entry and the directory is the root of a FS,
5721 	// we need to replace d_dev and d_ino with the actual values.
5722 	if (strcmp(entry->d_name, "..") == 0
5723 		&& parent->mount->root_vnode == parent
5724 		&& parent->mount->covers_vnode) {
5725 		inc_vnode_ref_count(parent);
5726 			// vnode_path_to_vnode() puts the node
5727 
5728 		// Make sure the IO context root is not bypassed.
5729 		if (parent == ioContext->root) {
5730 			entry->d_dev = parent->device;
5731 			entry->d_ino = parent->id;
5732 		} else {
5733 			// ".." is guaranteed not to be clobbered by this call
5734 			struct vnode* vnode;
5735 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5736 				ioContext, &vnode, NULL);
5737 
5738 			if (status == B_OK) {
5739 				entry->d_dev = vnode->device;
5740 				entry->d_ino = vnode->id;
5741 			}
5742 		}
5743 	} else {
5744 		// resolve mount points
5745 		ReadLocker _(&sVnodeLock);
5746 
5747 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5748 		if (vnode != NULL) {
5749 			if (vnode->covered_by != NULL) {
5750 				entry->d_dev = vnode->covered_by->device;
5751 				entry->d_ino = vnode->covered_by->id;
5752 			}
5753 		}
5754 	}
5755 
5756 	return B_OK;
5757 }
5758 
5759 
5760 static status_t
5761 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5762 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5763 {
5764 	if (!HAS_FS_CALL(vnode, read_dir))
5765 		return B_NOT_SUPPORTED;
5766 
5767 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5768 		_count);
5769 	if (error != B_OK)
5770 		return error;
5771 
5772 	// we need to adjust the dirents we have just read
5773 	uint32 count = *_count;
5774 	for (uint32 i = 0; i < count; i++) {
5775 		error = fix_dirent(vnode, buffer, ioContext);
5776 		if (error != B_OK)
5777 			return error;
5778 
5779 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5780 	}
5781 
5782 	return error;
5783 }
5784 
5785 
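/*! Illustrative sketch (not compiled, helper name hypothetical): read_dir()
	fills the buffer with a packed array of variable sized dirents, so the
	only way to step from one entry to the next is to advance by d_reclen -
	the same walk the fix-up loop in dir_read() above performs.
*/
#if 0
static void
for_each_dirent(struct dirent* buffer, uint32 count,
	void (*visit)(struct dirent* entry))
{
	for (uint32 i = 0; i < count; i++) {
		visit(buffer);
		// d_reclen is the length of the whole (variable sized) record
		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
	}
}
#endif

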
5786 static status_t
5787 dir_rewind(struct file_descriptor* descriptor)
5788 {
5789 	struct vnode* vnode = descriptor->u.vnode;
5790 
5791 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5792 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5793 	}
5794 
5795 	return B_NOT_SUPPORTED;
5796 }
5797 
5798 
5799 static status_t
5800 dir_remove(int fd, char* path, bool kernel)
5801 {
5802 	char name[B_FILE_NAME_LENGTH];
5803 	struct vnode* directory;
5804 	status_t status;
5805 
5806 	if (path != NULL) {
5807 		// we need to make sure the path name doesn't end in "/", ".",
5808 		// or ".."
5809 		char* lastSlash = strrchr(path, '/');
5810 		if (lastSlash != NULL) {
5811 			char* leaf = lastSlash + 1;
5812 			if (!strcmp(leaf, ".."))
5813 				return B_NOT_ALLOWED;
5814 
5815 			// omit multiple slashes
5816 			while (lastSlash > path && lastSlash[-1] == '/') {
5817 				lastSlash--;
5818 			}
5819 
5820 			if (!leaf[0]
5821 				|| !strcmp(leaf, ".")) {
5822 				// "name/" -> "name", or "name/." -> "name"
5823 				lastSlash[0] = '\0';
5824 			}
5825 		}
5826 
5827 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5828 			return B_NOT_ALLOWED;
5829 	}
5830 
5831 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5832 	if (status != B_OK)
5833 		return status;
5834 
5835 	if (HAS_FS_CALL(directory, remove_dir))
5836 		status = FS_CALL(directory, remove_dir, name);
5837 	else
5838 		status = B_READ_ONLY_DEVICE;
5839 
5840 	put_vnode(directory);
5841 	return status;
5842 }
5843 
5844 
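/*! Illustrative sketch (not compiled, names hypothetical): what the leaf
	normalization in dir_remove() above does with a few sample paths. The
	calls assume a kernel context and, for the relative paths, a negative fd
	(i.e. resolution against the current directory).
*/
#if 0
static void
dir_remove_examples()
{
	char a[] = "foo/bar/";	// trimmed to "foo/bar", then removed
	char b[] = "foo/bar/.";	// trimmed to "foo/bar", then removed
	char c[] = "foo/..";	// refused with B_NOT_ALLOWED
	char d[] = ".";			// refused with B_NOT_ALLOWED

	dir_remove(-1, a, true);
	dir_remove(-1, b, true);
	dir_remove(-1, c, true);
	dir_remove(-1, d, true);
}
#endif

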
5845 static status_t
5846 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5847 	size_t length)
5848 {
5849 	struct vnode* vnode = descriptor->u.vnode;
5850 
5851 	if (HAS_FS_CALL(vnode, ioctl))
5852 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5853 
5854 	return B_NOT_SUPPORTED;
5855 }
5856 
5857 
5858 static status_t
5859 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5860 {
5861 	struct flock flock;
5862 
5863 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5864 		fd, op, argument, kernel ? "kernel" : "user"));
5865 
5866 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5867 		fd);
5868 	if (descriptor == NULL)
5869 		return B_FILE_ERROR;
5870 
5871 	struct vnode* vnode = fd_vnode(descriptor);
5872 
5873 	status_t status = B_OK;
5874 
5875 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5876 		if (descriptor->type != FDTYPE_FILE)
5877 			status = B_BAD_VALUE;
5878 		else if (user_memcpy(&flock, (struct flock*)argument,
5879 				sizeof(struct flock)) != B_OK)
5880 			status = B_BAD_ADDRESS;
5881 
5882 		if (status != B_OK) {
5883 			put_fd(descriptor);
5884 			return status;
5885 		}
5886 	}
5887 
5888 	switch (op) {
5889 		case F_SETFD:
5890 		{
5891 			// Set file descriptor flags
5892 			struct io_context* context = get_current_io_context(kernel);
5893 
5894 			// O_CLOEXEC is the only flag available at this time
5895 			mutex_lock(&context->io_mutex);
5896 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5897 			mutex_unlock(&context->io_mutex);
5898 
5899 			status = B_OK;
5900 			break;
5901 		}
5902 
5903 		case F_GETFD:
5904 		{
5905 			struct io_context* context = get_current_io_context(kernel);
5906 
5907 			// Get file descriptor flags
5908 			mutex_lock(&context->io_mutex);
5909 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5910 			mutex_unlock(&context->io_mutex);
5911 			break;
5912 		}
5913 
5914 		case F_SETFL:
5915 			// Set file descriptor open mode
5916 
5917 			// we only accept changes to O_APPEND and O_NONBLOCK
5918 			argument &= O_APPEND | O_NONBLOCK;
5919 			if (descriptor->ops->fd_set_flags != NULL) {
5920 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5921 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5922 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5923 					(int)argument);
5924 			} else
5925 				status = B_NOT_SUPPORTED;
5926 
5927 			if (status == B_OK) {
5928 				// update this descriptor's open_mode field
5929 				descriptor->open_mode = (descriptor->open_mode
5930 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5931 			}
5932 
5933 			break;
5934 
5935 		case F_GETFL:
5936 			// Get file descriptor open mode
5937 			status = descriptor->open_mode;
5938 			break;
5939 
5940 		case F_DUPFD:
5941 		{
5942 			struct io_context* context = get_current_io_context(kernel);
5943 
5944 			status = new_fd_etc(context, descriptor, (int)argument);
5945 			if (status >= 0) {
5946 				mutex_lock(&context->io_mutex);
5947 				fd_set_close_on_exec(context, fd, false);
5948 				mutex_unlock(&context->io_mutex);
5949 
5950 				atomic_add(&descriptor->ref_count, 1);
5951 			}
5952 			break;
5953 		}
5954 
5955 		case F_GETLK:
5956 			if (vnode != NULL) {
5957 				status = get_advisory_lock(vnode, &flock);
5958 				if (status == B_OK) {
5959 					// copy back flock structure
5960 					status = user_memcpy((struct flock*)argument, &flock,
5961 						sizeof(struct flock));
5962 				}
5963 			} else
5964 				status = B_BAD_VALUE;
5965 			break;
5966 
5967 		case F_SETLK:
5968 		case F_SETLKW:
5969 			status = normalize_flock(descriptor, &flock);
5970 			if (status != B_OK)
5971 				break;
5972 
5973 			if (vnode == NULL) {
5974 				status = B_BAD_VALUE;
5975 			} else if (flock.l_type == F_UNLCK) {
5976 				status = release_advisory_lock(vnode, &flock);
5977 			} else {
5978 				// the open mode must match the lock type
5979 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5980 						&& flock.l_type == F_WRLCK)
5981 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5982 						&& flock.l_type == F_RDLCK))
5983 					status = B_FILE_ERROR;
5984 				else {
5985 					status = acquire_advisory_lock(vnode, -1,
5986 						&flock, op == F_SETLKW);
5987 				}
5988 			}
5989 			break;
5990 
5991 		// ToDo: add support for more ops?
5992 
5993 		default:
5994 			status = B_BAD_VALUE;
5995 	}
5996 
5997 	put_fd(descriptor);
5998 	return status;
5999 }
6000 
6001 
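/*! Illustrative sketch (not compiled, helper name hypothetical): how a
	kernel-side caller could use common_fcntl() above; the ops and values
	mirror the cases handled in the switch.
*/
#if 0
static status_t
fcntl_usage_example(int fd)
{
	// mark the descriptor close-on-exec
	status_t status = common_fcntl(fd, F_SETFD, FD_CLOEXEC, true);
	if (status != B_OK)
		return status;

	// duplicate it onto the lowest free descriptor slot >= 10; on success
	// the new descriptor number is returned (and simply discarded here)
	status = common_fcntl(fd, F_DUPFD, 10, true);
	if (status < 0)
		return status;

	return B_OK;
}
#endif

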
6002 static status_t
6003 common_sync(int fd, bool kernel)
6004 {
6005 	struct file_descriptor* descriptor;
6006 	struct vnode* vnode;
6007 	status_t status;
6008 
6009 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6010 
6011 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6012 	if (descriptor == NULL)
6013 		return B_FILE_ERROR;
6014 
6015 	if (HAS_FS_CALL(vnode, fsync))
6016 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6017 	else
6018 		status = B_NOT_SUPPORTED;
6019 
6020 	put_fd(descriptor);
6021 	return status;
6022 }
6023 
6024 
6025 static status_t
6026 common_lock_node(int fd, bool kernel)
6027 {
6028 	struct file_descriptor* descriptor;
6029 	struct vnode* vnode;
6030 
6031 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6032 	if (descriptor == NULL)
6033 		return B_FILE_ERROR;
6034 
6035 	status_t status = B_OK;
6036 
6037 	// We need to set the lock atomically - someone
6038 	// else might set it at the same time
6039 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6040 			(file_descriptor*)NULL) != NULL)
6041 		status = B_BUSY;
6042 
6043 	put_fd(descriptor);
6044 	return status;
6045 }
6046 
6047 
6048 static status_t
6049 common_unlock_node(int fd, bool kernel)
6050 {
6051 	struct file_descriptor* descriptor;
6052 	struct vnode* vnode;
6053 
6054 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6055 	if (descriptor == NULL)
6056 		return B_FILE_ERROR;
6057 
6058 	status_t status = B_OK;
6059 
6060 	// We need to clear the lock atomically - someone
6061 	// else might clear it at the same time
6062 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6063 			(file_descriptor*)NULL, descriptor) != descriptor)
6064 		status = B_BAD_VALUE;
6065 
6066 	put_fd(descriptor);
6067 	return status;
6068 }
6069 
6070 
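/*! Illustrative sketch (not compiled, helper name hypothetical):
	common_lock_node() and common_unlock_node() implement a small
	compare-and-swap protocol on vnode::mandatory_locked_by - locking succeeds
	only if the field was NULL, unlocking only if the calling descriptor still
	owns the lock.
*/
#if 0
static bool
try_take_node_lock(struct vnode* vnode, file_descriptor* descriptor)
{
	// atomically: if (mandatory_locked_by == NULL)
	//		mandatory_locked_by = descriptor;
	return atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
		descriptor, (file_descriptor*)NULL) == NULL;
}
#endif

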
6071 static status_t
6072 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6073 	bool kernel)
6074 {
6075 	struct vnode* vnode;
6076 	status_t status;
6077 
6078 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6079 	if (status != B_OK)
6080 		return status;
6081 
6082 	if (HAS_FS_CALL(vnode, read_symlink)) {
6083 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6084 	} else
6085 		status = B_BAD_VALUE;
6086 
6087 	put_vnode(vnode);
6088 	return status;
6089 }
6090 
6091 
6092 static status_t
6093 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6094 	bool kernel)
6095 {
6096 	// path validity checks have to be in the calling function!
6097 	char name[B_FILE_NAME_LENGTH];
6098 	struct vnode* vnode;
6099 	status_t status;
6100 
6101 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6102 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6103 
6104 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6105 	if (status != B_OK)
6106 		return status;
6107 
6108 	if (HAS_FS_CALL(vnode, create_symlink))
6109 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6110 	else {
6111 		status = HAS_FS_CALL(vnode, write)
6112 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
6113 	}
6114 
6115 	put_vnode(vnode);
6116 
6117 	return status;
6118 }
6119 
6120 
6121 static status_t
6122 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6123 	bool traverseLeafLink, bool kernel)
6124 {
6125 	// path validity checks have to be in the calling function!
6126 
6127 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6128 		toPath, kernel));
6129 
6130 	char name[B_FILE_NAME_LENGTH];
6131 	struct vnode* directory;
6132 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6133 		kernel);
6134 	if (status != B_OK)
6135 		return status;
6136 
6137 	struct vnode* vnode;
6138 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6139 		kernel);
6140 	if (status != B_OK)
6141 		goto err;
6142 
6143 	if (directory->mount != vnode->mount) {
6144 		status = B_CROSS_DEVICE_LINK;
6145 		goto err1;
6146 	}
6147 
6148 	if (HAS_FS_CALL(directory, link))
6149 		status = FS_CALL(directory, link, name, vnode);
6150 	else
6151 		status = B_READ_ONLY_DEVICE;
6152 
6153 err1:
6154 	put_vnode(vnode);
6155 err:
6156 	put_vnode(directory);
6157 
6158 	return status;
6159 }
6160 
6161 
6162 static status_t
6163 common_unlink(int fd, char* path, bool kernel)
6164 {
6165 	char filename[B_FILE_NAME_LENGTH];
6166 	struct vnode* vnode;
6167 	status_t status;
6168 
6169 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6170 		kernel));
6171 
6172 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6173 	if (status < 0)
6174 		return status;
6175 
6176 	if (HAS_FS_CALL(vnode, unlink))
6177 		status = FS_CALL(vnode, unlink, filename);
6178 	else
6179 		status = B_READ_ONLY_DEVICE;
6180 
6181 	put_vnode(vnode);
6182 
6183 	return status;
6184 }
6185 
6186 
6187 static status_t
6188 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6189 {
6190 	struct vnode* vnode;
6191 	status_t status;
6192 
6193 	// TODO: honor effectiveUserGroup argument
6194 
6195 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6196 	if (status != B_OK)
6197 		return status;
6198 
6199 	if (HAS_FS_CALL(vnode, access))
6200 		status = FS_CALL(vnode, access, mode);
6201 	else
6202 		status = B_OK;
6203 
6204 	put_vnode(vnode);
6205 
6206 	return status;
6207 }
6208 
6209 
6210 static status_t
6211 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6212 {
6213 	struct vnode* fromVnode;
6214 	struct vnode* toVnode;
6215 	char fromName[B_FILE_NAME_LENGTH];
6216 	char toName[B_FILE_NAME_LENGTH];
6217 	status_t status;
6218 
6219 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6220 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6221 
6222 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6223 	if (status != B_OK)
6224 		return status;
6225 
6226 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6227 	if (status != B_OK)
6228 		goto err1;
6229 
6230 	if (fromVnode->device != toVnode->device) {
6231 		status = B_CROSS_DEVICE_LINK;
6232 		goto err2;
6233 	}
6234 
6235 	if (fromName[0] == '\0' || toName[0] == '\0'
6236 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6237 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6238 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6239 		status = B_BAD_VALUE;
6240 		goto err2;
6241 	}
6242 
6243 	if (HAS_FS_CALL(fromVnode, rename))
6244 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6245 	else
6246 		status = B_READ_ONLY_DEVICE;
6247 
6248 err2:
6249 	put_vnode(toVnode);
6250 err1:
6251 	put_vnode(fromVnode);
6252 
6253 	return status;
6254 }
6255 
6256 
6257 static status_t
6258 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6259 {
6260 	struct vnode* vnode = descriptor->u.vnode;
6261 
6262 	FUNCTION(("common_read_stat: stat %p\n", stat));
6263 
6264 	// TODO: remove this once all file systems properly set them!
6265 	stat->st_crtim.tv_nsec = 0;
6266 	stat->st_ctim.tv_nsec = 0;
6267 	stat->st_mtim.tv_nsec = 0;
6268 	stat->st_atim.tv_nsec = 0;
6269 
6270 	status_t status = FS_CALL(vnode, read_stat, stat);
6271 
6272 	// fill in the st_dev and st_ino fields
6273 	if (status == B_OK) {
6274 		stat->st_dev = vnode->device;
6275 		stat->st_ino = vnode->id;
6276 		stat->st_rdev = -1;
6277 	}
6278 
6279 	return status;
6280 }
6281 
6282 
6283 static status_t
6284 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6285 	int statMask)
6286 {
6287 	struct vnode* vnode = descriptor->u.vnode;
6288 
6289 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6290 		vnode, stat, statMask));
6291 
6292 	if (!HAS_FS_CALL(vnode, write_stat))
6293 		return B_READ_ONLY_DEVICE;
6294 
6295 	return FS_CALL(vnode, write_stat, stat, statMask);
6296 }
6297 
6298 
6299 static status_t
6300 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6301 	struct stat* stat, bool kernel)
6302 {
6303 	struct vnode* vnode;
6304 	status_t status;
6305 
6306 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6307 		stat));
6308 
6309 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6310 		kernel);
6311 	if (status < 0)
6312 		return status;
6313 
6314 	status = FS_CALL(vnode, read_stat, stat);
6315 
6316 	// fill in the st_dev and st_ino fields
6317 	if (status == B_OK) {
6318 		stat->st_dev = vnode->device;
6319 		stat->st_ino = vnode->id;
6320 		stat->st_rdev = -1;
6321 	}
6322 
6323 	put_vnode(vnode);
6324 	return status;
6325 }
6326 
6327 
6328 static status_t
6329 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6330 	const struct stat* stat, int statMask, bool kernel)
6331 {
6332 	struct vnode* vnode;
6333 	status_t status;
6334 
6335 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6336 		"kernel %d\n", fd, path, stat, statMask, kernel));
6337 
6338 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6339 		kernel);
6340 	if (status < 0)
6341 		return status;
6342 
6343 	if (HAS_FS_CALL(vnode, write_stat))
6344 		status = FS_CALL(vnode, write_stat, stat, statMask);
6345 	else
6346 		status = B_READ_ONLY_DEVICE;
6347 
6348 	put_vnode(vnode);
6349 
6350 	return status;
6351 }
6352 
6353 
6354 static int
6355 attr_dir_open(int fd, char* path, bool kernel)
6356 {
6357 	struct vnode* vnode;
6358 	int status;
6359 
6360 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6361 		kernel));
6362 
6363 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6364 	if (status != B_OK)
6365 		return status;
6366 
6367 	status = open_attr_dir_vnode(vnode, kernel);
6368 	if (status < 0)
6369 		put_vnode(vnode);
6370 
6371 	return status;
6372 }
6373 
6374 
6375 static status_t
6376 attr_dir_close(struct file_descriptor* descriptor)
6377 {
6378 	struct vnode* vnode = descriptor->u.vnode;
6379 
6380 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6381 
6382 	if (HAS_FS_CALL(vnode, close_attr_dir))
6383 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6384 
6385 	return B_OK;
6386 }
6387 
6388 
6389 static void
6390 attr_dir_free_fd(struct file_descriptor* descriptor)
6391 {
6392 	struct vnode* vnode = descriptor->u.vnode;
6393 
6394 	if (vnode != NULL) {
6395 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6396 		put_vnode(vnode);
6397 	}
6398 }
6399 
6400 
6401 static status_t
6402 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6403 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6404 {
6405 	struct vnode* vnode = descriptor->u.vnode;
6406 
6407 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6408 
6409 	if (HAS_FS_CALL(vnode, read_attr_dir))
6410 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6411 			bufferSize, _count);
6412 
6413 	return B_NOT_SUPPORTED;
6414 }
6415 
6416 
6417 static status_t
6418 attr_dir_rewind(struct file_descriptor* descriptor)
6419 {
6420 	struct vnode* vnode = descriptor->u.vnode;
6421 
6422 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6423 
6424 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6425 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6426 
6427 	return B_NOT_SUPPORTED;
6428 }
6429 
6430 
6431 static int
6432 attr_create(int fd, char* path, const char* name, uint32 type,
6433 	int openMode, bool kernel)
6434 {
6435 	if (name == NULL || *name == '\0')
6436 		return B_BAD_VALUE;
6437 
6438 	struct vnode* vnode;
6439 	status_t status = fd_and_path_to_vnode(fd, path,
6440 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6441 	if (status != B_OK)
6442 		return status;
6443 
6444 	if (!HAS_FS_CALL(vnode, create_attr)) {
6445 		status = B_READ_ONLY_DEVICE;
6446 		goto err;
6447 	}
6448 
6449 	void* cookie;
6450 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6451 	if (status != B_OK)
6452 		goto err;
6453 
6454 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6455 	if (fd >= 0)
6456 		return fd;
6457 
6458 	status = fd;
6459 
6460 	FS_CALL(vnode, close_attr, cookie);
6461 	FS_CALL(vnode, free_attr_cookie, cookie);
6462 
6463 	FS_CALL(vnode, remove_attr, name);
6464 
6465 err:
6466 	put_vnode(vnode);
6467 
6468 	return status;
6469 }
6470 
6471 
6472 static int
6473 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6474 {
6475 	if (name == NULL || *name == '\0')
6476 		return B_BAD_VALUE;
6477 
6478 	struct vnode* vnode;
6479 	status_t status = fd_and_path_to_vnode(fd, path,
6480 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6481 	if (status != B_OK)
6482 		return status;
6483 
6484 	if (!HAS_FS_CALL(vnode, open_attr)) {
6485 		status = B_NOT_SUPPORTED;
6486 		goto err;
6487 	}
6488 
6489 	void* cookie;
6490 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6491 	if (status != B_OK)
6492 		goto err;
6493 
6494 	// now we only need a file descriptor for this attribute and we're done
6495 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6496 	if (fd >= 0)
6497 		return fd;
6498 
6499 	status = fd;
6500 
6501 	FS_CALL(vnode, close_attr, cookie);
6502 	FS_CALL(vnode, free_attr_cookie, cookie);
6503 
6504 err:
6505 	put_vnode(vnode);
6506 
6507 	return status;
6508 }
6509 
6510 
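/*! Illustrative sketch (not compiled, helper name hypothetical): the cookie
	lifecycle that attr_create() and attr_open() above unwind by hand when
	get_new_fd() fails. Once the open hook has succeeded, both the close and
	the free hook must run before the vnode reference may be dropped.
*/
#if 0
static void
discard_attr_cookie(struct vnode* vnode, void* cookie)
{
	FS_CALL(vnode, close_attr, cookie);
	FS_CALL(vnode, free_attr_cookie, cookie);
	put_vnode(vnode);
}
#endif

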
6511 static status_t
6512 attr_close(struct file_descriptor* descriptor)
6513 {
6514 	struct vnode* vnode = descriptor->u.vnode;
6515 
6516 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6517 
6518 	if (HAS_FS_CALL(vnode, close_attr))
6519 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6520 
6521 	return B_OK;
6522 }
6523 
6524 
6525 static void
6526 attr_free_fd(struct file_descriptor* descriptor)
6527 {
6528 	struct vnode* vnode = descriptor->u.vnode;
6529 
6530 	if (vnode != NULL) {
6531 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6532 		put_vnode(vnode);
6533 	}
6534 }
6535 
6536 
6537 static status_t
6538 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6539 	size_t* length)
6540 {
6541 	struct vnode* vnode = descriptor->u.vnode;
6542 
6543 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6544 		*length));
6545 
6546 	if (!HAS_FS_CALL(vnode, read_attr))
6547 		return B_NOT_SUPPORTED;
6548 
6549 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6550 }
6551 
6552 
6553 static status_t
6554 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6555 	size_t* length)
6556 {
6557 	struct vnode* vnode = descriptor->u.vnode;
6558 
6559 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6560 	if (!HAS_FS_CALL(vnode, write_attr))
6561 		return B_NOT_SUPPORTED;
6562 
6563 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6564 }
6565 
6566 
6567 static off_t
6568 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6569 {
6570 	off_t offset;
6571 
6572 	switch (seekType) {
6573 		case SEEK_SET:
6574 			offset = 0;
6575 			break;
6576 		case SEEK_CUR:
6577 			offset = descriptor->pos;
6578 			break;
6579 		case SEEK_END:
6580 		{
6581 			struct vnode* vnode = descriptor->u.vnode;
6582 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6583 				return B_NOT_SUPPORTED;
6584 
6585 			struct stat stat;
6586 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6587 				&stat);
6588 			if (status != B_OK)
6589 				return status;
6590 
6591 			offset = stat.st_size;
6592 			break;
6593 		}
6594 		default:
6595 			return B_BAD_VALUE;
6596 	}
6597 
6598 	// assumes off_t is 64 bits wide
6599 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6600 		return B_BUFFER_OVERFLOW;
6601 
6602 	pos += offset;
6603 	if (pos < 0)
6604 		return B_BAD_VALUE;
6605 
6606 	return descriptor->pos = pos;
6607 }
6608 
6609 
6610 static status_t
6611 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6612 {
6613 	struct vnode* vnode = descriptor->u.vnode;
6614 
6615 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6616 
6617 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6618 		return B_NOT_SUPPORTED;
6619 
6620 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6621 }
6622 
6623 
6624 static status_t
6625 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6626 	int statMask)
6627 {
6628 	struct vnode* vnode = descriptor->u.vnode;
6629 
6630 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6631 
6632 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6633 		return B_READ_ONLY_DEVICE;
6634 
6635 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6636 }
6637 
6638 
6639 static status_t
6640 attr_remove(int fd, const char* name, bool kernel)
6641 {
6642 	struct file_descriptor* descriptor;
6643 	struct vnode* vnode;
6644 	status_t status;
6645 
6646 	if (name == NULL || *name == '\0')
6647 		return B_BAD_VALUE;
6648 
6649 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6650 		kernel));
6651 
6652 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6653 	if (descriptor == NULL)
6654 		return B_FILE_ERROR;
6655 
6656 	if (HAS_FS_CALL(vnode, remove_attr))
6657 		status = FS_CALL(vnode, remove_attr, name);
6658 	else
6659 		status = B_READ_ONLY_DEVICE;
6660 
6661 	put_fd(descriptor);
6662 
6663 	return status;
6664 }
6665 
6666 
6667 static status_t
6668 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6669 	bool kernel)
6670 {
6671 	struct file_descriptor* fromDescriptor;
6672 	struct file_descriptor* toDescriptor;
6673 	struct vnode* fromVnode;
6674 	struct vnode* toVnode;
6675 	status_t status;
6676 
6677 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6678 		|| *toName == '\0')
6679 		return B_BAD_VALUE;
6680 
6681 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6682 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6683 
6684 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6685 	if (fromDescriptor == NULL)
6686 		return B_FILE_ERROR;
6687 
6688 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6689 	if (toDescriptor == NULL) {
6690 		status = B_FILE_ERROR;
6691 		goto err;
6692 	}
6693 
6694 	// are the files on the same volume?
6695 	if (fromVnode->device != toVnode->device) {
6696 		status = B_CROSS_DEVICE_LINK;
6697 		goto err1;
6698 	}
6699 
6700 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6701 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6702 	} else
6703 		status = B_READ_ONLY_DEVICE;
6704 
6705 err1:
6706 	put_fd(toDescriptor);
6707 err:
6708 	put_fd(fromDescriptor);
6709 
6710 	return status;
6711 }
6712 
6713 
6714 static int
6715 index_dir_open(dev_t mountID, bool kernel)
6716 {
6717 	struct fs_mount* mount;
6718 	void* cookie;
6719 
6720 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6721 
6722 	status_t status = get_mount(mountID, &mount);
6723 	if (status != B_OK)
6724 		return status;
6725 
6726 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6727 		status = B_NOT_SUPPORTED;
6728 		goto error;
6729 	}
6730 
6731 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6732 	if (status != B_OK)
6733 		goto error;
6734 
6735 	// get fd for the index directory
6736 	int fd;
6737 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6738 	if (fd >= 0)
6739 		return fd;
6740 
6741 	// something went wrong
6742 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6743 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6744 
6745 	status = fd;
6746 
6747 error:
6748 	put_mount(mount);
6749 	return status;
6750 }
6751 
6752 
6753 static status_t
6754 index_dir_close(struct file_descriptor* descriptor)
6755 {
6756 	struct fs_mount* mount = descriptor->u.mount;
6757 
6758 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6759 
6760 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6761 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6762 
6763 	return B_OK;
6764 }
6765 
6766 
6767 static void
6768 index_dir_free_fd(struct file_descriptor* descriptor)
6769 {
6770 	struct fs_mount* mount = descriptor->u.mount;
6771 
6772 	if (mount != NULL) {
6773 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6774 		put_mount(mount);
6775 	}
6776 }
6777 
6778 
6779 static status_t
6780 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6781 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6782 {
6783 	struct fs_mount* mount = descriptor->u.mount;
6784 
6785 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6786 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6787 			bufferSize, _count);
6788 	}
6789 
6790 	return B_NOT_SUPPORTED;
6791 }
6792 
6793 
6794 static status_t
6795 index_dir_rewind(struct file_descriptor* descriptor)
6796 {
6797 	struct fs_mount* mount = descriptor->u.mount;
6798 
6799 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6800 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6801 
6802 	return B_NOT_SUPPORTED;
6803 }
6804 
6805 
6806 static status_t
6807 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6808 	bool kernel)
6809 {
6810 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6811 		name, kernel));
6812 
6813 	struct fs_mount* mount;
6814 	status_t status = get_mount(mountID, &mount);
6815 	if (status != B_OK)
6816 		return status;
6817 
6818 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6819 		status = B_READ_ONLY_DEVICE;
6820 		goto out;
6821 	}
6822 
6823 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6824 
6825 out:
6826 	put_mount(mount);
6827 	return status;
6828 }
6829 
6830 
6831 #if 0
6832 static status_t
6833 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6834 {
6835 	struct vnode* vnode = descriptor->u.vnode;
6836 
6837 	// ToDo: currently unused!
6838 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6839 	if (!HAS_FS_CALL(vnode, read_index_stat))
6840 		return B_NOT_SUPPORTED;
6841 
6842 	return B_NOT_SUPPORTED;
6843 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6844 }
6845 
6846 
6847 static void
6848 index_free_fd(struct file_descriptor* descriptor)
6849 {
6850 	struct vnode* vnode = descriptor->u.vnode;
6851 
6852 	if (vnode != NULL) {
6853 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6854 		put_vnode(vnode);
6855 	}
6856 }
6857 #endif
6858 
6859 
6860 static status_t
6861 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6862 	bool kernel)
6863 {
6864 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6865 		mountID, name, kernel));
6866 
6867 	struct fs_mount* mount;
6868 	status_t status = get_mount(mountID, &mount);
6869 	if (status != B_OK)
6870 		return status;
6871 
6872 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6873 		status = B_NOT_SUPPORTED;
6874 		goto out;
6875 	}
6876 
6877 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6878 
6879 out:
6880 	put_mount(mount);
6881 	return status;
6882 }
6883 
6884 
6885 static status_t
6886 index_remove(dev_t mountID, const char* name, bool kernel)
6887 {
6888 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6889 		name, kernel));
6890 
6891 	struct fs_mount* mount;
6892 	status_t status = get_mount(mountID, &mount);
6893 	if (status != B_OK)
6894 		return status;
6895 
6896 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6897 		status = B_READ_ONLY_DEVICE;
6898 		goto out;
6899 	}
6900 
6901 	status = FS_MOUNT_CALL(mount, remove_index, name);
6902 
6903 out:
6904 	put_mount(mount);
6905 	return status;
6906 }
6907 
6908 
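/*! Illustrative sketch (not compiled, helper name hypothetical): the index
	functions above all share the same shape - take a mount reference via
	get_mount(), invoke the mount hook if the FS provides it, and release the
	reference again. The example uses the sync hook, which follows the
	identical pattern.
*/
#if 0
static status_t
call_mount_hook_example(dev_t mountID)
{
	struct fs_mount* mount;
	status_t status = get_mount(mountID, &mount);
	if (status != B_OK)
		return status;

	if (HAS_FS_MOUNT_CALL(mount, sync))
		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
	else
		status = B_NOT_SUPPORTED;

	put_mount(mount);
	return status;
}
#endif

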
6909 /*!	TODO: the query FS API is still pretty much the same as in R5.
6910 		It would be nice if the file systems would get some more kernel
6911 		support for queries.
6912 		For example, query parsing should be moved into the kernel.
6913 */
6914 static int
6915 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6916 	int32 token, bool kernel)
6917 {
6918 	struct fs_mount* mount;
6919 	void* cookie;
6920 
6921 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6922 		query, kernel));
6923 
6924 	status_t status = get_mount(device, &mount);
6925 	if (status != B_OK)
6926 		return status;
6927 
6928 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6929 		status = B_NOT_SUPPORTED;
6930 		goto error;
6931 	}
6932 
6933 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6934 		&cookie);
6935 	if (status != B_OK)
6936 		goto error;
6937 
6938 	// get fd for the query
6939 	int fd;
6940 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6941 	if (fd >= 0)
6942 		return fd;
6943 
6944 	status = fd;
6945 
6946 	// something went wrong
6947 	FS_MOUNT_CALL(mount, close_query, cookie);
6948 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6949 
6950 error:
6951 	put_mount(mount);
6952 	return status;
6953 }
6954 
6955 
6956 static status_t
6957 query_close(struct file_descriptor* descriptor)
6958 {
6959 	struct fs_mount* mount = descriptor->u.mount;
6960 
6961 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6962 
6963 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6964 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6965 
6966 	return B_OK;
6967 }
6968 
6969 
6970 static void
6971 query_free_fd(struct file_descriptor* descriptor)
6972 {
6973 	struct fs_mount* mount = descriptor->u.mount;
6974 
6975 	if (mount != NULL) {
6976 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6977 		put_mount(mount);
6978 	}
6979 }
6980 
6981 
6982 static status_t
6983 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6984 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6985 {
6986 	struct fs_mount* mount = descriptor->u.mount;
6987 
6988 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6989 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6990 			bufferSize, _count);
6991 	}
6992 
6993 	return B_NOT_SUPPORTED;
6994 }
6995 
6996 
6997 static status_t
6998 query_rewind(struct file_descriptor* descriptor)
6999 {
7000 	struct fs_mount* mount = descriptor->u.mount;
7001 
7002 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7003 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7004 
7005 	return B_NOT_SUPPORTED;
7006 }
7007 
7008 
7009 //	#pragma mark - General File System functions
7010 
7011 
7012 static dev_t
7013 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7014 	const char* args, bool kernel)
7015 {
7016 	struct ::fs_mount* mount;
7017 	status_t status = B_OK;
7018 	fs_volume* volume = NULL;
7019 	int32 layer = 0;
7020 
7021 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7022 
7023 	// The path is always safe, we just have to make sure that fsName is
7024 	// at least minimally valid - we can't make any assumptions about args,
7025 	// though. A NULL fsName is OK if a device was given and the FS is not
7026 	// virtual; we'll get the name from the DDM later.
7027 	if (fsName == NULL) {
7028 		if (device == NULL || (flags & B_MOUNT_VIRTUAL_DEVICE) != 0)
7029 			return B_BAD_VALUE;
7030 	} else if (fsName[0] == '\0')
7031 		return B_BAD_VALUE;
7032 
7033 	RecursiveLocker mountOpLocker(sMountOpLock);
7034 
7035 	// Helper to delete a newly created file device on failure.
7036 	// Not exactly beautiful, but helps to keep the code below cleaner.
7037 	struct FileDeviceDeleter {
7038 		FileDeviceDeleter() : id(-1) {}
7039 		~FileDeviceDeleter()
7040 		{
7041 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7042 		}
7043 
7044 		partition_id id;
7045 	} fileDeviceDeleter;
7046 
7047 	// If the file system is not a "virtual" one, the device argument should
7048 	// point to a real file/device (if given at all).
7049 	// get the partition
7050 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7051 	KPartition* partition = NULL;
7052 	KPath normalizedDevice;
7053 	bool newlyCreatedFileDevice = false;
7054 
7055 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7056 		// normalize the device path
7057 		status = normalizedDevice.SetTo(device, true);
7058 		if (status != B_OK)
7059 			return status;
7060 
7061 		// get a corresponding partition from the DDM
7062 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7063 		if (partition == NULL) {
7064 			// Partition not found: This either means the user supplied
7065 			// an invalid path, or the path refers to an image file. We try
7066 			// to let the DDM create a file device for the path.
7067 			partition_id deviceID = ddm->CreateFileDevice(
7068 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7069 			if (deviceID >= 0) {
7070 				partition = ddm->RegisterPartition(deviceID);
7071 				if (newlyCreatedFileDevice)
7072 					fileDeviceDeleter.id = deviceID;
7073 			}
7074 		}
7075 
7076 		if (!partition) {
7077 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7078 				normalizedDevice.Path()));
7079 			return B_ENTRY_NOT_FOUND;
7080 		}
7081 
7082 		device = normalizedDevice.Path();
7083 			// correct path to file device
7084 	}
7085 	PartitionRegistrar partitionRegistrar(partition, true);
7086 
7087 	// Write lock the partition's device. For the time being, we keep the lock
7088 	// until we're done mounting -- not nice, but it ensures that no one
7089 	// is interfering.
7090 	// TODO: Just mark the partition busy while mounting!
7091 	KDiskDevice* diskDevice = NULL;
7092 	if (partition) {
7093 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7094 		if (!diskDevice) {
7095 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7096 			return B_ERROR;
7097 		}
7098 	}
7099 
7100 	DeviceWriteLocker writeLocker(diskDevice, true);
7101 		// this takes over the write lock acquired before
7102 
7103 	if (partition != NULL) {
7104 		// make sure that the partition is not busy
7105 		if (partition->IsBusy()) {
7106 			TRACE(("fs_mount(): Partition is busy.\n"));
7107 			return B_BUSY;
7108 		}
7109 
7110 		// if no FS name had been supplied, we get it from the partition
7111 		if (fsName == NULL) {
7112 			KDiskSystem* diskSystem = partition->DiskSystem();
7113 			if (!diskSystem) {
7114 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7115 					"recognize it.\n"));
7116 				return B_BAD_VALUE;
7117 			}
7118 
7119 			if (!diskSystem->IsFileSystem()) {
7120 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7121 					"partitioning system.\n"));
7122 				return B_BAD_VALUE;
7123 			}
7124 
7125 			// The disk system name will not change, and the KDiskSystem
7126 			// object will not go away while the disk device is locked (and
7127 			// the partition has a reference to it), so this is safe.
7128 			fsName = diskSystem->Name();
7129 		}
7130 	}
7131 
7132 	mount = new(std::nothrow) (struct ::fs_mount);
7133 	if (mount == NULL)
7134 		return B_NO_MEMORY;
7135 
7136 	mount->device_name = strdup(device);
7137 		// "device" can be NULL
7138 
7139 	status = mount->entry_cache.Init();
7140 	if (status != B_OK)
7141 		goto err1;
7142 
7143 	// initialize structure
7144 	mount->id = sNextMountID++;
7145 	mount->partition = NULL;
7146 	mount->root_vnode = NULL;
7147 	mount->covers_vnode = NULL;
7148 	mount->unmounting = false;
7149 	mount->owns_file_device = false;
7150 	mount->volume = NULL;
7151 
7152 	// build up the volume(s)
7153 	while (true) {
7154 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7155 		if (layerFSName == NULL) {
7156 			if (layer == 0) {
7157 				status = B_NO_MEMORY;
7158 				goto err1;
7159 			}
7160 
7161 			break;
7162 		}
7163 
7164 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7165 		if (volume == NULL) {
7166 			status = B_NO_MEMORY;
7167 			free(layerFSName);
7168 			goto err1;
7169 		}
7170 
7171 		volume->id = mount->id;
7172 		volume->partition = partition != NULL ? partition->ID() : -1;
7173 		volume->layer = layer++;
7174 		volume->private_volume = NULL;
7175 		volume->ops = NULL;
7176 		volume->sub_volume = NULL;
7177 		volume->super_volume = NULL;
7178 		volume->file_system = NULL;
7179 		volume->file_system_name = NULL;
7180 
7181 		volume->file_system_name = get_file_system_name(layerFSName);
7182 		if (volume->file_system_name == NULL) {
7183 			status = B_NO_MEMORY;
7184 			free(layerFSName);
7185 			free(volume);
7186 			goto err1;
7187 		}
7188 
7189 		volume->file_system = get_file_system(layerFSName);
7190 		if (volume->file_system == NULL) {
7191 			status = B_DEVICE_NOT_FOUND;
7192 			free(layerFSName);
7193 			free(volume->file_system_name);
7194 			free(volume);
7195 			goto err1;
7196 		}
7197 
7198 		if (mount->volume == NULL)
7199 			mount->volume = volume;
7200 		else {
7201 			volume->super_volume = mount->volume;
7202 			mount->volume->sub_volume = volume;
7203 			mount->volume = volume;
7204 		}
7205 	}
7206 
7207 	// insert mount struct into list before we call FS's mount() function
7208 	// so that vnodes can be created for this mount
7209 	mutex_lock(&sMountMutex);
7210 	hash_insert(sMountsTable, mount);
7211 	mutex_unlock(&sMountMutex);
7212 
7213 	ino_t rootID;
7214 
7215 	if (!sRoot) {
7216 		// we haven't mounted anything yet
7217 		if (strcmp(path, "/") != 0) {
7218 			status = B_ERROR;
7219 			goto err2;
7220 		}
7221 
7222 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7223 			args, &rootID);
7224 		if (status != 0)
7225 			goto err2;
7226 	} else {
7227 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7228 		if (status != B_OK)
7229 			goto err2;
7230 
7231 		// make sure covers_vnode is a directory
7232 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7233 			status = B_NOT_A_DIRECTORY;
7234 			goto err3;
7235 		}
7236 
7237 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7238 			// this is already a mount point
7239 			status = B_BUSY;
7240 			goto err3;
7241 		}
7242 
7243 		// mount it/them
7244 		fs_volume* volume = mount->volume;
7245 		while (volume) {
7246 			status = volume->file_system->mount(volume, device, flags, args,
7247 				&rootID);
7248 			if (status != B_OK) {
7249 				if (volume->sub_volume)
7250 					goto err4;
7251 				goto err3;
7252 			}
7253 
7254 			volume = volume->super_volume;
7255 		}
7256 
7257 		volume = mount->volume;
7258 		while (volume) {
7259 			if (volume->ops->all_layers_mounted != NULL)
7260 				volume->ops->all_layers_mounted(volume);
7261 			volume = volume->super_volume;
7262 		}
7263 	}
7264 
7265 	// the root node is supposed to be owned by the file system - it must
7266 	// exist at this point
7267 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7268 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7269 		panic("fs_mount: file system does not own its root node!\n");
7270 		status = B_ERROR;
7271 		goto err4;
7272 	}
7273 
7274 	// No race here, since fs_mount() is the only function changing
7275 	// covers_vnode (and holds sMountOpLock at that time).
7276 	rw_lock_write_lock(&sVnodeLock);
7277 	if (mount->covers_vnode)
7278 		mount->covers_vnode->covered_by = mount->root_vnode;
7279 	rw_lock_write_unlock(&sVnodeLock);
7280 
7281 	if (!sRoot) {
7282 		sRoot = mount->root_vnode;
7283 		mutex_lock(&sIOContextRootLock);
7284 		get_current_io_context(true)->root = sRoot;
7285 		mutex_unlock(&sIOContextRootLock);
7286 		inc_vnode_ref_count(sRoot);
7287 	}
7288 
7289 	// supply the partition (if any) with the mount cookie and mark it mounted
7290 	if (partition) {
7291 		partition->SetMountCookie(mount->volume->private_volume);
7292 		partition->SetVolumeID(mount->id);
7293 
7294 		// keep a partition reference as long as the partition is mounted
7295 		partitionRegistrar.Detach();
7296 		mount->partition = partition;
7297 		mount->owns_file_device = newlyCreatedFileDevice;
7298 		fileDeviceDeleter.id = -1;
7299 	}
7300 
7301 	notify_mount(mount->id,
7302 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7303 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7304 
7305 	return mount->id;
7306 
7307 err4:
7308 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7309 err3:
7310 	if (mount->covers_vnode != NULL)
7311 		put_vnode(mount->covers_vnode);
7312 err2:
7313 	mutex_lock(&sMountMutex);
7314 	hash_remove(sMountsTable, mount);
7315 	mutex_unlock(&sMountMutex);
7316 err1:
7317 	delete mount;
7318 
7319 	return status;
7320 }
7321 
7322 
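/*! Illustrative sketch (not compiled, helper name hypothetical): the volume
	layering built by fs_mount() above. The layer 0 volume is created first;
	every further layer becomes the new mount->volume and is linked to its
	predecessor via the super_volume/sub_volume pointers. mount->volume thus
	always refers to the highest layer, and following super_volume leads back
	to layer 0 - the walk fs_read_info() below uses to find the base file
	system name.
*/
#if 0
static fs_volume*
layer_zero_volume(struct fs_mount* mount)
{
	fs_volume* volume = mount->volume;
	while (volume->super_volume != NULL)
		volume = volume->super_volume;
	return volume;
}
#endif

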
7323 static status_t
7324 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7325 {
7326 	struct fs_mount* mount;
7327 	status_t err;
7328 
7329 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7330 		kernel));
7331 
7332 	struct vnode* pathVnode = NULL;
7333 	if (path != NULL) {
7334 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7335 		if (err != B_OK)
7336 			return B_ENTRY_NOT_FOUND;
7337 	}
7338 
7339 	RecursiveLocker mountOpLocker(sMountOpLock);
7340 
7341 	// this lock is not strictly necessary, but is here in case of KDEBUG,
7342 	// to keep the ASSERT in find_mount() working.
7343 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7344 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7345 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7346 	if (mount == NULL) {
7347 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7348 			pathVnode);
7349 	}
7350 
7351 	if (path != NULL) {
7352 		put_vnode(pathVnode);
7353 
7354 		if (mount->root_vnode != pathVnode) {
7355 			// not a mount point
7356 			return B_BAD_VALUE;
7357 		}
7358 	}
7359 
7360 	// if the volume is associated with a partition, lock the device of the
7361 	// partition as long as we are unmounting
7362 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7363 	KPartition* partition = mount->partition;
7364 	KDiskDevice* diskDevice = NULL;
7365 	if (partition != NULL) {
7366 		if (partition->Device() == NULL) {
7367 			dprintf("fs_unmount(): There is no device!\n");
7368 			return B_ERROR;
7369 		}
7370 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7371 		if (!diskDevice) {
7372 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7373 			return B_ERROR;
7374 		}
7375 	}
7376 	DeviceWriteLocker writeLocker(diskDevice, true);
7377 
7378 	// make sure that the partition is not busy
7379 	if (partition != NULL) {
7380 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7381 			TRACE(("fs_unmount(): Partition is busy.\n"));
7382 			return B_BUSY;
7383 		}
7384 	}
7385 
7386 	// grab the vnode master mutex to keep someone from creating
7387 	// a vnode while we're figuring out if we can continue
7388 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7389 
7390 	bool disconnectedDescriptors = false;
7391 
7392 	while (true) {
7393 		bool busy = false;
7394 
7395 		// cycle through the list of vnodes associated with this mount and
7396 		// make sure none of them is busy or still referenced
7397 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7398 		while (struct vnode* vnode = iterator.Next()) {
7399 			// The root vnode ref_count needs to be 1 here (the mount has a
7400 			// reference).
7401 			if (vnode->IsBusy()
7402 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7403 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7404 				// there are still vnodes in use on this mount, so we cannot
7405 				// unmount yet
7406 				busy = true;
7407 				break;
7408 			}
7409 		}
7410 
7411 		if (!busy)
7412 			break;
7413 
7414 		if ((flags & B_FORCE_UNMOUNT) == 0)
7415 			return B_BUSY;
7416 
7417 		if (disconnectedDescriptors) {
7418 			// wait a bit until the last access is finished, and then try again
7419 			vnodesWriteLocker.Unlock();
7420 			snooze(100000);
7421 			// TODO: if there is some kind of bug that prevents the ref counts
7422 			// from getting back to zero, this will fall into an endless loop...
7423 			vnodesWriteLocker.Lock();
7424 			continue;
7425 		}
7426 
7427 		// the file system is still busy - but we're forced to unmount it,
7428 		// so let's disconnect all open file descriptors
7429 
7430 		mount->unmounting = true;
7431 			// prevent new vnodes from being created
7432 
7433 		vnodesWriteLocker.Unlock();
7434 
7435 		disconnect_mount_or_vnode_fds(mount, NULL);
7436 		disconnectedDescriptors = true;
7437 
7438 		vnodesWriteLocker.Lock();
7439 	}
7440 
7441 	// we can safely continue, mark all of the vnodes busy and this mount
7442 	// structure in unmounting state
7443 	mount->unmounting = true;
7444 
7445 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7446 	while (struct vnode* vnode = iterator.Next()) {
7447 		vnode->SetBusy(true);
7448 		vnode_to_be_freed(vnode);
7449 	}
7450 
7451 	// The ref_count of the root node is 1 at this point, see above why this is
7452 	mount->root_vnode->ref_count--;
7453 	vnode_to_be_freed(mount->root_vnode);
7454 
7455 	mount->covers_vnode->covered_by = NULL;
7456 
7457 	vnodesWriteLocker.Unlock();
7458 
7459 	put_vnode(mount->covers_vnode);
7460 
7461 	// Free all vnodes associated with this mount.
7462 	// They will be removed from the mount list by free_vnode(), so
7463 	// we don't have to do that ourselves.
7464 	while (struct vnode* vnode = mount->vnodes.Head())
7465 		free_vnode(vnode, false);
7466 
7467 	// remove the mount structure from the hash table
7468 	mutex_lock(&sMountMutex);
7469 	hash_remove(sMountsTable, mount);
7470 	mutex_unlock(&sMountMutex);
7471 
7472 	mountOpLocker.Unlock();
7473 
7474 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7475 	notify_unmount(mount->id);
7476 
7477 	// dereference the partition and mark it unmounted
7478 	if (partition) {
7479 		partition->SetVolumeID(-1);
7480 		partition->SetMountCookie(NULL);
7481 
7482 		if (mount->owns_file_device)
7483 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7484 		partition->Unregister();
7485 	}
7486 
7487 	delete mount;
7488 	return B_OK;
7489 }
7490 
7491 
7492 static status_t
7493 fs_sync(dev_t device)
7494 {
7495 	struct fs_mount* mount;
7496 	status_t status = get_mount(device, &mount);
7497 	if (status != B_OK)
7498 		return status;
7499 
7500 	struct vnode marker;
7501 	memset(&marker, 0, sizeof(marker));
7502 	marker.SetBusy(true);
7503 	marker.SetRemoved(true);
7504 
7505 	// First, synchronize all file caches
7506 
7507 	while (true) {
7508 		WriteLocker locker(sVnodeLock);
7509 			// Note: That's the easy way, which is probably OK for sync(),
7510 			// since it's a relatively rare call and doesn't need to allow for
7511 			// a lot of concurrency. Using a read lock would be possible, but
7512 			// also more involved, since we'd have to lock the individual
7513 			// nodes and take care of the locking order, which we might not
7514 			// want to do while holding fs_mount::rlock.
7515 
7516 		// synchronize access to vnode list
7517 		recursive_lock_lock(&mount->rlock);
7518 
7519 		struct vnode* vnode;
7520 		if (!marker.IsRemoved()) {
7521 			vnode = mount->vnodes.GetNext(&marker);
7522 			mount->vnodes.Remove(&marker);
7523 			marker.SetRemoved(true);
7524 		} else
7525 			vnode = mount->vnodes.First();
7526 
7527 		while (vnode != NULL && (vnode->cache == NULL
7528 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7529 			// TODO: we could track writes (and writable mapped vnodes)
7530 			//	and have a simple flag that we could test for here
7531 			vnode = mount->vnodes.GetNext(vnode);
7532 		}
7533 
7534 		if (vnode != NULL) {
7535 			// insert marker vnode again
7536 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7537 			marker.SetRemoved(false);
7538 		}
7539 
7540 		recursive_lock_unlock(&mount->rlock);
7541 
7542 		if (vnode == NULL)
7543 			break;
7544 
7545 		vnode = lookup_vnode(mount->id, vnode->id);
7546 		if (vnode == NULL || vnode->IsBusy())
7547 			continue;
7548 
7549 		if (vnode->ref_count == 0) {
7550 			// this vnode has been unused before
7551 			vnode_used(vnode);
7552 		}
7553 		inc_vnode_ref_count(vnode);
7554 
7555 		locker.Unlock();
7556 
7557 		if (vnode->cache != NULL && !vnode->IsRemoved())
7558 			vnode->cache->WriteModified();
7559 
7560 		put_vnode(vnode);
7561 	}
7562 
7563 	// And then, let the file systems do their synchronizing work
7564 
7565 	if (HAS_FS_MOUNT_CALL(mount, sync))
7566 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7567 
7568 	put_mount(mount);
7569 	return status;
7570 }
7571 
7572 
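/*! Illustrative sketch (not compiled, helper name hypothetical): the marker
	technique fs_sync() above uses to iterate a mutable vnode list. A
	stack-allocated dummy vnode - marked busy so nobody else touches it - is
	inserted behind the current position, which allows dropping the list lock
	between iteration steps without losing the position.
*/
#if 0
static void
marker_iteration_sketch(struct fs_mount* mount)
{
	struct vnode marker;
	memset(&marker, 0, sizeof(marker));
	marker.SetBusy(true);		// keep everyone's hands off the marker
	marker.SetRemoved(true);	// here: "not currently in the list"

	while (true) {
		recursive_lock_lock(&mount->rlock);

		struct vnode* vnode;
		if (!marker.IsRemoved()) {
			// resume after the marker and take it out of the list
			vnode = mount->vnodes.GetNext(&marker);
			mount->vnodes.Remove(&marker);
			marker.SetRemoved(true);
		} else
			vnode = mount->vnodes.First();

		if (vnode != NULL) {
			// re-insert the marker behind the current node
			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
			marker.SetRemoved(false);
		}

		recursive_lock_unlock(&mount->rlock);

		if (vnode == NULL)
			break;

		// the list lock is no longer held here; work with "vnode" ...
	}
}
#endif

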
7573 static status_t
7574 fs_read_info(dev_t device, struct fs_info* info)
7575 {
7576 	struct fs_mount* mount;
7577 	status_t status = get_mount(device, &mount);
7578 	if (status != B_OK)
7579 		return status;
7580 
7581 	memset(info, 0, sizeof(struct fs_info));
7582 
7583 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7584 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7585 
7586 	// fill in info the file system doesn't (have to) know about
7587 	if (status == B_OK) {
7588 		info->dev = mount->id;
7589 		info->root = mount->root_vnode->id;
7590 
7591 		fs_volume* volume = mount->volume;
7592 		while (volume->super_volume != NULL)
7593 			volume = volume->super_volume;
7594 
7595 		strlcpy(info->fsh_name, volume->file_system_name,
7596 			sizeof(info->fsh_name));
7597 		if (mount->device_name != NULL) {
7598 			strlcpy(info->device_name, mount->device_name,
7599 				sizeof(info->device_name));
7600 		}
7601 	}
7602 
7603 	// if the call is not supported by the file system, there are still
7604 	// the parts that we filled out ourselves
7605 
7606 	put_mount(mount);
7607 	return status;
7608 }
7609 
7610 
7611 static status_t
7612 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7613 {
7614 	struct fs_mount* mount;
7615 	status_t status = get_mount(device, &mount);
7616 	if (status != B_OK)
7617 		return status;
7618 
7619 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7620 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7621 	else
7622 		status = B_READ_ONLY_DEVICE;
7623 
7624 	put_mount(mount);
7625 	return status;
7626 }
7627 
7628 
7629 static dev_t
7630 fs_next_device(int32* _cookie)
7631 {
7632 	struct fs_mount* mount = NULL;
7633 	dev_t device = *_cookie;
7634 
7635 	mutex_lock(&sMountMutex);
7636 
7637 	// Since device IDs are assigned sequentially, this algorithm
7638 	// works well enough. It makes sure that the device list is
7639 	// returned in sorted order, and that no device is skipped when an
7640 	// already visited device has been unmounted.
7641 
7642 	while (device < sNextMountID) {
7643 		mount = find_mount(device++);
7644 		if (mount != NULL && mount->volume->private_volume != NULL)
7645 			break;
7646 	}
7647 
7648 	*_cookie = device;
7649 
7650 	if (mount != NULL)
7651 		device = mount->id;
7652 	else
7653 		device = B_BAD_VALUE;
7654 
7655 	mutex_unlock(&sMountMutex);
7656 
7657 	return device;
7658 }
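
// For illustration, callers iterate all mounted volumes with the cookie
// pattern below (the same pattern _kern_sync() uses with next_dev()):
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0) {
//		// device is the ID of a mounted volume
//	}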
7659 
7660 
7661 ssize_t
7662 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7663 	void *buffer, size_t readBytes)
7664 {
7665 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7666 	if (attrFD < 0)
7667 		return attrFD;
7668 
7669 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7670 
7671 	_kern_close(attrFD);
7672 
7673 	return bytesRead;
7674 }
7675 
7676 
7677 static status_t
7678 get_cwd(char* buffer, size_t size, bool kernel)
7679 {
7680 	// Get current working directory from io context
7681 	struct io_context* context = get_current_io_context(kernel);
7682 	status_t status;
7683 
7684 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7685 
7686 	mutex_lock(&context->io_mutex);
7687 
7688 	struct vnode* vnode = context->cwd;
7689 	if (vnode)
7690 		inc_vnode_ref_count(vnode);
7691 
7692 	mutex_unlock(&context->io_mutex);
7693 
7694 	if (vnode) {
7695 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7696 		put_vnode(vnode);
7697 	} else
7698 		status = B_ERROR;
7699 
7700 	return status;
7701 }
7702 
7703 
7704 static status_t
7705 set_cwd(int fd, char* path, bool kernel)
7706 {
7707 	struct io_context* context;
7708 	struct vnode* vnode = NULL;
7709 	struct vnode* oldDirectory;
7710 	status_t status;
7711 
7712 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7713 
7714 	// Get vnode for passed path, and bail if it failed
7715 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7716 	if (status < 0)
7717 		return status;
7718 
7719 	if (!S_ISDIR(vnode->Type())) {
7720 		// nope, can't cwd to here
7721 		status = B_NOT_A_DIRECTORY;
7722 		goto err;
7723 	}
7724 
7725 	// Get current io context and lock
7726 	context = get_current_io_context(kernel);
7727 	mutex_lock(&context->io_mutex);
7728 
7729 	// save the old current working directory first
7730 	oldDirectory = context->cwd;
7731 	context->cwd = vnode;
7732 
7733 	mutex_unlock(&context->io_mutex);
7734 
7735 	if (oldDirectory)
7736 		put_vnode(oldDirectory);
7737 
7738 	return B_NO_ERROR;
7739 
7740 err:
7741 	put_vnode(vnode);
7742 	return status;
7743 }
7744 
7745 
7746 //	#pragma mark - kernel mirrored syscalls
7747 
7748 
7749 dev_t
7750 _kern_mount(const char* path, const char* device, const char* fsName,
7751 	uint32 flags, const char* args, size_t argsLength)
7752 {
7753 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7754 	if (pathBuffer.InitCheck() != B_OK)
7755 		return B_NO_MEMORY;
7756 
7757 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7758 }
7759 
7760 
7761 status_t
7762 _kern_unmount(const char* path, uint32 flags)
7763 {
7764 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7765 	if (pathBuffer.InitCheck() != B_OK)
7766 		return B_NO_MEMORY;
7767 
7768 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7769 }
7770 
7771 
7772 status_t
7773 _kern_read_fs_info(dev_t device, struct fs_info* info)
7774 {
7775 	if (info == NULL)
7776 		return B_BAD_VALUE;
7777 
7778 	return fs_read_info(device, info);
7779 }
7780 
7781 
7782 status_t
7783 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7784 {
7785 	if (info == NULL)
7786 		return B_BAD_VALUE;
7787 
7788 	return fs_write_info(device, info, mask);
7789 }
7790 
7791 
7792 status_t
7793 _kern_sync(void)
7794 {
7795 	// Note: _kern_sync() is also called from _user_sync()
7796 	int32 cookie = 0;
7797 	dev_t device;
7798 	while ((device = next_dev(&cookie)) >= 0) {
7799 		status_t status = fs_sync(device);
7800 		if (status != B_OK && status != B_BAD_VALUE) {
7801 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7802 				strerror(status));
7803 		}
7804 	}
7805 
7806 	return B_OK;
7807 }
7808 
7809 
7810 dev_t
7811 _kern_next_device(int32* _cookie)
7812 {
7813 	return fs_next_device(_cookie);
7814 }
7815 
7816 
7817 status_t
7818 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7819 	size_t infoSize)
7820 {
7821 	if (infoSize != sizeof(fd_info))
7822 		return B_BAD_VALUE;
7823 
7824 	struct io_context* context = NULL;
7825 	struct team* team = NULL;
7826 
7827 	cpu_status state = disable_interrupts();
7828 	GRAB_TEAM_LOCK();
7829 
7830 	bool contextLocked = false;
7831 	team = team_get_team_struct_locked(teamID);
7832 	if (team) {
7833 		// We cannot lock the IO context while holding the team lock, nor can
7834 		// we just drop the team lock, since it might be deleted in the
7835 		// meantime. team_remove_team() acquires the thread lock when removing
7836 		// the team from the team hash table, though. Hence we switch to the
7837 		// thread lock and use mutex_lock_threads_locked().
7838 		context = (io_context*)team->io_context;
7839 
7840 		GRAB_THREAD_LOCK();
7841 		RELEASE_TEAM_LOCK();
7842 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7843 		RELEASE_THREAD_LOCK();
7844 	} else
7845 		RELEASE_TEAM_LOCK();
7846 
7847 	restore_interrupts(state);
7848 
7849 	if (!contextLocked) {
		// team doesn't exist or seems to be gone
7851 		return B_BAD_TEAM_ID;
7852 	}
7853 
	// the team cannot be deleted completely while we hold its
	// io_context mutex, so we can safely work with it now
7856 
7857 	uint32 slot = *_cookie;
7858 
7859 	struct file_descriptor* descriptor;
7860 	while (slot < context->table_size
7861 		&& (descriptor = context->fds[slot]) == NULL) {
7862 		slot++;
7863 	}
7864 
7865 	if (slot >= context->table_size) {
7866 		mutex_unlock(&context->io_mutex);
7867 		return B_ENTRY_NOT_FOUND;
7868 	}
7869 
7870 	info->number = slot;
7871 	info->open_mode = descriptor->open_mode;
7872 
7873 	struct vnode* vnode = fd_vnode(descriptor);
7874 	if (vnode != NULL) {
7875 		info->device = vnode->device;
7876 		info->node = vnode->id;
7877 	} else if (descriptor->u.mount != NULL) {
7878 		info->device = descriptor->u.mount->id;
7879 		info->node = -1;
7880 	}
7881 
7882 	mutex_unlock(&context->io_mutex);
7883 
7884 	*_cookie = slot + 1;
7885 	return B_OK;
7886 }
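
// For illustration, all open FDs of a team can be enumerated with the
// cookie-based interface above (team is a hypothetical team_id):
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info)) == B_OK) {
//		// info.number is the FD slot; info.device/info.node identify the node
//	}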
7887 
7888 
7889 int
7890 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7891 	int perms)
7892 {
7893 	if ((openMode & O_CREAT) != 0) {
7894 		return file_create_entry_ref(device, inode, name, openMode, perms,
7895 			true);
7896 	}
7897 
7898 	return file_open_entry_ref(device, inode, name, openMode, true);
7899 }
7900 
7901 
7902 /*!	\brief Opens a node specified by a FD + path pair.
7903 
7904 	At least one of \a fd and \a path must be specified.
7905 	If only \a fd is given, the function opens the node identified by this
7906 	FD. If only a path is given, this path is opened. If both are given and
7907 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7908 	of the directory (!) identified by \a fd.
7909 
7910 	\param fd The FD. May be < 0.
7911 	\param path The absolute or relative path. May be \c NULL.
7912 	\param openMode The open mode.
7913 	\return A FD referring to the newly opened node, or an error code,
7914 			if an error occurs.
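
	For illustration (assuming \c dirFD refers to an open directory):
	\code
	// an absolute path: the FD is ignored
	int fd1 = _kern_open(-1, "/boot/home/file", O_RDONLY, 0);
	// a relative path: resolved against the directory referred to by dirFD
	int fd2 = _kern_open(dirFD, "file", O_RDWR | O_CREAT, 0644);
	\endcode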
7915 */
7916 int
7917 _kern_open(int fd, const char* path, int openMode, int perms)
7918 {
7919 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7920 	if (pathBuffer.InitCheck() != B_OK)
7921 		return B_NO_MEMORY;
7922 
7923 	if (openMode & O_CREAT)
7924 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7925 
7926 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7927 }
7928 
7929 
7930 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7931 
7932 	The supplied name may be \c NULL, in which case directory identified
7933 	by \a device and \a inode will be opened. Otherwise \a device and
7934 	\a inode identify the parent directory of the directory to be opened
7935 	and \a name its entry name.
7936 
7937 	\param device If \a name is specified the ID of the device the parent
7938 		   directory of the directory to be opened resides on, otherwise
7939 		   the device of the directory itself.
7940 	\param inode If \a name is specified the node ID of the parent
7941 		   directory of the directory to be opened, otherwise node ID of the
7942 		   directory itself.
7943 	\param name The entry name of the directory to be opened. If \c NULL,
7944 		   the \a device + \a inode pair identify the node to be opened.
7945 	\return The FD of the newly opened directory or an error code, if
7946 			something went wrong.
7947 */
7948 int
7949 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7950 {
7951 	return dir_open_entry_ref(device, inode, name, true);
7952 }
7953 
7954 
7955 /*!	\brief Opens a directory specified by a FD + path pair.
7956 
7957 	At least one of \a fd and \a path must be specified.
7958 	If only \a fd is given, the function opens the directory identified by this
7959 	FD. If only a path is given, this path is opened. If both are given and
7960 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7961 	of the directory (!) identified by \a fd.
7962 
7963 	\param fd The FD. May be < 0.
7964 	\param path The absolute or relative path. May be \c NULL.
7965 	\return A FD referring to the newly opened directory, or an error code,
7966 			if an error occurs.
7967 */
7968 int
7969 _kern_open_dir(int fd, const char* path)
7970 {
7971 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7972 	if (pathBuffer.InitCheck() != B_OK)
7973 		return B_NO_MEMORY;
7974 
7975 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7976 }
7977 
7978 
7979 status_t
7980 _kern_fcntl(int fd, int op, uint32 argument)
7981 {
7982 	return common_fcntl(fd, op, argument, true);
7983 }
7984 
7985 
7986 status_t
7987 _kern_fsync(int fd)
7988 {
7989 	return common_sync(fd, true);
7990 }
7991 
7992 
7993 status_t
7994 _kern_lock_node(int fd)
7995 {
7996 	return common_lock_node(fd, true);
7997 }
7998 
7999 
8000 status_t
8001 _kern_unlock_node(int fd)
8002 {
8003 	return common_unlock_node(fd, true);
8004 }
8005 
8006 
8007 status_t
8008 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8009 	int perms)
8010 {
8011 	return dir_create_entry_ref(device, inode, name, perms, true);
8012 }
8013 
8014 
8015 /*!	\brief Creates a directory specified by a FD + path pair.
8016 
8017 	\a path must always be specified (it contains the name of the new directory
8018 	at least). If only a path is given, this path identifies the location at
8019 	which the directory shall be created. If both \a fd and \a path are given
8020 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8021 	of the directory (!) identified by \a fd.
8022 
8023 	\param fd The FD. May be < 0.
8024 	\param path The absolute or relative path. Must not be \c NULL.
8025 	\param perms The access permissions the new directory shall have.
8026 	\return \c B_OK, if the directory has been created successfully, another
8027 			error code otherwise.
8028 */
8029 status_t
8030 _kern_create_dir(int fd, const char* path, int perms)
8031 {
8032 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8033 	if (pathBuffer.InitCheck() != B_OK)
8034 		return B_NO_MEMORY;
8035 
8036 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8037 }
8038 
8039 
8040 status_t
8041 _kern_remove_dir(int fd, const char* path)
8042 {
8043 	if (path) {
8044 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8045 		if (pathBuffer.InitCheck() != B_OK)
8046 			return B_NO_MEMORY;
8047 
8048 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8049 	}
8050 
8051 	return dir_remove(fd, NULL, true);
8052 }
8053 
8054 
8055 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8056 
8057 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node
8059 	identified by this FD. If only a path is given, this path identifies the
8060 	symlink to be read. If both are given and the path is absolute, \a fd is
8061 	ignored; a relative path is reckoned off of the directory (!) identified
8062 	by \a fd.
8063 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8064 	will still be updated to reflect the required buffer size.
8065 
8066 	\param fd The FD. May be < 0.
8067 	\param path The absolute or relative path. May be \c NULL.
8068 	\param buffer The buffer into which the contents of the symlink shall be
8069 		   written.
8070 	\param _bufferSize A pointer to the size of the supplied buffer.
8071 	\return The length of the link on success or an appropriate error code
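
	For illustration (assuming \c linkFD refers to a symlink):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(linkFD, NULL, buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// size now holds the buffer size that would have been required
	}
	\endcode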
8072 */
8073 status_t
8074 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8075 {
8076 	if (path) {
8077 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8078 		if (pathBuffer.InitCheck() != B_OK)
8079 			return B_NO_MEMORY;
8080 
8081 		return common_read_link(fd, pathBuffer.LockBuffer(),
8082 			buffer, _bufferSize, true);
8083 	}
8084 
8085 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8086 }
8087 
8088 
8089 /*!	\brief Creates a symlink specified by a FD + path pair.
8090 
8091 	\a path must always be specified (it contains the name of the new symlink
8092 	at least). If only a path is given, this path identifies the location at
8093 	which the symlink shall be created. If both \a fd and \a path are given and
8094 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8095 	of the directory (!) identified by \a fd.
8096 
	\param fd The FD. May be < 0.
	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
8099 	\param mode The access permissions the new symlink shall have.
8100 	\return \c B_OK, if the symlink has been created successfully, another
8101 			error code otherwise.
8102 */
8103 status_t
8104 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8105 {
8106 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8107 	if (pathBuffer.InitCheck() != B_OK)
8108 		return B_NO_MEMORY;
8109 
8110 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8111 		toPath, mode, true);
8112 }
8113 
8114 
8115 status_t
8116 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8117 	bool traverseLeafLink)
8118 {
8119 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8120 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8121 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8122 		return B_NO_MEMORY;
8123 
8124 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8125 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8126 }
8127 
8128 
8129 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8130 
8131 	\a path must always be specified (it contains at least the name of the entry
8132 	to be deleted). If only a path is given, this path identifies the entry
8133 	directly. If both \a fd and \a path are given and the path is absolute,
8134 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8135 	identified by \a fd.
8136 
8137 	\param fd The FD. May be < 0.
8138 	\param path The absolute or relative path. Must not be \c NULL.
8139 	\return \c B_OK, if the entry has been removed successfully, another
8140 			error code otherwise.
8141 */
8142 status_t
8143 _kern_unlink(int fd, const char* path)
8144 {
8145 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8146 	if (pathBuffer.InitCheck() != B_OK)
8147 		return B_NO_MEMORY;
8148 
8149 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8150 }
8151 
8152 
/*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8154 		   by another FD + path pair.
8155 
8156 	\a oldPath and \a newPath must always be specified (they contain at least
8157 	the name of the entry). If only a path is given, this path identifies the
8158 	entry directly. If both a FD and a path are given and the path is absolute,
8159 	the FD is ignored; a relative path is reckoned off of the directory (!)
8160 	identified by the respective FD.
8161 
8162 	\param oldFD The FD of the old location. May be < 0.
8163 	\param oldPath The absolute or relative path of the old location. Must not
8164 		   be \c NULL.
8165 	\param newFD The FD of the new location. May be < 0.
8166 	\param newPath The absolute or relative path of the new location. Must not
8167 		   be \c NULL.
8168 	\return \c B_OK, if the entry has been moved successfully, another
8169 			error code otherwise.
8170 */
8171 status_t
8172 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8173 {
8174 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8175 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8176 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8177 		return B_NO_MEMORY;
8178 
8179 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8180 		newFD, newPathBuffer.LockBuffer(), true);
8181 }
8182 
8183 
8184 status_t
8185 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8186 {
8187 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8188 	if (pathBuffer.InitCheck() != B_OK)
8189 		return B_NO_MEMORY;
8190 
8191 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8192 		true);
8193 }
8194 
8195 
8196 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8197 
8198 	If only \a fd is given, the stat operation associated with the type
8199 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8200 	given, this path identifies the entry for whose node to retrieve the
8201 	stat data. If both \a fd and \a path are given and the path is absolute,
8202 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8203 	identified by \a fd and specifies the entry whose stat data shall be
8204 	retrieved.
8205 
8206 	\param fd The FD. May be < 0.
8207 	\param path The absolute or relative path. Must not be \c NULL.
8208 	\param traverseLeafLink If \a path is given, \c true specifies that the
8209 		   function shall not stick to symlinks, but traverse them.
8210 	\param stat The buffer the stat data shall be written into.
8211 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data has been read successfully, another
8213 			error code otherwise.
8214 */
8215 status_t
8216 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8217 	struct stat* stat, size_t statSize)
8218 {
8219 	struct stat completeStat;
8220 	struct stat* originalStat = NULL;
8221 	status_t status;
8222 
8223 	if (statSize > sizeof(struct stat))
8224 		return B_BAD_VALUE;
8225 
	// This supports smaller (older) versions of struct stat: we read into
	// a complete structure and copy back only the requested portion.
8227 	if (statSize < sizeof(struct stat)) {
8228 		originalStat = stat;
8229 		stat = &completeStat;
8230 	}
8231 
8232 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8233 
8234 	if (status == B_OK && originalStat != NULL)
8235 		memcpy(originalStat, stat, statSize);
8236 
8237 	return status;
8238 }
8239 
8240 
8241 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8242 
8243 	If only \a fd is given, the stat operation associated with the type
8244 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8245 	given, this path identifies the entry for whose node to write the
8246 	stat data. If both \a fd and \a path are given and the path is absolute,
8247 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8248 	identified by \a fd and specifies the entry whose stat data shall be
8249 	written.
8250 
8251 	\param fd The FD. May be < 0.
8252 	\param path The absolute or relative path. Must not be \c NULL.
8253 	\param traverseLeafLink If \a path is given, \c true specifies that the
8254 		   function shall not stick to symlinks, but traverse them.
8255 	\param stat The buffer containing the stat data to be written.
8256 	\param statSize The size of the supplied stat buffer.
8257 	\param statMask A mask specifying which parts of the stat data shall be
8258 		   written.
	\return \c B_OK, if the stat data has been written successfully,
8260 			another error code otherwise.
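
	For illustration (assuming \c fd refers to an open node and the usual
	\c B_STAT_* mask constants):
	\code
	struct stat st = {};
	st.st_mode = 0644;
	_kern_write_stat(fd, NULL, false, &st, sizeof(st), B_STAT_MODE);
	\endcode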
8261 */
8262 status_t
8263 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8264 	const struct stat* stat, size_t statSize, int statMask)
8265 {
8266 	struct stat completeStat;
8267 
8268 	if (statSize > sizeof(struct stat))
8269 		return B_BAD_VALUE;
8270 
	// This supports smaller (older) versions of struct stat: we zero the
	// missing tail of a complete structure and copy the caller's portion
	// into it.
8272 	if (statSize < sizeof(struct stat)) {
8273 		memset((uint8*)&completeStat + statSize, 0,
8274 			sizeof(struct stat) - statSize);
8275 		memcpy(&completeStat, stat, statSize);
8276 		stat = &completeStat;
8277 	}
8278 
8279 	status_t status;
8280 
8281 	if (path) {
8282 		// path given: write the stat of the node referred to by (fd, path)
8283 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8284 		if (pathBuffer.InitCheck() != B_OK)
8285 			return B_NO_MEMORY;
8286 
8287 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8288 			traverseLeafLink, stat, statMask, true);
8289 	} else {
8290 		// no path given: get the FD and use the FD operation
8291 		struct file_descriptor* descriptor
8292 			= get_fd(get_current_io_context(true), fd);
8293 		if (descriptor == NULL)
8294 			return B_FILE_ERROR;
8295 
8296 		if (descriptor->ops->fd_write_stat)
8297 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8298 		else
8299 			status = B_NOT_SUPPORTED;
8300 
8301 		put_fd(descriptor);
8302 	}
8303 
8304 	return status;
8305 }
8306 
8307 
8308 int
8309 _kern_open_attr_dir(int fd, const char* path)
8310 {
8311 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8312 	if (pathBuffer.InitCheck() != B_OK)
8313 		return B_NO_MEMORY;
8314 
8315 	if (path != NULL)
8316 		pathBuffer.SetTo(path);
8317 
8318 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8319 }
8320 
8321 
8322 int
8323 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8324 	int openMode)
8325 {
8326 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8327 	if (pathBuffer.InitCheck() != B_OK)
8328 		return B_NO_MEMORY;
8329 
8330 	if ((openMode & O_CREAT) != 0) {
8331 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8332 			true);
8333 	}
8334 
8335 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8336 }
8337 
8338 
8339 status_t
8340 _kern_remove_attr(int fd, const char* name)
8341 {
8342 	return attr_remove(fd, name, true);
8343 }
8344 
8345 
8346 status_t
8347 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8348 	const char* toName)
8349 {
8350 	return attr_rename(fromFile, fromName, toFile, toName, true);
8351 }
8352 
8353 
8354 int
8355 _kern_open_index_dir(dev_t device)
8356 {
8357 	return index_dir_open(device, true);
8358 }
8359 
8360 
8361 status_t
8362 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8363 {
8364 	return index_create(device, name, type, flags, true);
8365 }
8366 
8367 
8368 status_t
8369 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8370 {
8371 	return index_name_read_stat(device, name, stat, true);
8372 }
8373 
8374 
8375 status_t
8376 _kern_remove_index(dev_t device, const char* name)
8377 {
8378 	return index_remove(device, name, true);
8379 }
8380 
8381 
8382 status_t
8383 _kern_getcwd(char* buffer, size_t size)
8384 {
8385 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8386 
8387 	// Call vfs to get current working directory
8388 	return get_cwd(buffer, size, true);
8389 }
8390 
8391 
8392 status_t
8393 _kern_setcwd(int fd, const char* path)
8394 {
8395 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8396 	if (pathBuffer.InitCheck() != B_OK)
8397 		return B_NO_MEMORY;
8398 
8399 	if (path != NULL)
8400 		pathBuffer.SetTo(path);
8401 
8402 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8403 }
8404 
8405 
8406 //	#pragma mark - userland syscalls
8407 
8408 
8409 dev_t
8410 _user_mount(const char* userPath, const char* userDevice,
8411 	const char* userFileSystem, uint32 flags, const char* userArgs,
8412 	size_t argsLength)
8413 {
8414 	char fileSystem[B_FILE_NAME_LENGTH];
8415 	KPath path, device;
8416 	char* args = NULL;
8417 	status_t status;
8418 
8419 	if (!IS_USER_ADDRESS(userPath)
8420 		|| !IS_USER_ADDRESS(userFileSystem)
8421 		|| !IS_USER_ADDRESS(userDevice))
8422 		return B_BAD_ADDRESS;
8423 
8424 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8425 		return B_NO_MEMORY;
8426 
8427 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8428 		return B_BAD_ADDRESS;
8429 
8430 	if (userFileSystem != NULL
8431 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8432 		return B_BAD_ADDRESS;
8433 
8434 	if (userDevice != NULL
8435 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8436 			< B_OK)
8437 		return B_BAD_ADDRESS;
8438 
8439 	if (userArgs != NULL && argsLength > 0) {
8440 		// this is a safety restriction
8441 		if (argsLength >= 65536)
8442 			return B_NAME_TOO_LONG;
8443 
8444 		args = (char*)malloc(argsLength + 1);
8445 		if (args == NULL)
8446 			return B_NO_MEMORY;
8447 
8448 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8449 			free(args);
8450 			return B_BAD_ADDRESS;
8451 		}
8452 	}
8453 	path.UnlockBuffer();
8454 	device.UnlockBuffer();
8455 
8456 	status = fs_mount(path.LockBuffer(),
8457 		userDevice != NULL ? device.Path() : NULL,
8458 		userFileSystem ? fileSystem : NULL, flags, args, false);
8459 
8460 	free(args);
8461 	return status;
8462 }
8463 
8464 
8465 status_t
8466 _user_unmount(const char* userPath, uint32 flags)
8467 {
8468 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8469 	if (pathBuffer.InitCheck() != B_OK)
8470 		return B_NO_MEMORY;
8471 
8472 	char* path = pathBuffer.LockBuffer();
8473 
8474 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8475 		return B_BAD_ADDRESS;
8476 
8477 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8478 }
8479 
8480 
8481 status_t
8482 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8483 {
8484 	struct fs_info info;
8485 	status_t status;
8486 
8487 	if (userInfo == NULL)
8488 		return B_BAD_VALUE;
8489 
8490 	if (!IS_USER_ADDRESS(userInfo))
8491 		return B_BAD_ADDRESS;
8492 
8493 	status = fs_read_info(device, &info);
8494 	if (status != B_OK)
8495 		return status;
8496 
8497 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8498 		return B_BAD_ADDRESS;
8499 
8500 	return B_OK;
8501 }
8502 
8503 
8504 status_t
8505 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8506 {
8507 	struct fs_info info;
8508 
8509 	if (userInfo == NULL)
8510 		return B_BAD_VALUE;
8511 
8512 	if (!IS_USER_ADDRESS(userInfo)
8513 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8514 		return B_BAD_ADDRESS;
8515 
8516 	return fs_write_info(device, &info, mask);
8517 }
8518 
8519 
8520 dev_t
8521 _user_next_device(int32* _userCookie)
8522 {
8523 	int32 cookie;
8524 	dev_t device;
8525 
8526 	if (!IS_USER_ADDRESS(_userCookie)
8527 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8528 		return B_BAD_ADDRESS;
8529 
8530 	device = fs_next_device(&cookie);
8531 
8532 	if (device >= B_OK) {
8533 		// update user cookie
8534 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8535 			return B_BAD_ADDRESS;
8536 	}
8537 
8538 	return device;
8539 }
8540 
8541 
8542 status_t
8543 _user_sync(void)
8544 {
8545 	return _kern_sync();
8546 }
8547 
8548 
8549 status_t
8550 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8551 	size_t infoSize)
8552 {
8553 	struct fd_info info;
8554 	uint32 cookie;
8555 
8556 	// only root can do this (or should root's group be enough?)
8557 	if (geteuid() != 0)
8558 		return B_NOT_ALLOWED;
8559 
8560 	if (infoSize != sizeof(fd_info))
8561 		return B_BAD_VALUE;
8562 
8563 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8564 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8565 		return B_BAD_ADDRESS;
8566 
8567 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8568 	if (status != B_OK)
8569 		return status;
8570 
8571 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8572 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8573 		return B_BAD_ADDRESS;
8574 
8575 	return status;
8576 }
8577 
8578 
8579 status_t
8580 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8581 	char* userPath, size_t pathLength)
8582 {
8583 	if (!IS_USER_ADDRESS(userPath))
8584 		return B_BAD_ADDRESS;
8585 
8586 	KPath path(B_PATH_NAME_LENGTH + 1);
8587 	if (path.InitCheck() != B_OK)
8588 		return B_NO_MEMORY;
8589 
8590 	// copy the leaf name onto the stack
8591 	char stackLeaf[B_FILE_NAME_LENGTH];
8592 	if (leaf) {
8593 		if (!IS_USER_ADDRESS(leaf))
8594 			return B_BAD_ADDRESS;
8595 
8596 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8597 		if (length < 0)
8598 			return length;
8599 		if (length >= B_FILE_NAME_LENGTH)
8600 			return B_NAME_TOO_LONG;
8601 
8602 		leaf = stackLeaf;
8603 	}
8604 
8605 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8606 		path.LockBuffer(), path.BufferSize());
8607 	if (status != B_OK)
8608 		return status;
8609 
8610 	path.UnlockBuffer();
8611 
8612 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8613 	if (length < 0)
8614 		return length;
8615 	if (length >= (int)pathLength)
8616 		return B_BUFFER_OVERFLOW;
8617 
8618 	return B_OK;
8619 }
8620 
8621 
8622 status_t
8623 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8624 {
8625 	if (userPath == NULL || buffer == NULL)
8626 		return B_BAD_VALUE;
8627 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8628 		return B_BAD_ADDRESS;
8629 
8630 	// copy path from userland
8631 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8632 	if (pathBuffer.InitCheck() != B_OK)
8633 		return B_NO_MEMORY;
8634 	char* path = pathBuffer.LockBuffer();
8635 
8636 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8637 		return B_BAD_ADDRESS;
8638 
8639 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8640 		false);
8641 	if (error != B_OK)
8642 		return error;
8643 
8644 	// copy back to userland
8645 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8646 	if (len < 0)
8647 		return len;
8648 	if (len >= B_PATH_NAME_LENGTH)
8649 		return B_BUFFER_OVERFLOW;
8650 
8651 	return B_OK;
8652 }
8653 
8654 
8655 int
8656 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8657 	int openMode, int perms)
8658 {
8659 	char name[B_FILE_NAME_LENGTH];
8660 
8661 	if (userName == NULL || device < 0 || inode < 0)
8662 		return B_BAD_VALUE;
8663 	if (!IS_USER_ADDRESS(userName)
8664 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8665 		return B_BAD_ADDRESS;
8666 
8667 	if ((openMode & O_CREAT) != 0) {
8668 		return file_create_entry_ref(device, inode, name, openMode, perms,
			false);
8670 	}
8671 
8672 	return file_open_entry_ref(device, inode, name, openMode, false);
8673 }
8674 
8675 
8676 int
8677 _user_open(int fd, const char* userPath, int openMode, int perms)
8678 {
8679 	KPath path(B_PATH_NAME_LENGTH + 1);
8680 	if (path.InitCheck() != B_OK)
8681 		return B_NO_MEMORY;
8682 
8683 	char* buffer = path.LockBuffer();
8684 
8685 	if (!IS_USER_ADDRESS(userPath)
8686 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8687 		return B_BAD_ADDRESS;
8688 
8689 	if ((openMode & O_CREAT) != 0)
8690 		return file_create(fd, buffer, openMode, perms, false);
8691 
8692 	return file_open(fd, buffer, openMode, false);
8693 }
8694 
8695 
8696 int
8697 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8698 {
8699 	if (userName != NULL) {
8700 		char name[B_FILE_NAME_LENGTH];
8701 
8702 		if (!IS_USER_ADDRESS(userName)
8703 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8704 			return B_BAD_ADDRESS;
8705 
8706 		return dir_open_entry_ref(device, inode, name, false);
8707 	}
8708 	return dir_open_entry_ref(device, inode, NULL, false);
8709 }
8710 
8711 
8712 int
8713 _user_open_dir(int fd, const char* userPath)
8714 {
8715 	if (userPath == NULL)
8716 		return dir_open(fd, NULL, false);
8717 
8718 	KPath path(B_PATH_NAME_LENGTH + 1);
8719 	if (path.InitCheck() != B_OK)
8720 		return B_NO_MEMORY;
8721 
8722 	char* buffer = path.LockBuffer();
8723 
8724 	if (!IS_USER_ADDRESS(userPath)
8725 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8726 		return B_BAD_ADDRESS;
8727 
8728 	return dir_open(fd, buffer, false);
8729 }
8730 
8731 
8732 /*!	\brief Opens a directory's parent directory and returns the entry name
8733 		   of the former.
8734 
	Aside from that it returns the directory's entry name, this method is
8736 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8737 	equivalent, if \a userName is \c NULL.
8738 
8739 	If a name buffer is supplied and the name does not fit the buffer, the
8740 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8741 
8742 	\param fd A FD referring to a directory.
8743 	\param userName Buffer the directory's entry name shall be written into.
8744 		   May be \c NULL.
8745 	\param nameLength Size of the name buffer.
8746 	\return The file descriptor of the opened parent directory, if everything
8747 			went fine, an error code otherwise.
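
	For illustration, as invoked from userland (assuming \c dirFD refers to
	an open directory):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
	// on success, name contains the entry name of dirFD within parentFD
	\endcode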
8748 */
8749 int
8750 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8751 {
8752 	bool kernel = false;
8753 
8754 	if (userName && !IS_USER_ADDRESS(userName))
8755 		return B_BAD_ADDRESS;
8756 
8757 	// open the parent dir
8758 	int parentFD = dir_open(fd, (char*)"..", kernel);
8759 	if (parentFD < 0)
8760 		return parentFD;
8761 	FDCloser fdCloser(parentFD, kernel);
8762 
8763 	if (userName) {
8764 		// get the vnodes
8765 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8766 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8767 		VNodePutter parentVNodePutter(parentVNode);
8768 		VNodePutter dirVNodePutter(dirVNode);
8769 		if (!parentVNode || !dirVNode)
8770 			return B_FILE_ERROR;
8771 
8772 		// get the vnode name
8773 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8774 		struct dirent* buffer = (struct dirent*)_buffer;
8775 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8776 			sizeof(_buffer), get_current_io_context(false));
8777 		if (status != B_OK)
8778 			return status;
8779 
8780 		// copy the name to the userland buffer
8781 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8782 		if (len < 0)
8783 			return len;
8784 		if (len >= (int)nameLength)
8785 			return B_BUFFER_OVERFLOW;
8786 	}
8787 
8788 	return fdCloser.Detach();
8789 }
8790 
8791 
8792 status_t
8793 _user_fcntl(int fd, int op, uint32 argument)
8794 {
8795 	status_t status = common_fcntl(fd, op, argument, false);
8796 	if (op == F_SETLKW)
8797 		syscall_restart_handle_post(status);
8798 
8799 	return status;
8800 }
8801 
8802 
8803 status_t
8804 _user_fsync(int fd)
8805 {
8806 	return common_sync(fd, false);
8807 }
8808 
8809 
8810 status_t
8811 _user_flock(int fd, int operation)
8812 {
8813 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8814 
8815 	// Check if the operation is valid
8816 	switch (operation & ~LOCK_NB) {
8817 		case LOCK_UN:
8818 		case LOCK_SH:
8819 		case LOCK_EX:
8820 			break;
8821 
8822 		default:
8823 			return B_BAD_VALUE;
8824 	}
8825 
8826 	struct file_descriptor* descriptor;
8827 	struct vnode* vnode;
8828 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8829 	if (descriptor == NULL)
8830 		return B_FILE_ERROR;
8831 
8832 	if (descriptor->type != FDTYPE_FILE) {
8833 		put_fd(descriptor);
8834 		return B_BAD_VALUE;
8835 	}
8836 
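	// Translate the flock() operation into an advisory lock spanning the
	// whole file (offset 0 through OFF_MAX).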
8837 	struct flock flock;
8838 	flock.l_start = 0;
8839 	flock.l_len = OFF_MAX;
8840 	flock.l_whence = 0;
8841 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8842 
8843 	status_t status;
8844 	if ((operation & LOCK_UN) != 0)
8845 		status = release_advisory_lock(vnode, &flock);
8846 	else {
8847 		status = acquire_advisory_lock(vnode,
8848 			thread_get_current_thread()->team->session_id, &flock,
8849 			(operation & LOCK_NB) == 0);
8850 	}
8851 
8852 	syscall_restart_handle_post(status);
8853 
8854 	put_fd(descriptor);
8855 	return status;
8856 }
8857 
8858 
8859 status_t
8860 _user_lock_node(int fd)
8861 {
8862 	return common_lock_node(fd, false);
8863 }
8864 
8865 
8866 status_t
8867 _user_unlock_node(int fd)
8868 {
8869 	return common_unlock_node(fd, false);
8870 }
8871 
8872 
8873 status_t
8874 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8875 	int perms)
8876 {
8877 	char name[B_FILE_NAME_LENGTH];
8878 	status_t status;
8879 
8880 	if (!IS_USER_ADDRESS(userName))
8881 		return B_BAD_ADDRESS;
8882 
8883 	status = user_strlcpy(name, userName, sizeof(name));
8884 	if (status < 0)
8885 		return status;
8886 
8887 	return dir_create_entry_ref(device, inode, name, perms, false);
8888 }
8889 
8890 
8891 status_t
8892 _user_create_dir(int fd, const char* userPath, int perms)
8893 {
8894 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8895 	if (pathBuffer.InitCheck() != B_OK)
8896 		return B_NO_MEMORY;
8897 
8898 	char* path = pathBuffer.LockBuffer();
8899 
8900 	if (!IS_USER_ADDRESS(userPath)
8901 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8902 		return B_BAD_ADDRESS;
8903 
8904 	return dir_create(fd, path, perms, false);
8905 }
8906 
8907 
8908 status_t
8909 _user_remove_dir(int fd, const char* userPath)
8910 {
8911 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8912 	if (pathBuffer.InitCheck() != B_OK)
8913 		return B_NO_MEMORY;
8914 
8915 	char* path = pathBuffer.LockBuffer();
8916 
8917 	if (userPath != NULL) {
8918 		if (!IS_USER_ADDRESS(userPath)
8919 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8920 			return B_BAD_ADDRESS;
8921 	}
8922 
8923 	return dir_remove(fd, userPath ? path : NULL, false);
8924 }
8925 
8926 
8927 status_t
8928 _user_read_link(int fd, const char* userPath, char* userBuffer,
8929 	size_t* userBufferSize)
8930 {
8931 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8932 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8933 		return B_NO_MEMORY;
8934 
8935 	size_t bufferSize;
8936 
8937 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8938 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8939 		return B_BAD_ADDRESS;
8940 
8941 	char* path = pathBuffer.LockBuffer();
8942 	char* buffer = linkBuffer.LockBuffer();
8943 
8944 	if (userPath) {
8945 		if (!IS_USER_ADDRESS(userPath)
8946 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8947 			return B_BAD_ADDRESS;
8948 
8949 		if (bufferSize > B_PATH_NAME_LENGTH)
8950 			bufferSize = B_PATH_NAME_LENGTH;
8951 	}
8952 
8953 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8954 		&bufferSize, false);
8955 
8956 	// we also update the bufferSize in case of errors
8957 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8958 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	if (status != B_OK)
8962 		return status;
8963 
8964 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8965 		return B_BAD_ADDRESS;
8966 
8967 	return B_OK;
8968 }
8969 
8970 
8971 status_t
8972 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8973 	int mode)
8974 {
8975 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8976 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8977 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8978 		return B_NO_MEMORY;
8979 
8980 	char* path = pathBuffer.LockBuffer();
8981 	char* toPath = toPathBuffer.LockBuffer();
8982 
8983 	if (!IS_USER_ADDRESS(userPath)
8984 		|| !IS_USER_ADDRESS(userToPath)
8985 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8986 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8987 		return B_BAD_ADDRESS;
8988 
8989 	return common_create_symlink(fd, path, toPath, mode, false);
8990 }
8991 
8992 
8993 status_t
8994 _user_create_link(int pathFD, const char* userPath, int toFD,
8995 	const char* userToPath, bool traverseLeafLink)
8996 {
8997 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8998 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8999 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9000 		return B_NO_MEMORY;
9001 
9002 	char* path = pathBuffer.LockBuffer();
9003 	char* toPath = toPathBuffer.LockBuffer();
9004 
9005 	if (!IS_USER_ADDRESS(userPath)
9006 		|| !IS_USER_ADDRESS(userToPath)
9007 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9008 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9009 		return B_BAD_ADDRESS;
9010 
9011 	status_t status = check_path(toPath);
9012 	if (status != B_OK)
9013 		return status;
9014 
9015 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9016 		false);
9017 }
9018 
9019 
9020 status_t
9021 _user_unlink(int fd, const char* userPath)
9022 {
9023 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9024 	if (pathBuffer.InitCheck() != B_OK)
9025 		return B_NO_MEMORY;
9026 
9027 	char* path = pathBuffer.LockBuffer();
9028 
9029 	if (!IS_USER_ADDRESS(userPath)
9030 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9031 		return B_BAD_ADDRESS;
9032 
9033 	return common_unlink(fd, path, false);
9034 }
9035 
9036 
9037 status_t
9038 _user_rename(int oldFD, const char* userOldPath, int newFD,
9039 	const char* userNewPath)
9040 {
9041 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9042 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9043 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9044 		return B_NO_MEMORY;
9045 
9046 	char* oldPath = oldPathBuffer.LockBuffer();
9047 	char* newPath = newPathBuffer.LockBuffer();
9048 
9049 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9050 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9051 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9052 		return B_BAD_ADDRESS;
9053 
9054 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9055 }
9056 
9057 
9058 status_t
9059 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9060 {
9061 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9062 	if (pathBuffer.InitCheck() != B_OK)
9063 		return B_NO_MEMORY;
9064 
9065 	char* path = pathBuffer.LockBuffer();
9066 
9067 	if (!IS_USER_ADDRESS(userPath)
9068 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9069 		return B_BAD_ADDRESS;
9070 	}
9071 
9072 	// split into directory vnode and filename path
9073 	char filename[B_FILE_NAME_LENGTH];
9074 	struct vnode* dir;
9075 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9076 	if (status != B_OK)
9077 		return status;
9078 
9079 	VNodePutter _(dir);
9080 
9081 	// the underlying FS needs to support creating FIFOs
9082 	if (!HAS_FS_CALL(dir, create_special_node))
9083 		return B_UNSUPPORTED;
9084 
9085 	// create the entry	-- the FIFO sub node is set up automatically
9086 	fs_vnode superVnode;
9087 	ino_t nodeID;
9088 	status = FS_CALL(dir, create_special_node, filename, NULL,
9089 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9090 
9091 	// create_special_node() acquired a reference for us that we don't need.
9092 	if (status == B_OK)
9093 		put_vnode(dir->mount->volume, nodeID);
9094 
9095 	return status;
9096 }
9097 
9098 
9099 status_t
9100 _user_create_pipe(int* userFDs)
9101 {
9102 	// rootfs should support creating FIFOs, but let's be sure
9103 	if (!HAS_FS_CALL(sRoot, create_special_node))
9104 		return B_UNSUPPORTED;
9105 
9106 	// create the node	-- the FIFO sub node is set up automatically
9107 	fs_vnode superVnode;
9108 	ino_t nodeID;
9109 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9110 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9111 	if (status != B_OK)
9112 		return status;
9113 
9114 	// We've got one reference to the node and need another one.
9115 	struct vnode* vnode;
9116 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9117 	if (status != B_OK) {
9118 		// that should not happen
9119 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
			sRoot->mount->id, nodeID);
9121 		return status;
9122 	}
9123 
	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
9126 	int fds[2];
9127 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9128 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9129 
9130 	FDCloser closer0(fds[0], false);
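	// The FDClosers close the FDs automatically if anything below fails;
	// on success they are Detach()ed so the FDs stay open for the caller.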
9131 	FDCloser closer1(fds[1], false);
9132 
9133 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9134 
9135 	// copy FDs to userland
9136 	if (status == B_OK) {
9137 		if (!IS_USER_ADDRESS(userFDs)
9138 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9139 			status = B_BAD_ADDRESS;
9140 		}
9141 	}
9142 
9143 	// keep FDs, if everything went fine
9144 	if (status == B_OK) {
9145 		closer0.Detach();
9146 		closer1.Detach();
9147 	}
9148 
9149 	return status;
9150 }
9151 
9152 
9153 status_t
9154 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9155 {
9156 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9157 	if (pathBuffer.InitCheck() != B_OK)
9158 		return B_NO_MEMORY;
9159 
9160 	char* path = pathBuffer.LockBuffer();
9161 
9162 	if (!IS_USER_ADDRESS(userPath)
9163 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9164 		return B_BAD_ADDRESS;
9165 
9166 	return common_access(fd, path, mode, effectiveUserGroup, false);
9167 }
9168 
9169 
9170 status_t
9171 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9172 	struct stat* userStat, size_t statSize)
9173 {
9174 	struct stat stat;
9175 	status_t status;
9176 
9177 	if (statSize > sizeof(struct stat))
9178 		return B_BAD_VALUE;
9179 
9180 	if (!IS_USER_ADDRESS(userStat))
9181 		return B_BAD_ADDRESS;
9182 
9183 	if (userPath) {
9184 		// path given: get the stat of the node referred to by (fd, path)
9185 		if (!IS_USER_ADDRESS(userPath))
9186 			return B_BAD_ADDRESS;
9187 
9188 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9189 		if (pathBuffer.InitCheck() != B_OK)
9190 			return B_NO_MEMORY;
9191 
9192 		char* path = pathBuffer.LockBuffer();
9193 
9194 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9195 		if (length < B_OK)
9196 			return length;
9197 		if (length >= B_PATH_NAME_LENGTH)
9198 			return B_NAME_TOO_LONG;
9199 
9200 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9201 	} else {
9202 		// no path given: get the FD and use the FD operation
9203 		struct file_descriptor* descriptor
9204 			= get_fd(get_current_io_context(false), fd);
9205 		if (descriptor == NULL)
9206 			return B_FILE_ERROR;
9207 
9208 		if (descriptor->ops->fd_read_stat)
9209 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9210 		else
9211 			status = B_NOT_SUPPORTED;
9212 
9213 		put_fd(descriptor);
9214 	}
9215 
9216 	if (status != B_OK)
9217 		return status;
9218 
9219 	return user_memcpy(userStat, &stat, statSize);
9220 }
9221 
9222 
9223 status_t
9224 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9225 	const struct stat* userStat, size_t statSize, int statMask)
9226 {
9227 	if (statSize > sizeof(struct stat))
9228 		return B_BAD_VALUE;
9229 
9230 	struct stat stat;
9231 
9232 	if (!IS_USER_ADDRESS(userStat)
9233 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9234 		return B_BAD_ADDRESS;
9235 
9236 	// clear additional stat fields
9237 	if (statSize < sizeof(struct stat))
9238 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9239 
9240 	status_t status;
9241 
9242 	if (userPath) {
9243 		// path given: write the stat of the node referred to by (fd, path)
9244 		if (!IS_USER_ADDRESS(userPath))
9245 			return B_BAD_ADDRESS;
9246 
9247 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9248 		if (pathBuffer.InitCheck() != B_OK)
9249 			return B_NO_MEMORY;
9250 
9251 		char* path = pathBuffer.LockBuffer();
9252 
9253 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9254 		if (length < B_OK)
9255 			return length;
9256 		if (length >= B_PATH_NAME_LENGTH)
9257 			return B_NAME_TOO_LONG;
9258 
9259 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9260 			statMask, false);
9261 	} else {
9262 		// no path given: get the FD and use the FD operation
9263 		struct file_descriptor* descriptor
9264 			= get_fd(get_current_io_context(false), fd);
9265 		if (descriptor == NULL)
9266 			return B_FILE_ERROR;
9267 
9268 		if (descriptor->ops->fd_write_stat) {
9269 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9270 				statMask);
9271 		} else
9272 			status = B_NOT_SUPPORTED;
9273 
9274 		put_fd(descriptor);
9275 	}
9276 
9277 	return status;
9278 }
9279 
9280 
9281 int
9282 _user_open_attr_dir(int fd, const char* userPath)
9283 {
9284 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9285 	if (pathBuffer.InitCheck() != B_OK)
9286 		return B_NO_MEMORY;
9287 
9288 	char* path = pathBuffer.LockBuffer();
9289 
9290 	if (userPath != NULL) {
9291 		if (!IS_USER_ADDRESS(userPath)
9292 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9293 			return B_BAD_ADDRESS;
9294 	}
9295 
9296 	return attr_dir_open(fd, userPath ? path : NULL, false);
9297 }
9298 
9299 
9300 ssize_t
9301 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9302 	size_t readBytes)
9303 {
9304 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9305 	if (attr < 0)
9306 		return attr;
9307 
9308 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9309 	_user_close(attr);
9310 
9311 	return bytes;
9312 }
9313 
9314 
9315 ssize_t
9316 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9317 	const void* buffer, size_t writeBytes)
9318 {
	// Try to support the BeOS-typical truncation semantics as well as the
	// position argument
9321 	int attr = attr_create(fd, NULL, attribute, type,
9322 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9323 	if (attr < 0)
9324 		return attr;
9325 
9326 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9327 	_user_close(attr);
9328 
9329 	return bytes;
9330 }
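
// For illustration of the semantics above (hypothetical attribute name):
//
//	// pos == 0: the attribute is truncated first (BeOS behavior)
//	_user_write_attr(fd, "my:attr", B_STRING_TYPE, 0, "abc", 3);
//	// pos != 0: existing contents up to pos are preserved
//	_user_write_attr(fd, "my:attr", B_STRING_TYPE, 3, "def", 3);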
9331 
9332 
9333 status_t
9334 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9335 {
9336 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9337 	if (attr < 0)
9338 		return attr;
9339 
9340 	struct file_descriptor* descriptor
9341 		= get_fd(get_current_io_context(false), attr);
9342 	if (descriptor == NULL) {
9343 		_user_close(attr);
9344 		return B_FILE_ERROR;
9345 	}
9346 
9347 	struct stat stat;
9348 	status_t status;
9349 	if (descriptor->ops->fd_read_stat)
9350 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9351 	else
9352 		status = B_NOT_SUPPORTED;
9353 
9354 	put_fd(descriptor);
9355 	_user_close(attr);
9356 
9357 	if (status == B_OK) {
9358 		attr_info info;
9359 		info.type = stat.st_type;
9360 		info.size = stat.st_size;
9361 
9362 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9363 			return B_BAD_ADDRESS;
9364 	}
9365 
9366 	return status;
9367 }
9368 
9369 
9370 int
9371 _user_open_attr(int fd, const char* userPath, const char* userName,
9372 	uint32 type, int openMode)
9373 {
9374 	char name[B_FILE_NAME_LENGTH];
9375 
9376 	if (!IS_USER_ADDRESS(userName)
9377 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9378 		return B_BAD_ADDRESS;
9379 
9380 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9381 	if (pathBuffer.InitCheck() != B_OK)
9382 		return B_NO_MEMORY;
9383 
9384 	char* path = pathBuffer.LockBuffer();
9385 
9386 	if (userPath != NULL) {
9387 		if (!IS_USER_ADDRESS(userPath)
9388 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9389 			return B_BAD_ADDRESS;
9390 	}
9391 
9392 	if ((openMode & O_CREAT) != 0) {
9393 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9394 			false);
9395 	}
9396 
9397 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9398 }
9399 
9400 
9401 status_t
9402 _user_remove_attr(int fd, const char* userName)
9403 {
9404 	char name[B_FILE_NAME_LENGTH];
9405 
9406 	if (!IS_USER_ADDRESS(userName)
9407 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9408 		return B_BAD_ADDRESS;
9409 
9410 	return attr_remove(fd, name, false);
9411 }
9412 
9413 
9414 status_t
9415 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9416 	const char* userToName)
9417 {
9418 	if (!IS_USER_ADDRESS(userFromName)
9419 		|| !IS_USER_ADDRESS(userToName))
9420 		return B_BAD_ADDRESS;
9421 
9422 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9423 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9424 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9425 		return B_NO_MEMORY;
9426 
9427 	char* fromName = fromNameBuffer.LockBuffer();
9428 	char* toName = toNameBuffer.LockBuffer();
9429 
9430 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9431 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9432 		return B_BAD_ADDRESS;
9433 
9434 	return attr_rename(fromFile, fromName, toFile, toName, false);
9435 }
9436 
9437 
9438 int
9439 _user_open_index_dir(dev_t device)
9440 {
9441 	return index_dir_open(device, false);
9442 }
9443 
9444 
9445 status_t
9446 _user_create_index(dev_t device, const char* userName, uint32 type,
9447 	uint32 flags)
9448 {
9449 	char name[B_FILE_NAME_LENGTH];
9450 
9451 	if (!IS_USER_ADDRESS(userName)
9452 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9453 		return B_BAD_ADDRESS;
9454 
9455 	return index_create(device, name, type, flags, false);
9456 }
9457 
9458 
9459 status_t
9460 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9461 {
9462 	char name[B_FILE_NAME_LENGTH];
9463 	struct stat stat;
9464 	status_t status;
9465 
9466 	if (!IS_USER_ADDRESS(userName)
9467 		|| !IS_USER_ADDRESS(userStat)
9468 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9469 		return B_BAD_ADDRESS;
9470 
9471 	status = index_name_read_stat(device, name, &stat, false);
9472 	if (status == B_OK) {
9473 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9474 			return B_BAD_ADDRESS;
9475 	}
9476 
9477 	return status;
9478 }
9479 
9480 
9481 status_t
9482 _user_remove_index(dev_t device, const char* userName)
9483 {
9484 	char name[B_FILE_NAME_LENGTH];
9485 
9486 	if (!IS_USER_ADDRESS(userName)
9487 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9488 		return B_BAD_ADDRESS;
9489 
9490 	return index_remove(device, name, false);
9491 }
9492 
9493 
9494 status_t
9495 _user_getcwd(char* userBuffer, size_t size)
9496 {
9497 	if (size == 0)
9498 		return B_BAD_VALUE;
9499 	if (!IS_USER_ADDRESS(userBuffer))
9500 		return B_BAD_ADDRESS;
9501 
9502 	if (size > kMaxPathLength)
9503 		size = kMaxPathLength;
9504 
9505 	KPath pathBuffer(size);
9506 	if (pathBuffer.InitCheck() != B_OK)
9507 		return B_NO_MEMORY;
9508 
9509 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9510 
9511 	char* path = pathBuffer.LockBuffer();
9512 
9513 	status_t status = get_cwd(path, size, false);
9514 	if (status != B_OK)
9515 		return status;
9516 
9517 	// Copy back the result
9518 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9519 		return B_BAD_ADDRESS;
9520 
9521 	return status;
9522 }
9523 
9524 
9525 status_t
9526 _user_setcwd(int fd, const char* userPath)
9527 {
9528 	TRACE(("user_setcwd: path = %p\n", userPath));
9529 
9530 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9531 	if (pathBuffer.InitCheck() != B_OK)
9532 		return B_NO_MEMORY;
9533 
9534 	char* path = pathBuffer.LockBuffer();
9535 
9536 	if (userPath != NULL) {
9537 		if (!IS_USER_ADDRESS(userPath)
9538 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9539 			return B_BAD_ADDRESS;
9540 	}
9541 
9542 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9543 }
9544 
9545 
9546 status_t
9547 _user_change_root(const char* userPath)
9548 {
9549 	// only root is allowed to chroot()
9550 	if (geteuid() != 0)
9551 		return B_NOT_ALLOWED;
9552 
9553 	// alloc path buffer
9554 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9555 	if (pathBuffer.InitCheck() != B_OK)
9556 		return B_NO_MEMORY;
9557 
9558 	// copy userland path to kernel
9559 	char* path = pathBuffer.LockBuffer();
9560 	if (userPath != NULL) {
9561 		if (!IS_USER_ADDRESS(userPath)
9562 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9563 			return B_BAD_ADDRESS;
9564 	}
9565 
9566 	// get the vnode
9567 	struct vnode* vnode;
9568 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9569 	if (status != B_OK)
9570 		return status;
9571 
9572 	// set the new root
9573 	struct io_context* context = get_current_io_context(false);
9574 	mutex_lock(&sIOContextRootLock);
9575 	struct vnode* oldRoot = context->root;
9576 	context->root = vnode;
9577 	mutex_unlock(&sIOContextRootLock);
9578 
9579 	put_vnode(oldRoot);
9580 
9581 	return B_OK;
9582 }
9583 
9584 
9585 int
9586 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9587 	uint32 flags, port_id port, int32 token)
9588 {
9589 	char* query;
9590 
9591 	if (device < 0 || userQuery == NULL || queryLength == 0)
9592 		return B_BAD_VALUE;
9593 
9594 	// this is a safety restriction
9595 	if (queryLength >= 65536)
9596 		return B_NAME_TOO_LONG;
9597 
9598 	query = (char*)malloc(queryLength + 1);
9599 	if (query == NULL)
9600 		return B_NO_MEMORY;
9601 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9602 		free(query);
9603 		return B_BAD_ADDRESS;
9604 	}
9605 
9606 	int fd = query_open(device, query, flags, port, token, false);
9607 
9608 	free(query);
9609 	return fd;
9610 }
9611 
9612 
9613 #include "vfs_request_io.cpp"
9614