xref: /haiku/src/system/kernel/fs/vfs.cpp (revision e7d5c75dce28921de0dc981ed840205a67a0c0e5)
1 /*
2  * Copyright 2005-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() - this is not depending
112 	// on PATH_MAX
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and covers_vnode fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is as soon
126 	as the mount is mounted and it is made sure it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount) (read) access
128 	to those fields is always safe, even without additional locking. Morever
129 	while mounted the mount holds a reference to the covers_vnode, and thus
130 	making the access path vnode->mount->covers_vnode->mount->... safe if a
131 	reference to vnode is held (note that for the root mount covers_vnode
132 	is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed to read/write access the sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save to the immutable fields (device, id,
234 	private_node, mount) to which only read-only access is allowed.
235 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
236 	well as the busy, removed, unused flags, and the vnode's type can also be
237 	write access when holding a read lock to sVnodeLock *and* having the vnode
238 	locked. Writing access to covered_by requires to write lock sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not have this lock held when calling create_sem(), as this
242 	might call vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
549 		status_t status, size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
581 			fFlags, fStatus, fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	iovec*			fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	size_t			fBytesRequested;
594 	status_t		fStatus;
595 	size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
603 		status_t status, size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
622 		status_t status, size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the gMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not, if an error occurs -- the function write
877 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
878 	error the lock is not not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now, will cause the next to last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is 1. this function must not be invoked when the
1072 	node is busy in the first place and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything when fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to the
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode and we rather don't free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 static void
1325 free_unused_vnodes()
1326 {
1327 	free_unused_vnodes(
1328 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1329 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1330 }
1331 
1332 
1333 static void
1334 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1335 {
1336 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1337 
1338 	free_unused_vnodes(level);
1339 }
1340 
1341 
1342 static inline void
1343 put_advisory_locking(struct advisory_locking* locking)
1344 {
1345 	release_sem(locking->lock);
1346 }
1347 
1348 
1349 /*!	Returns the advisory_locking object of the \a vnode in case it
1350 	has one, and locks it.
1351 	You have to call put_advisory_locking() when you're done with
1352 	it.
1353 	Note, you must not have the vnode mutex locked when calling
1354 	this function.
1355 */
1356 static struct advisory_locking*
1357 get_advisory_locking(struct vnode* vnode)
1358 {
1359 	rw_lock_read_lock(&sVnodeLock);
1360 	vnode->Lock();
1361 
1362 	struct advisory_locking* locking = vnode->advisory_locking;
1363 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1364 
1365 	vnode->Unlock();
1366 	rw_lock_read_unlock(&sVnodeLock);
1367 
1368 	if (lock >= 0)
1369 		lock = acquire_sem(lock);
1370 	if (lock < 0) {
1371 		// This means the locking has been deleted in the mean time
1372 		// or had never existed in the first place - otherwise, we
1373 		// would get the lock at some point.
1374 		return NULL;
1375 	}
1376 
1377 	return locking;
1378 }
1379 
1380 
1381 /*!	Creates a locked advisory_locking object, and attaches it to the
1382 	given \a vnode.
1383 	Returns B_OK in case of success - also if the vnode got such an
1384 	object from someone else in the mean time, you'll still get this
1385 	one locked then.
1386 */
1387 static status_t
1388 create_advisory_locking(struct vnode* vnode)
1389 {
1390 	if (vnode == NULL)
1391 		return B_FILE_ERROR;
1392 
1393 	ObjectDeleter<advisory_locking> lockingDeleter;
1394 	struct advisory_locking* locking = NULL;
1395 
1396 	while (get_advisory_locking(vnode) == NULL) {
1397 		// no locking object set on the vnode yet, create one
1398 		if (locking == NULL) {
1399 			locking = new(std::nothrow) advisory_locking;
1400 			if (locking == NULL)
1401 				return B_NO_MEMORY;
1402 			lockingDeleter.SetTo(locking);
1403 
1404 			locking->wait_sem = create_sem(0, "advisory lock");
1405 			if (locking->wait_sem < 0)
1406 				return locking->wait_sem;
1407 
1408 			locking->lock = create_sem(0, "advisory locking");
1409 			if (locking->lock < 0)
1410 				return locking->lock;
1411 		}
1412 
1413 		// set our newly created locking object
1414 		ReadLocker _(sVnodeLock);
1415 		AutoLocker<Vnode> nodeLocker(vnode);
1416 		if (vnode->advisory_locking == NULL) {
1417 			vnode->advisory_locking = locking;
1418 			lockingDeleter.Detach();
1419 			return B_OK;
1420 		}
1421 	}
1422 
1423 	// The vnode already had a locking object. That's just as well.
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 /*!	Retrieves the first lock that has been set by the current team.
1430 */
1431 static status_t
1432 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1433 {
1434 	struct advisory_locking* locking = get_advisory_locking(vnode);
1435 	if (locking == NULL)
1436 		return B_BAD_VALUE;
1437 
1438 	// TODO: this should probably get the flock by its file descriptor!
1439 	team_id team = team_get_current_team_id();
1440 	status_t status = B_BAD_VALUE;
1441 
1442 	LockList::Iterator iterator = locking->locks.GetIterator();
1443 	while (iterator.HasNext()) {
1444 		struct advisory_lock* lock = iterator.Next();
1445 
1446 		if (lock->team == team) {
1447 			flock->l_start = lock->start;
1448 			flock->l_len = lock->end - lock->start + 1;
1449 			status = B_OK;
1450 			break;
1451 		}
1452 	}
1453 
1454 	put_advisory_locking(locking);
1455 	return status;
1456 }
1457 
1458 
1459 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1460 	with the advisory_lock \a lock.
1461 */
1462 static bool
1463 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1464 {
1465 	if (flock == NULL)
1466 		return true;
1467 
1468 	return lock->start <= flock->l_start - 1 + flock->l_len
1469 		&& lock->end >= flock->l_start;
1470 }
1471 
1472 
1473 /*!	Removes the specified lock, or all locks of the calling team
1474 	if \a flock is NULL.
1475 */
1476 static status_t
1477 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1478 {
1479 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1480 
1481 	struct advisory_locking* locking = get_advisory_locking(vnode);
1482 	if (locking == NULL)
1483 		return B_OK;
1484 
1485 	// TODO: use the thread ID instead??
1486 	team_id team = team_get_current_team_id();
1487 	pid_t session = thread_get_current_thread()->team->session_id;
1488 
1489 	// find matching lock entries
1490 
1491 	LockList::Iterator iterator = locking->locks.GetIterator();
1492 	while (iterator.HasNext()) {
1493 		struct advisory_lock* lock = iterator.Next();
1494 		bool removeLock = false;
1495 
1496 		if (lock->session == session)
1497 			removeLock = true;
1498 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1499 			bool endsBeyond = false;
1500 			bool startsBefore = false;
1501 			if (flock != NULL) {
1502 				startsBefore = lock->start < flock->l_start;
1503 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1504 			}
1505 
1506 			if (!startsBefore && !endsBeyond) {
1507 				// lock is completely contained in flock
1508 				removeLock = true;
1509 			} else if (startsBefore && !endsBeyond) {
1510 				// cut the end of the lock
1511 				lock->end = flock->l_start - 1;
1512 			} else if (!startsBefore && endsBeyond) {
1513 				// cut the start of the lock
1514 				lock->start = flock->l_start + flock->l_len;
1515 			} else {
1516 				// divide the lock into two locks
1517 				struct advisory_lock* secondLock = new advisory_lock;
1518 				if (secondLock == NULL) {
1519 					// TODO: we should probably revert the locks we already
1520 					// changed... (ie. allocate upfront)
1521 					put_advisory_locking(locking);
1522 					return B_NO_MEMORY;
1523 				}
1524 
1525 				lock->end = flock->l_start - 1;
1526 
1527 				secondLock->team = lock->team;
1528 				secondLock->session = lock->session;
1529 				// values must already be normalized when getting here
1530 				secondLock->start = flock->l_start + flock->l_len;
1531 				secondLock->end = lock->end;
1532 				secondLock->shared = lock->shared;
1533 
1534 				locking->locks.Add(secondLock);
1535 			}
1536 		}
1537 
1538 		if (removeLock) {
1539 			// this lock is no longer used
1540 			iterator.Remove();
1541 			free(lock);
1542 		}
1543 	}
1544 
1545 	bool removeLocking = locking->locks.IsEmpty();
1546 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1547 
1548 	put_advisory_locking(locking);
1549 
1550 	if (removeLocking) {
1551 		// We can remove the whole advisory locking structure; it's no
1552 		// longer used
1553 		locking = get_advisory_locking(vnode);
1554 		if (locking != NULL) {
1555 			ReadLocker locker(sVnodeLock);
1556 			AutoLocker<Vnode> nodeLocker(vnode);
1557 
1558 			// the locking could have been changed in the mean time
1559 			if (locking->locks.IsEmpty()) {
1560 				vnode->advisory_locking = NULL;
1561 				nodeLocker.Unlock();
1562 				locker.Unlock();
1563 
1564 				// we've detached the locking from the vnode, so we can
1565 				// safely delete it
1566 				delete_sem(locking->lock);
1567 				delete_sem(locking->wait_sem);
1568 				delete locking;
1569 			} else {
1570 				// the locking is in use again
1571 				nodeLocker.Unlock();
1572 				locker.Unlock();
1573 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1574 			}
1575 		}
1576 	}
1577 
1578 	return B_OK;
1579 }
1580 
1581 
1582 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1583 	will wait for the lock to become available, if there are any collisions
1584 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1585 
1586 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1587 	BSD flock() semantics are used, that is, all children can unlock the file
1588 	in question (we even allow parents to remove the lock, though, but that
1589 	seems to be in line to what the BSD's are doing).
1590 */
1591 static status_t
1592 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1593 	bool wait)
1594 {
1595 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1596 		vnode, flock, wait ? "yes" : "no"));
1597 
1598 	bool shared = flock->l_type == F_RDLCK;
1599 	status_t status = B_OK;
1600 
1601 	// TODO: do deadlock detection!
1602 
1603 	struct advisory_locking* locking;
1604 	sem_id waitForLock;
1605 
1606 	while (true) {
1607 		// if this vnode has an advisory_locking structure attached,
1608 		// lock that one and search for any colliding file lock
1609 		status = create_advisory_locking(vnode);
1610 		if (status != B_OK)
1611 			return status;
1612 
1613 		locking = vnode->advisory_locking;
1614 		team_id team = team_get_current_team_id();
1615 		waitForLock = -1;
1616 
1617 		// test for collisions
1618 		LockList::Iterator iterator = locking->locks.GetIterator();
1619 		while (iterator.HasNext()) {
1620 			struct advisory_lock* lock = iterator.Next();
1621 
1622 			// TODO: locks from the same team might be joinable!
1623 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1624 				// locks do overlap
1625 				if (!shared || !lock->shared) {
1626 					// we need to wait
1627 					waitForLock = locking->wait_sem;
1628 					break;
1629 				}
1630 			}
1631 		}
1632 
1633 		if (waitForLock < 0)
1634 			break;
1635 
1636 		// We need to wait. Do that or fail now, if we've been asked not to.
1637 
1638 		if (!wait) {
1639 			put_advisory_locking(locking);
1640 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1641 		}
1642 
1643 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1644 			B_CAN_INTERRUPT, 0);
1645 		if (status != B_OK && status != B_BAD_SEM_ID)
1646 			return status;
1647 
1648 		// We have been notified, but we need to re-lock the locking object. So
1649 		// go another round...
1650 	}
1651 
1652 	// install new lock
1653 
1654 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1655 		sizeof(struct advisory_lock));
1656 	if (lock == NULL) {
1657 		if (waitForLock >= B_OK)
1658 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1659 		release_sem(locking->lock);
1660 		return B_NO_MEMORY;
1661 	}
1662 
1663 	lock->team = team_get_current_team_id();
1664 	lock->session = session;
1665 	// values must already be normalized when getting here
1666 	lock->start = flock->l_start;
1667 	lock->end = flock->l_start - 1 + flock->l_len;
1668 	lock->shared = shared;
1669 
1670 	locking->locks.Add(lock);
1671 	put_advisory_locking(locking);
1672 
1673 	return status;
1674 }
1675 
1676 
1677 /*!	Normalizes the \a flock structure to make it easier to compare the
1678 	structure with others. The l_start and l_len fields are set to absolute
1679 	values according to the l_whence field.
1680 */
1681 static status_t
1682 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1683 {
1684 	switch (flock->l_whence) {
1685 		case SEEK_SET:
1686 			break;
1687 		case SEEK_CUR:
1688 			flock->l_start += descriptor->pos;
1689 			break;
1690 		case SEEK_END:
1691 		{
1692 			struct vnode* vnode = descriptor->u.vnode;
1693 			struct stat stat;
1694 			status_t status;
1695 
1696 			if (!HAS_FS_CALL(vnode, read_stat))
1697 				return B_NOT_SUPPORTED;
1698 
1699 			status = FS_CALL(vnode, read_stat, &stat);
1700 			if (status != B_OK)
1701 				return status;
1702 
1703 			flock->l_start += stat.st_size;
1704 			break;
1705 		}
1706 		default:
1707 			return B_BAD_VALUE;
1708 	}
1709 
1710 	if (flock->l_start < 0)
1711 		flock->l_start = 0;
1712 	if (flock->l_len == 0)
1713 		flock->l_len = OFF_MAX;
1714 
1715 	// don't let the offset and length overflow
1716 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1717 		flock->l_len = OFF_MAX - flock->l_start;
1718 
1719 	if (flock->l_len < 0) {
1720 		// a negative length reverses the region
1721 		flock->l_start += flock->l_len;
1722 		flock->l_len = -flock->l_len;
1723 	}
1724 
1725 	return B_OK;
1726 }
1727 
1728 
1729 static void
1730 replace_vnode_if_disconnected(struct fs_mount* mount,
1731 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1732 	struct vnode* fallBack, bool lockRootLock)
1733 {
1734 	if (lockRootLock)
1735 		mutex_lock(&sIOContextRootLock);
1736 
1737 	struct vnode* obsoleteVnode = NULL;
1738 
1739 	if (vnode != NULL && vnode->mount == mount
1740 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1741 		obsoleteVnode = vnode;
1742 
1743 		if (vnode == mount->root_vnode) {
1744 			// redirect the vnode to the covered vnode
1745 			vnode = mount->covers_vnode;
1746 		} else
1747 			vnode = fallBack;
1748 
1749 		if (vnode != NULL)
1750 			inc_vnode_ref_count(vnode);
1751 	}
1752 
1753 	if (lockRootLock)
1754 		mutex_unlock(&sIOContextRootLock);
1755 
1756 	if (obsoleteVnode != NULL)
1757 		put_vnode(obsoleteVnode);
1758 }
1759 
1760 
1761 /*!	Disconnects all file descriptors that are associated with the
1762 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1763 	\a mount object.
1764 
1765 	Note, after you've called this function, there might still be ongoing
1766 	accesses - they won't be interrupted if they already happened before.
1767 	However, any subsequent access will fail.
1768 
1769 	This is not a cheap function and should be used with care and rarely.
1770 	TODO: there is currently no means to stop a blocking read/write!
1771 */
1772 void
1773 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1774 	struct vnode* vnodeToDisconnect)
1775 {
1776 	// iterate over all teams and peek into their file descriptors
1777 	int32 nextTeamID = 0;
1778 
1779 	while (true) {
1780 		struct io_context* context = NULL;
1781 		bool contextLocked = false;
1782 		struct team* team = NULL;
1783 		team_id lastTeamID;
1784 
1785 		cpu_status state = disable_interrupts();
1786 		SpinLocker teamsLock(gTeamSpinlock);
1787 
1788 		lastTeamID = peek_next_thread_id();
1789 		if (nextTeamID < lastTeamID) {
1790 			// get next valid team
1791 			while (nextTeamID < lastTeamID
1792 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1793 				nextTeamID++;
1794 			}
1795 
1796 			if (team) {
1797 				context = (io_context*)team->io_context;
1798 
1799 				// Some acrobatics to lock the context in a safe way
1800 				// (cf. _kern_get_next_fd_info() for details).
1801 				GRAB_THREAD_LOCK();
1802 				teamsLock.Unlock();
1803 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1804 					== B_OK;
1805 				RELEASE_THREAD_LOCK();
1806 
1807 				nextTeamID++;
1808 			}
1809 		}
1810 
1811 		teamsLock.Unlock();
1812 		restore_interrupts(state);
1813 
1814 		if (context == NULL)
1815 			break;
1816 
1817 		// we now have a context - since we couldn't lock it while having
1818 		// safe access to the team structure, we now need to lock the mutex
1819 		// manually
1820 
1821 		if (!contextLocked) {
1822 			// team seems to be gone, go over to the next team
1823 			continue;
1824 		}
1825 
1826 		// the team cannot be deleted completely while we're owning its
1827 		// io_context mutex, so we can safely play with it now
1828 
1829 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1830 			sRoot, true);
1831 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1832 			sRoot, false);
1833 
1834 		for (uint32 i = 0; i < context->table_size; i++) {
1835 			if (struct file_descriptor* descriptor = context->fds[i]) {
1836 				inc_fd_ref_count(descriptor);
1837 
1838 				// if this descriptor points at this mount, we
1839 				// need to disconnect it to be able to unmount
1840 				struct vnode* vnode = fd_vnode(descriptor);
1841 				if (vnodeToDisconnect != NULL) {
1842 					if (vnode == vnodeToDisconnect)
1843 						disconnect_fd(descriptor);
1844 				} else if ((vnode != NULL && vnode->mount == mount)
1845 					|| (vnode == NULL && descriptor->u.mount == mount))
1846 					disconnect_fd(descriptor);
1847 
1848 				put_fd(descriptor);
1849 			}
1850 		}
1851 
1852 		mutex_unlock(&context->io_mutex);
1853 	}
1854 }
1855 
1856 
1857 /*!	\brief Gets the root node of the current IO context.
1858 	If \a kernel is \c true, the kernel IO context will be used.
1859 	The caller obtains a reference to the returned node.
1860 */
1861 struct vnode*
1862 get_root_vnode(bool kernel)
1863 {
1864 	if (!kernel) {
1865 		// Get current working directory from io context
1866 		struct io_context* context = get_current_io_context(kernel);
1867 
1868 		mutex_lock(&sIOContextRootLock);
1869 
1870 		struct vnode* root = context->root;
1871 		if (root != NULL)
1872 			inc_vnode_ref_count(root);
1873 
1874 		mutex_unlock(&sIOContextRootLock);
1875 
1876 		if (root != NULL)
1877 			return root;
1878 
1879 		// That should never happen.
1880 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1881 			"root\n", team_get_current_team_id());
1882 	}
1883 
1884 	inc_vnode_ref_count(sRoot);
1885 	return sRoot;
1886 }
1887 
1888 
1889 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1890 		   by.
1891 
1892 	Given an arbitrary vnode, the function checks, whether the node is covered
1893 	by the root of a volume. If it is the function obtains a reference to the
1894 	volume root node and returns it.
1895 
1896 	\param vnode The vnode in question.
1897 	\return The volume root vnode the vnode cover is covered by, if it is
1898 			indeed a mount point, or \c NULL otherwise.
1899 */
1900 static struct vnode*
1901 resolve_mount_point_to_volume_root(struct vnode* vnode)
1902 {
1903 	if (!vnode)
1904 		return NULL;
1905 
1906 	struct vnode* volumeRoot = NULL;
1907 
1908 	rw_lock_read_lock(&sVnodeLock);
1909 
1910 	if (vnode->covered_by) {
1911 		volumeRoot = vnode->covered_by;
1912 		inc_vnode_ref_count(volumeRoot);
1913 	}
1914 
1915 	rw_lock_read_unlock(&sVnodeLock);
1916 
1917 	return volumeRoot;
1918 }
1919 
1920 
1921 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1922 		   by.
1923 
1924 	Given an arbitrary vnode (identified by mount and node ID), the function
1925 	checks, whether the node is covered by the root of a volume. If it is the
1926 	function returns the mount and node ID of the volume root node. Otherwise
1927 	it simply returns the supplied mount and node ID.
1928 
1929 	In case of error (e.g. the supplied node could not be found) the variables
1930 	for storing the resolved mount and node ID remain untouched and an error
1931 	code is returned.
1932 
1933 	\param mountID The mount ID of the vnode in question.
1934 	\param nodeID The node ID of the vnode in question.
1935 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1936 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1937 	\return
1938 	- \c B_OK, if everything went fine,
1939 	- another error code, if something went wrong.
1940 */
1941 status_t
1942 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1943 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1944 {
1945 	// get the node
1946 	struct vnode* node;
1947 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1948 	if (error != B_OK)
1949 		return error;
1950 
1951 	// resolve the node
1952 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1953 	if (resolvedNode) {
1954 		put_vnode(node);
1955 		node = resolvedNode;
1956 	}
1957 
1958 	// set the return values
1959 	*resolvedMountID = node->device;
1960 	*resolvedNodeID = node->id;
1961 
1962 	put_vnode(node);
1963 
1964 	return B_OK;
1965 }
1966 
1967 
1968 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1969 
1970 	Given an arbitrary vnode, the function checks, whether the node is the
1971 	root of a volume. If it is (and if it is not "/"), the function obtains
1972 	a reference to the underlying mount point node and returns it.
1973 
1974 	\param vnode The vnode in question (caller must have a reference).
1975 	\return The mount point vnode the vnode covers, if it is indeed a volume
1976 			root and not "/", or \c NULL otherwise.
1977 */
1978 static struct vnode*
1979 resolve_volume_root_to_mount_point(struct vnode* vnode)
1980 {
1981 	if (!vnode)
1982 		return NULL;
1983 
1984 	struct vnode* mountPoint = NULL;
1985 
1986 	struct fs_mount* mount = vnode->mount;
1987 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1988 		mountPoint = mount->covers_vnode;
1989 		inc_vnode_ref_count(mountPoint);
1990 	}
1991 
1992 	return mountPoint;
1993 }
1994 
1995 
1996 /*!	\brief Gets the directory path and leaf name for a given path.
1997 
1998 	The supplied \a path is transformed to refer to the directory part of
1999 	the entry identified by the original path, and into the buffer \a filename
2000 	the leaf name of the original entry is written.
2001 	Neither the returned path nor the leaf name can be expected to be
2002 	canonical.
2003 
2004 	\param path The path to be analyzed. Must be able to store at least one
2005 		   additional character.
2006 	\param filename The buffer into which the leaf name will be written.
2007 		   Must be of size B_FILE_NAME_LENGTH at least.
2008 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2009 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2010 		   if the given path name is empty.
2011 */
2012 static status_t
2013 get_dir_path_and_leaf(char* path, char* filename)
2014 {
2015 	if (*path == '\0')
2016 		return B_ENTRY_NOT_FOUND;
2017 
2018 	char* last = strrchr(path, '/');
2019 		// '/' are not allowed in file names!
2020 
2021 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2022 
2023 	if (last == NULL) {
2024 		// this path is single segment with no '/' in it
2025 		// ex. "foo"
2026 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2027 			return B_NAME_TOO_LONG;
2028 
2029 		strcpy(path, ".");
2030 	} else {
2031 		last++;
2032 		if (last[0] == '\0') {
2033 			// special case: the path ends in one or more '/' - remove them
2034 			while (*--last == '/' && last != path);
2035 			last[1] = '\0';
2036 
2037 			if (last == path && last[0] == '/') {
2038 				// This path points to the root of the file system
2039 				strcpy(filename, ".");
2040 				return B_OK;
2041 			}
2042 			for (; last != path && *(last - 1) != '/'; last--);
2043 				// rewind to the start of the leaf before the '/'
2044 		}
2045 
2046 		// normal leaf: replace the leaf portion of the path with a '.'
2047 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2048 			return B_NAME_TOO_LONG;
2049 
2050 		last[0] = '.';
2051 		last[1] = '\0';
2052 	}
2053 	return B_OK;
2054 }
2055 
2056 
2057 static status_t
2058 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2059 	bool traverse, bool kernel, struct vnode** _vnode)
2060 {
2061 	char clonedName[B_FILE_NAME_LENGTH + 1];
2062 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2063 		return B_NAME_TOO_LONG;
2064 
2065 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2066 	struct vnode* directory;
2067 
2068 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2069 	if (status < 0)
2070 		return status;
2071 
2072 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2073 		_vnode, NULL);
2074 }
2075 
2076 
2077 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2078 	and returns the respective vnode.
2079 	On success a reference to the vnode is acquired for the caller.
2080 */
2081 static status_t
2082 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2083 {
2084 	ino_t id;
2085 
2086 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2087 		return get_vnode(dir->device, id, _vnode, true, false);
2088 
2089 	status_t status = FS_CALL(dir, lookup, name, &id);
2090 	if (status != B_OK)
2091 		return status;
2092 
2093 	// The lookup() hook call get_vnode() or publish_vnode(), so we do already
2094 	// have a reference and just need to look the node up.
2095 	rw_lock_read_lock(&sVnodeLock);
2096 	*_vnode = lookup_vnode(dir->device, id);
2097 	rw_lock_read_unlock(&sVnodeLock);
2098 
2099 	if (*_vnode == NULL) {
2100 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2101 			"0x%Lx)\n", dir->device, id);
2102 		return B_ENTRY_NOT_FOUND;
2103 	}
2104 
2105 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2106 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2107 //		(*_vnode)->mount->id, (*_vnode)->id);
2108 
2109 	return B_OK;
2110 }
2111 
2112 
2113 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2114 	\a path must not be NULL.
2115 	If it returns successfully, \a path contains the name of the last path
2116 	component. This function clobbers the buffer pointed to by \a path only
2117 	if it does contain more than one component.
2118 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2119 	it is successful or not!
2120 */
2121 static status_t
2122 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2123 	int count, struct io_context* ioContext, struct vnode** _vnode,
2124 	ino_t* _parentID)
2125 {
2126 	status_t status = B_OK;
2127 	ino_t lastParentID = vnode->id;
2128 
2129 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2130 
2131 	if (path == NULL) {
2132 		put_vnode(vnode);
2133 		return B_BAD_VALUE;
2134 	}
2135 
2136 	if (*path == '\0') {
2137 		put_vnode(vnode);
2138 		return B_ENTRY_NOT_FOUND;
2139 	}
2140 
2141 	while (true) {
2142 		struct vnode* nextVnode;
2143 		char* nextPath;
2144 
2145 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2146 			path));
2147 
2148 		// done?
2149 		if (path[0] == '\0')
2150 			break;
2151 
2152 		// walk to find the next path component ("path" will point to a single
2153 		// path component), and filter out multiple slashes
2154 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2155 				nextPath++);
2156 
2157 		if (*nextPath == '/') {
2158 			*nextPath = '\0';
2159 			do
2160 				nextPath++;
2161 			while (*nextPath == '/');
2162 		}
2163 
2164 		// See if the '..' is at the root of a mount and move to the covered
2165 		// vnode so we pass the '..' path to the underlying filesystem.
2166 		// Also prevent breaking the root of the IO context.
2167 		if (strcmp("..", path) == 0) {
2168 			if (vnode == ioContext->root) {
2169 				// Attempted prison break! Keep it contained.
2170 				path = nextPath;
2171 				continue;
2172 			} else if (vnode->mount->root_vnode == vnode
2173 				&& vnode->mount->covers_vnode) {
2174 				nextVnode = vnode->mount->covers_vnode;
2175 				inc_vnode_ref_count(nextVnode);
2176 				put_vnode(vnode);
2177 				vnode = nextVnode;
2178 			}
2179 		}
2180 
2181 		// check if vnode is really a directory
2182 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2183 			status = B_NOT_A_DIRECTORY;
2184 
2185 		// Check if we have the right to search the current directory vnode.
2186 		// If a file system doesn't have the access() function, we assume that
2187 		// searching a directory is always allowed
2188 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2189 			status = FS_CALL(vnode, access, X_OK);
2190 
2191 		// Tell the filesystem to get the vnode of this path component (if we
2192 		// got the permission from the call above)
2193 		if (status == B_OK)
2194 			status = lookup_dir_entry(vnode, path, &nextVnode);
2195 
2196 		if (status != B_OK) {
2197 			put_vnode(vnode);
2198 			return status;
2199 		}
2200 
2201 		// If the new node is a symbolic link, resolve it (if we've been told
2202 		// to do it)
2203 		if (S_ISLNK(nextVnode->Type())
2204 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2205 			size_t bufferSize;
2206 			char* buffer;
2207 
2208 			TRACE(("traverse link\n"));
2209 
2210 			// it's not exactly nice style using goto in this way, but hey,
2211 			// it works :-/
2212 			if (count + 1 > B_MAX_SYMLINKS) {
2213 				status = B_LINK_LIMIT;
2214 				goto resolve_link_error;
2215 			}
2216 
2217 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2218 			if (buffer == NULL) {
2219 				status = B_NO_MEMORY;
2220 				goto resolve_link_error;
2221 			}
2222 
2223 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2224 				bufferSize--;
2225 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2226 				// null-terminate
2227 				if (status >= 0)
2228 					buffer[bufferSize] = '\0';
2229 			} else
2230 				status = B_BAD_VALUE;
2231 
2232 			if (status != B_OK) {
2233 				free(buffer);
2234 
2235 		resolve_link_error:
2236 				put_vnode(vnode);
2237 				put_vnode(nextVnode);
2238 
2239 				return status;
2240 			}
2241 			put_vnode(nextVnode);
2242 
2243 			// Check if we start from the root directory or the current
2244 			// directory ("vnode" still points to that one).
2245 			// Cut off all leading slashes if it's the root directory
2246 			path = buffer;
2247 			bool absoluteSymlink = false;
2248 			if (path[0] == '/') {
2249 				// we don't need the old directory anymore
2250 				put_vnode(vnode);
2251 
2252 				while (*++path == '/')
2253 					;
2254 
2255 				mutex_lock(&sIOContextRootLock);
2256 				vnode = ioContext->root;
2257 				inc_vnode_ref_count(vnode);
2258 				mutex_unlock(&sIOContextRootLock);
2259 
2260 				absoluteSymlink = true;
2261 			}
2262 
2263 			inc_vnode_ref_count(vnode);
2264 				// balance the next recursion - we will decrement the
2265 				// ref_count of the vnode, no matter if we succeeded or not
2266 
2267 			if (absoluteSymlink && *path == '\0') {
2268 				// symlink was just "/"
2269 				nextVnode = vnode;
2270 			} else {
2271 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2272 					ioContext, &nextVnode, &lastParentID);
2273 			}
2274 
2275 			free(buffer);
2276 
2277 			if (status != B_OK) {
2278 				put_vnode(vnode);
2279 				return status;
2280 			}
2281 		} else
2282 			lastParentID = vnode->id;
2283 
2284 		// decrease the ref count on the old dir we just looked up into
2285 		put_vnode(vnode);
2286 
2287 		path = nextPath;
2288 		vnode = nextVnode;
2289 
2290 		// see if we hit a mount point
2291 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2292 		if (mountPoint) {
2293 			put_vnode(vnode);
2294 			vnode = mountPoint;
2295 		}
2296 	}
2297 
2298 	*_vnode = vnode;
2299 	if (_parentID)
2300 		*_parentID = lastParentID;
2301 
2302 	return B_OK;
2303 }
2304 
2305 
2306 static status_t
2307 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2308 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2309 {
2310 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2311 		get_current_io_context(kernel), _vnode, _parentID);
2312 }
2313 
2314 
2315 static status_t
2316 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2317 	ino_t* _parentID, bool kernel)
2318 {
2319 	struct vnode* start = NULL;
2320 
2321 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2322 
2323 	if (!path)
2324 		return B_BAD_VALUE;
2325 
2326 	if (*path == '\0')
2327 		return B_ENTRY_NOT_FOUND;
2328 
2329 	// figure out if we need to start at root or at cwd
2330 	if (*path == '/') {
2331 		if (sRoot == NULL) {
2332 			// we're a bit early, aren't we?
2333 			return B_ERROR;
2334 		}
2335 
2336 		while (*++path == '/')
2337 			;
2338 		start = get_root_vnode(kernel);
2339 
2340 		if (*path == '\0') {
2341 			*_vnode = start;
2342 			return B_OK;
2343 		}
2344 
2345 	} else {
2346 		struct io_context* context = get_current_io_context(kernel);
2347 
2348 		mutex_lock(&context->io_mutex);
2349 		start = context->cwd;
2350 		if (start != NULL)
2351 			inc_vnode_ref_count(start);
2352 		mutex_unlock(&context->io_mutex);
2353 
2354 		if (start == NULL)
2355 			return B_ERROR;
2356 	}
2357 
2358 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2359 		_parentID);
2360 }
2361 
2362 
2363 /*! Returns the vnode in the next to last segment of the path, and returns
2364 	the last portion in filename.
2365 	The path buffer must be able to store at least one additional character.
2366 */
2367 static status_t
2368 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2369 	bool kernel)
2370 {
2371 	status_t status = get_dir_path_and_leaf(path, filename);
2372 	if (status != B_OK)
2373 		return status;
2374 
2375 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2376 }
2377 
2378 
2379 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2380 		   to by a FD + path pair.
2381 
2382 	\a path must be given in either case. \a fd might be omitted, in which
2383 	case \a path is either an absolute path or one relative to the current
2384 	directory. If both a supplied and \a path is relative it is reckoned off
2385 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2386 	ignored.
2387 
2388 	The caller has the responsibility to call put_vnode() on the returned
2389 	directory vnode.
2390 
2391 	\param fd The FD. May be < 0.
2392 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2393 	       is modified by this function. It must have at least room for a
2394 	       string one character longer than the path it contains.
2395 	\param _vnode A pointer to a variable the directory vnode shall be written
2396 		   into.
2397 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2398 		   the leaf name of the specified entry will be written.
2399 	\param kernel \c true, if invoked from inside the kernel, \c false if
2400 		   invoked from userland.
2401 	\return \c B_OK, if everything went fine, another error code otherwise.
2402 */
2403 static status_t
2404 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2405 	char* filename, bool kernel)
2406 {
2407 	if (!path)
2408 		return B_BAD_VALUE;
2409 	if (*path == '\0')
2410 		return B_ENTRY_NOT_FOUND;
2411 	if (fd < 0)
2412 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2413 
2414 	status_t status = get_dir_path_and_leaf(path, filename);
2415 	if (status != B_OK)
2416 		return status;
2417 
2418 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2419 }
2420 
2421 
2422 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2423 		   to by a vnode + path pair.
2424 
2425 	\a path must be given in either case. \a vnode might be omitted, in which
2426 	case \a path is either an absolute path or one relative to the current
2427 	directory. If both a supplied and \a path is relative it is reckoned off
2428 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2429 	ignored.
2430 
2431 	The caller has the responsibility to call put_vnode() on the returned
2432 	directory vnode.
2433 
2434 	\param vnode The vnode. May be \c NULL.
2435 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2436 	       is modified by this function. It must have at least room for a
2437 	       string one character longer than the path it contains.
2438 	\param _vnode A pointer to a variable the directory vnode shall be written
2439 		   into.
2440 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2441 		   the leaf name of the specified entry will be written.
2442 	\param kernel \c true, if invoked from inside the kernel, \c false if
2443 		   invoked from userland.
2444 	\return \c B_OK, if everything went fine, another error code otherwise.
2445 */
2446 static status_t
2447 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2448 	struct vnode** _vnode, char* filename, bool kernel)
2449 {
2450 	if (!path)
2451 		return B_BAD_VALUE;
2452 	if (*path == '\0')
2453 		return B_ENTRY_NOT_FOUND;
2454 	if (vnode == NULL || path[0] == '/')
2455 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2456 
2457 	status_t status = get_dir_path_and_leaf(path, filename);
2458 	if (status != B_OK)
2459 		return status;
2460 
2461 	inc_vnode_ref_count(vnode);
2462 		// vnode_path_to_vnode() always decrements the ref count
2463 
2464 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2465 }
2466 
2467 
2468 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2469 */
2470 static status_t
2471 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2472 	size_t bufferSize, struct io_context* ioContext)
2473 {
2474 	if (bufferSize < sizeof(struct dirent))
2475 		return B_BAD_VALUE;
2476 
2477 	// See if vnode is the root of a mount and move to the covered
2478 	// vnode so we get the underlying file system
2479 	VNodePutter vnodePutter;
2480 	if (vnode->mount->root_vnode == vnode
2481 		&& vnode->mount->covers_vnode != NULL) {
2482 		vnode = vnode->mount->covers_vnode;
2483 		inc_vnode_ref_count(vnode);
2484 		vnodePutter.SetTo(vnode);
2485 	}
2486 
2487 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 		// The FS supports getting the name of a vnode.
2489 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 			return B_OK;
2492 	}
2493 
2494 	// The FS doesn't support getting the name of a vnode. So we search the
2495 	// parent directory for the vnode, if the caller let us.
2496 
2497 	if (parent == NULL)
2498 		return B_NOT_SUPPORTED;
2499 
2500 	void* cookie;
2501 
2502 	status_t status = FS_CALL(parent, open_dir, &cookie);
2503 	if (status >= B_OK) {
2504 		while (true) {
2505 			uint32 num = 1;
2506 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2507 				&num);
2508 			if (status != B_OK)
2509 				break;
2510 			if (num == 0) {
2511 				status = B_ENTRY_NOT_FOUND;
2512 				break;
2513 			}
2514 
2515 			if (vnode->id == buffer->d_ino) {
2516 				// found correct entry!
2517 				break;
2518 			}
2519 		}
2520 
2521 		FS_CALL(vnode, close_dir, cookie);
2522 		FS_CALL(vnode, free_dir_cookie, cookie);
2523 	}
2524 	return status;
2525 }
2526 
2527 
2528 static status_t
2529 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2530 	size_t nameSize, bool kernel)
2531 {
2532 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2533 	struct dirent* dirent = (struct dirent*)buffer;
2534 
2535 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2536 		get_current_io_context(kernel));
2537 	if (status != B_OK)
2538 		return status;
2539 
2540 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2541 		return B_BUFFER_OVERFLOW;
2542 
2543 	return B_OK;
2544 }
2545 
2546 
2547 /*!	Gets the full path to a given directory vnode.
2548 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2549 	file system doesn't support this call, it will fall back to iterating
2550 	through the parent directory to get the name of the child.
2551 
2552 	To protect against circular loops, it supports a maximum tree depth
2553 	of 256 levels.
2554 
2555 	Note that the path may not be correct the time this function returns!
2556 	It doesn't use any locking to prevent returning the correct path, as
2557 	paths aren't safe anyway: the path to a file can change at any time.
2558 
2559 	It might be a good idea, though, to check if the returned path exists
2560 	in the calling function (it's not done here because of efficiency)
2561 */
2562 static status_t
2563 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2564 	bool kernel)
2565 {
2566 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2567 
2568 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2569 		return B_BAD_VALUE;
2570 
2571 	if (!S_ISDIR(vnode->Type()))
2572 		return B_NOT_A_DIRECTORY;
2573 
2574 	char* path = buffer;
2575 	int32 insert = bufferSize;
2576 	int32 maxLevel = 256;
2577 	int32 length;
2578 	status_t status;
2579 	struct io_context* ioContext = get_current_io_context(kernel);
2580 
2581 	// we don't use get_vnode() here because this call is more
2582 	// efficient and does all we need from get_vnode()
2583 	inc_vnode_ref_count(vnode);
2584 
2585 	if (vnode != ioContext->root) {
2586 		// we don't hit the IO context root
2587 		// resolve a volume root to its mount point
2588 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2589 		if (mountPoint) {
2590 			put_vnode(vnode);
2591 			vnode = mountPoint;
2592 		}
2593 	}
2594 
2595 	path[--insert] = '\0';
2596 		// the path is filled right to left
2597 
2598 	while (true) {
2599 		// the name buffer is also used for fs_read_dir()
2600 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2601 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2602 		struct vnode* parentVnode;
2603 		ino_t parentID;
2604 
2605 		// lookup the parent vnode
2606 		if (vnode == ioContext->root) {
2607 			// we hit the IO context root
2608 			parentVnode = vnode;
2609 			inc_vnode_ref_count(vnode);
2610 		} else {
2611 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2612 			if (status != B_OK)
2613 				goto out;
2614 		}
2615 
2616 		// get the node's name
2617 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2618 			sizeof(nameBuffer), ioContext);
2619 
2620 		if (vnode != ioContext->root) {
2621 			// we don't hit the IO context root
2622 			// resolve a volume root to its mount point
2623 			struct vnode* mountPoint
2624 				= resolve_volume_root_to_mount_point(parentVnode);
2625 			if (mountPoint) {
2626 				put_vnode(parentVnode);
2627 				parentVnode = mountPoint;
2628 				parentID = parentVnode->id;
2629 			}
2630 		}
2631 
2632 		bool hitRoot = (parentVnode == vnode);
2633 
2634 		// release the current vnode, we only need its parent from now on
2635 		put_vnode(vnode);
2636 		vnode = parentVnode;
2637 
2638 		if (status != B_OK)
2639 			goto out;
2640 
2641 		if (hitRoot) {
2642 			// we have reached "/", which means we have constructed the full
2643 			// path
2644 			break;
2645 		}
2646 
2647 		// TODO: add an explicit check for loops in about 10 levels to do
2648 		// real loop detection
2649 
2650 		// don't go deeper as 'maxLevel' to prevent circular loops
2651 		if (maxLevel-- < 0) {
2652 			status = B_LINK_LIMIT;
2653 			goto out;
2654 		}
2655 
2656 		// add the name in front of the current path
2657 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2658 		length = strlen(name);
2659 		insert -= length;
2660 		if (insert <= 0) {
2661 			status = B_RESULT_NOT_REPRESENTABLE;
2662 			goto out;
2663 		}
2664 		memcpy(path + insert, name, length);
2665 		path[--insert] = '/';
2666 	}
2667 
2668 	// the root dir will result in an empty path: fix it
2669 	if (path[insert] == '\0')
2670 		path[--insert] = '/';
2671 
2672 	TRACE(("  path is: %s\n", path + insert));
2673 
2674 	// move the path to the start of the buffer
2675 	length = bufferSize - insert;
2676 	memmove(buffer, path + insert, length);
2677 
2678 out:
2679 	put_vnode(vnode);
2680 	return status;
2681 }
2682 
2683 
2684 /*!	Checks the length of every path component, and adds a '.'
2685 	if the path ends in a slash.
2686 	The given path buffer must be able to store at least one
2687 	additional character.
2688 */
2689 static status_t
2690 check_path(char* to)
2691 {
2692 	int32 length = 0;
2693 
2694 	// check length of every path component
2695 
2696 	while (*to) {
2697 		char* begin;
2698 		if (*to == '/')
2699 			to++, length++;
2700 
2701 		begin = to;
2702 		while (*to != '/' && *to)
2703 			to++, length++;
2704 
2705 		if (to - begin > B_FILE_NAME_LENGTH)
2706 			return B_NAME_TOO_LONG;
2707 	}
2708 
2709 	if (length == 0)
2710 		return B_ENTRY_NOT_FOUND;
2711 
2712 	// complete path if there is a slash at the end
2713 
2714 	if (*(to - 1) == '/') {
2715 		if (length > B_PATH_NAME_LENGTH - 2)
2716 			return B_NAME_TOO_LONG;
2717 
2718 		to[0] = '.';
2719 		to[1] = '\0';
2720 	}
2721 
2722 	return B_OK;
2723 }
2724 
2725 
2726 static struct file_descriptor*
2727 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2728 {
2729 	struct file_descriptor* descriptor
2730 		= get_fd(get_current_io_context(kernel), fd);
2731 	if (descriptor == NULL)
2732 		return NULL;
2733 
2734 	struct vnode* vnode = fd_vnode(descriptor);
2735 	if (vnode == NULL) {
2736 		put_fd(descriptor);
2737 		return NULL;
2738 	}
2739 
2740 	// ToDo: when we can close a file descriptor at any point, investigate
2741 	//	if this is still valid to do (accessing the vnode without ref_count
2742 	//	or locking)
2743 	*_vnode = vnode;
2744 	return descriptor;
2745 }
2746 
2747 
2748 static struct vnode*
2749 get_vnode_from_fd(int fd, bool kernel)
2750 {
2751 	struct file_descriptor* descriptor;
2752 	struct vnode* vnode;
2753 
2754 	descriptor = get_fd(get_current_io_context(kernel), fd);
2755 	if (descriptor == NULL)
2756 		return NULL;
2757 
2758 	vnode = fd_vnode(descriptor);
2759 	if (vnode != NULL)
2760 		inc_vnode_ref_count(vnode);
2761 
2762 	put_fd(descriptor);
2763 	return vnode;
2764 }
2765 
2766 
2767 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2768 	only the path will be considered. In this case, the \a path must not be
2769 	NULL.
2770 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2771 	and should be NULL for files.
2772 */
2773 static status_t
2774 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2775 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2776 {
2777 	if (fd < 0 && !path)
2778 		return B_BAD_VALUE;
2779 
2780 	if (path != NULL && *path == '\0')
2781 		return B_ENTRY_NOT_FOUND;
2782 
2783 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2784 		// no FD or absolute path
2785 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2786 	}
2787 
2788 	// FD only, or FD + relative path
2789 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2790 	if (!vnode)
2791 		return B_FILE_ERROR;
2792 
2793 	if (path != NULL) {
2794 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2795 			_vnode, _parentID);
2796 	}
2797 
2798 	// there is no relative path to take into account
2799 
2800 	*_vnode = vnode;
2801 	if (_parentID)
2802 		*_parentID = -1;
2803 
2804 	return B_OK;
2805 }
2806 
2807 
2808 static int
2809 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2810 	void* cookie, int openMode, bool kernel)
2811 {
2812 	struct file_descriptor* descriptor;
2813 	int fd;
2814 
2815 	// If the vnode is locked, we don't allow creating a new file/directory
2816 	// file_descriptor for it
2817 	if (vnode && vnode->mandatory_locked_by != NULL
2818 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2819 		return B_BUSY;
2820 
2821 	descriptor = alloc_fd();
2822 	if (!descriptor)
2823 		return B_NO_MEMORY;
2824 
2825 	if (vnode)
2826 		descriptor->u.vnode = vnode;
2827 	else
2828 		descriptor->u.mount = mount;
2829 	descriptor->cookie = cookie;
2830 
2831 	switch (type) {
2832 		// vnode types
2833 		case FDTYPE_FILE:
2834 			descriptor->ops = &sFileOps;
2835 			break;
2836 		case FDTYPE_DIR:
2837 			descriptor->ops = &sDirectoryOps;
2838 			break;
2839 		case FDTYPE_ATTR:
2840 			descriptor->ops = &sAttributeOps;
2841 			break;
2842 		case FDTYPE_ATTR_DIR:
2843 			descriptor->ops = &sAttributeDirectoryOps;
2844 			break;
2845 
2846 		// mount types
2847 		case FDTYPE_INDEX_DIR:
2848 			descriptor->ops = &sIndexDirectoryOps;
2849 			break;
2850 		case FDTYPE_QUERY:
2851 			descriptor->ops = &sQueryOps;
2852 			break;
2853 
2854 		default:
2855 			panic("get_new_fd() called with unknown type %d\n", type);
2856 			break;
2857 	}
2858 	descriptor->type = type;
2859 	descriptor->open_mode = openMode;
2860 
2861 	io_context* context = get_current_io_context(kernel);
2862 	fd = new_fd(context, descriptor);
2863 	if (fd < 0) {
2864 		free(descriptor);
2865 		return B_NO_MORE_FDS;
2866 	}
2867 
2868 	mutex_lock(&context->io_mutex);
2869 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2870 	mutex_unlock(&context->io_mutex);
2871 
2872 	return fd;
2873 }
2874 
2875 
2876 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2877 	vfs_normalize_path(). See there for more documentation.
2878 */
2879 static status_t
2880 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2881 {
2882 	VNodePutter dirPutter;
2883 	struct vnode* dir = NULL;
2884 	status_t error;
2885 
2886 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2887 		// get dir vnode + leaf name
2888 		struct vnode* nextDir;
2889 		char leaf[B_FILE_NAME_LENGTH];
2890 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2891 		if (error != B_OK)
2892 			return error;
2893 
2894 		dir = nextDir;
2895 		strcpy(path, leaf);
2896 		dirPutter.SetTo(dir);
2897 
2898 		// get file vnode, if we shall resolve links
2899 		bool fileExists = false;
2900 		struct vnode* fileVnode;
2901 		VNodePutter fileVnodePutter;
2902 		if (traverseLink) {
2903 			inc_vnode_ref_count(dir);
2904 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2905 					NULL) == B_OK) {
2906 				fileVnodePutter.SetTo(fileVnode);
2907 				fileExists = true;
2908 			}
2909 		}
2910 
2911 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2912 			// we're done -- construct the path
2913 			bool hasLeaf = true;
2914 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2915 				// special cases "." and ".." -- get the dir, forget the leaf
2916 				inc_vnode_ref_count(dir);
2917 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2918 					&nextDir, NULL);
2919 				if (error != B_OK)
2920 					return error;
2921 				dir = nextDir;
2922 				dirPutter.SetTo(dir);
2923 				hasLeaf = false;
2924 			}
2925 
2926 			// get the directory path
2927 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2928 			if (error != B_OK)
2929 				return error;
2930 
2931 			// append the leaf name
2932 			if (hasLeaf) {
2933 				// insert a directory separator if this is not the file system
2934 				// root
2935 				if ((strcmp(path, "/") != 0
2936 					&& strlcat(path, "/", pathSize) >= pathSize)
2937 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2938 					return B_NAME_TOO_LONG;
2939 				}
2940 			}
2941 
2942 			return B_OK;
2943 		}
2944 
2945 		// read link
2946 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2947 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2948 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2949 			if (error != B_OK)
2950 				return error;
2951 			path[bufferSize] = '\0';
2952 		} else
2953 			return B_BAD_VALUE;
2954 	}
2955 
2956 	return B_LINK_LIMIT;
2957 }
2958 
2959 
2960 #ifdef ADD_DEBUGGER_COMMANDS
2961 
2962 
2963 static void
2964 _dump_advisory_locking(advisory_locking* locking)
2965 {
2966 	if (locking == NULL)
2967 		return;
2968 
2969 	kprintf("   lock:        %ld", locking->lock);
2970 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2971 
2972 	int32 index = 0;
2973 	LockList::Iterator iterator = locking->locks.GetIterator();
2974 	while (iterator.HasNext()) {
2975 		struct advisory_lock* lock = iterator.Next();
2976 
2977 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2978 		kprintf("        start:  %Ld\n", lock->start);
2979 		kprintf("        end:    %Ld\n", lock->end);
2980 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2981 	}
2982 }
2983 
2984 
2985 static void
2986 _dump_mount(struct fs_mount* mount)
2987 {
2988 	kprintf("MOUNT: %p\n", mount);
2989 	kprintf(" id:            %ld\n", mount->id);
2990 	kprintf(" device_name:   %s\n", mount->device_name);
2991 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2992 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2993 	kprintf(" partition:     %p\n", mount->partition);
2994 	kprintf(" lock:          %p\n", &mount->rlock);
2995 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2996 		mount->owns_file_device ? " owns_file_device" : "");
2997 
2998 	fs_volume* volume = mount->volume;
2999 	while (volume != NULL) {
3000 		kprintf(" volume %p:\n", volume);
3001 		kprintf("  layer:            %ld\n", volume->layer);
3002 		kprintf("  private_volume:   %p\n", volume->private_volume);
3003 		kprintf("  ops:              %p\n", volume->ops);
3004 		kprintf("  file_system:      %p\n", volume->file_system);
3005 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3006 		volume = volume->super_volume;
3007 	}
3008 
3009 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3010 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3011 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3012 	set_debug_variable("_partition", (addr_t)mount->partition);
3013 }
3014 
3015 
3016 static bool
3017 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3018 	const char* name)
3019 {
3020 	bool insertSlash = buffer[bufferSize] != '\0';
3021 	size_t nameLength = strlen(name);
3022 
3023 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3024 		return false;
3025 
3026 	if (insertSlash)
3027 		buffer[--bufferSize] = '/';
3028 
3029 	bufferSize -= nameLength;
3030 	memcpy(buffer + bufferSize, name, nameLength);
3031 
3032 	return true;
3033 }
3034 
3035 
3036 static bool
3037 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3038 	ino_t nodeID)
3039 {
3040 	if (bufferSize == 0)
3041 		return false;
3042 
3043 	bool insertSlash = buffer[bufferSize] != '\0';
3044 	if (insertSlash)
3045 		buffer[--bufferSize] = '/';
3046 
3047 	size_t size = snprintf(buffer, bufferSize,
3048 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3049 	if (size > bufferSize) {
3050 		if (insertSlash)
3051 			bufferSize++;
3052 		return false;
3053 	}
3054 
3055 	if (size < bufferSize)
3056 		memmove(buffer + bufferSize - size, buffer, size);
3057 
3058 	bufferSize -= size;
3059 	return true;
3060 }
3061 
3062 
3063 static char*
3064 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3065 	bool& _truncated)
3066 {
3067 	// null-terminate the path
3068 	buffer[--bufferSize] = '\0';
3069 
3070 	while (true) {
3071 		while (vnode->mount->root_vnode == vnode
3072 				&& vnode->mount->covers_vnode != NULL) {
3073 			vnode = vnode->mount->covers_vnode;
3074 		}
3075 
3076 		if (vnode == sRoot) {
3077 			_truncated = bufferSize == 0;
3078 			if (!_truncated)
3079 				buffer[--bufferSize] = '/';
3080 			return buffer + bufferSize;
3081 		}
3082 
3083 		// resolve the name
3084 		ino_t dirID;
3085 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3086 			vnode->id, dirID);
3087 		if (name == NULL) {
3088 			// Failed to resolve the name -- prepend "<dev,node>/".
3089 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3090 				vnode->mount->id, vnode->id);
3091 			return buffer + bufferSize;
3092 		}
3093 
3094 		// prepend the name
3095 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3096 			_truncated = true;
3097 			return buffer + bufferSize;
3098 		}
3099 
3100 		// resolve the directory node
3101 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3102 		if (nextVnode == NULL) {
3103 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3104 				vnode->mount->id, dirID);
3105 			return buffer + bufferSize;
3106 		}
3107 
3108 		vnode = nextVnode;
3109 	}
3110 }
3111 
3112 
3113 static void
3114 _dump_vnode(struct vnode* vnode, bool printPath)
3115 {
3116 	kprintf("VNODE: %p\n", vnode);
3117 	kprintf(" device:        %ld\n", vnode->device);
3118 	kprintf(" id:            %Ld\n", vnode->id);
3119 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3120 	kprintf(" private_node:  %p\n", vnode->private_node);
3121 	kprintf(" mount:         %p\n", vnode->mount);
3122 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3123 	kprintf(" cache:         %p\n", vnode->cache);
3124 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3125 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3126 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3127 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3128 
3129 	_dump_advisory_locking(vnode->advisory_locking);
3130 
3131 	if (printPath) {
3132 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3133 		if (buffer != NULL) {
3134 			bool truncated;
3135 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3136 				B_PATH_NAME_LENGTH, truncated);
3137 			if (path != NULL) {
3138 				kprintf(" path:          ");
3139 				if (truncated)
3140 					kputs("<truncated>/");
3141 				kputs(path);
3142 				kputs("\n");
3143 			} else
3144 				kprintf("Failed to resolve vnode path.\n");
3145 
3146 			debug_free(buffer);
3147 		} else
3148 			kprintf("Failed to allocate memory for constructing the path.\n");
3149 	}
3150 
3151 	set_debug_variable("_node", (addr_t)vnode->private_node);
3152 	set_debug_variable("_mount", (addr_t)vnode->mount);
3153 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3154 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3155 }
3156 
3157 
3158 static int
3159 dump_mount(int argc, char** argv)
3160 {
3161 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3162 		kprintf("usage: %s [id|address]\n", argv[0]);
3163 		return 0;
3164 	}
3165 
3166 	uint32 id = parse_expression(argv[1]);
3167 	struct fs_mount* mount = NULL;
3168 
3169 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3170 	if (mount == NULL) {
3171 		if (IS_USER_ADDRESS(id)) {
3172 			kprintf("fs_mount not found\n");
3173 			return 0;
3174 		}
3175 		mount = (fs_mount*)id;
3176 	}
3177 
3178 	_dump_mount(mount);
3179 	return 0;
3180 }
3181 
3182 
3183 static int
3184 dump_mounts(int argc, char** argv)
3185 {
3186 	if (argc != 1) {
3187 		kprintf("usage: %s\n", argv[0]);
3188 		return 0;
3189 	}
3190 
3191 	kprintf("address     id root       covers     cookie     fs_name\n");
3192 
3193 	struct hash_iterator iterator;
3194 	struct fs_mount* mount;
3195 
3196 	hash_open(sMountsTable, &iterator);
3197 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3198 			!= NULL) {
3199 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3200 			mount->covers_vnode, mount->volume->private_volume,
3201 			mount->volume->file_system_name);
3202 
3203 		fs_volume* volume = mount->volume;
3204 		while (volume->super_volume != NULL) {
3205 			volume = volume->super_volume;
3206 			kprintf("                                     %p %s\n",
3207 				volume->private_volume, volume->file_system_name);
3208 		}
3209 	}
3210 
3211 	hash_close(sMountsTable, &iterator, false);
3212 	return 0;
3213 }
3214 
3215 
3216 static int
3217 dump_vnode(int argc, char** argv)
3218 {
3219 	bool printPath = false;
3220 	int argi = 1;
3221 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3222 		printPath = true;
3223 		argi++;
3224 	}
3225 
3226 	if (argi >= argc || argi + 2 < argc) {
3227 		print_debugger_command_usage(argv[0]);
3228 		return 0;
3229 	}
3230 
3231 	struct vnode* vnode = NULL;
3232 
3233 	if (argi + 1 == argc) {
3234 		vnode = (struct vnode*)parse_expression(argv[argi]);
3235 		if (IS_USER_ADDRESS(vnode)) {
3236 			kprintf("invalid vnode address\n");
3237 			return 0;
3238 		}
3239 		_dump_vnode(vnode, printPath);
3240 		return 0;
3241 	}
3242 
3243 	struct hash_iterator iterator;
3244 	dev_t device = parse_expression(argv[argi]);
3245 	ino_t id = parse_expression(argv[argi + 1]);
3246 
3247 	hash_open(sVnodeTable, &iterator);
3248 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3249 		if (vnode->id != id || vnode->device != device)
3250 			continue;
3251 
3252 		_dump_vnode(vnode, printPath);
3253 	}
3254 
3255 	hash_close(sVnodeTable, &iterator, false);
3256 	return 0;
3257 }
3258 
3259 
3260 static int
3261 dump_vnodes(int argc, char** argv)
3262 {
3263 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3264 		kprintf("usage: %s [device]\n", argv[0]);
3265 		return 0;
3266 	}
3267 
3268 	// restrict dumped nodes to a certain device if requested
3269 	dev_t device = parse_expression(argv[1]);
3270 
3271 	struct hash_iterator iterator;
3272 	struct vnode* vnode;
3273 
3274 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3275 		"flags\n");
3276 
3277 	hash_open(sVnodeTable, &iterator);
3278 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3279 		if (vnode->device != device)
3280 			continue;
3281 
3282 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3283 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3284 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3285 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3286 	}
3287 
3288 	hash_close(sVnodeTable, &iterator, false);
3289 	return 0;
3290 }
3291 
3292 
3293 static int
3294 dump_vnode_caches(int argc, char** argv)
3295 {
3296 	struct hash_iterator iterator;
3297 	struct vnode* vnode;
3298 
3299 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3300 		kprintf("usage: %s [device]\n", argv[0]);
3301 		return 0;
3302 	}
3303 
3304 	// restrict dumped nodes to a certain device if requested
3305 	dev_t device = -1;
3306 	if (argc > 1)
3307 		device = parse_expression(argv[1]);
3308 
3309 	kprintf("address    dev     inode cache          size   pages\n");
3310 
3311 	hash_open(sVnodeTable, &iterator);
3312 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3313 		if (vnode->cache == NULL)
3314 			continue;
3315 		if (device != -1 && vnode->device != device)
3316 			continue;
3317 
3318 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3319 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3320 				/ B_PAGE_SIZE, vnode->cache->page_count);
3321 	}
3322 
3323 	hash_close(sVnodeTable, &iterator, false);
3324 	return 0;
3325 }
3326 
3327 
3328 int
3329 dump_io_context(int argc, char** argv)
3330 {
3331 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3332 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3333 		return 0;
3334 	}
3335 
3336 	struct io_context* context = NULL;
3337 
3338 	if (argc > 1) {
3339 		uint32 num = parse_expression(argv[1]);
3340 		if (IS_KERNEL_ADDRESS(num))
3341 			context = (struct io_context*)num;
3342 		else {
3343 			struct team* team = team_get_team_struct_locked(num);
3344 			if (team == NULL) {
3345 				kprintf("could not find team with ID %ld\n", num);
3346 				return 0;
3347 			}
3348 			context = (struct io_context*)team->io_context;
3349 		}
3350 	} else
3351 		context = get_current_io_context(true);
3352 
3353 	kprintf("I/O CONTEXT: %p\n", context);
3354 	kprintf(" root vnode:\t%p\n", context->root);
3355 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3356 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3357 	kprintf(" max fds:\t%lu\n", context->table_size);
3358 
3359 	if (context->num_used_fds)
3360 		kprintf("   no.  type         ops  ref  open  mode         pos"
3361 			"      cookie\n");
3362 
3363 	for (uint32 i = 0; i < context->table_size; i++) {
3364 		struct file_descriptor* fd = context->fds[i];
3365 		if (fd == NULL)
3366 			continue;
3367 
3368 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3369 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3370 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3371 			fd->pos, fd->cookie,
3372 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3373 				? "mount" : "vnode",
3374 			fd->u.vnode);
3375 	}
3376 
3377 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3378 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3379 
3380 	set_debug_variable("_cwd", (addr_t)context->cwd);
3381 
3382 	return 0;
3383 }
3384 
3385 
3386 int
3387 dump_vnode_usage(int argc, char** argv)
3388 {
3389 	if (argc != 1) {
3390 		kprintf("usage: %s\n", argv[0]);
3391 		return 0;
3392 	}
3393 
3394 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3395 		kMaxUnusedVnodes);
3396 
3397 	struct hash_iterator iterator;
3398 	hash_open(sVnodeTable, &iterator);
3399 
3400 	uint32 count = 0;
3401 	struct vnode* vnode;
3402 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3403 		count++;
3404 	}
3405 
3406 	hash_close(sVnodeTable, &iterator, false);
3407 
3408 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3409 	return 0;
3410 }
3411 
3412 #endif	// ADD_DEBUGGER_COMMANDS
3413 
3414 /*!	Clears an iovec array of physical pages.
3415 	Returns in \a _bytes the number of bytes successfully cleared.
3416 */
3417 static status_t
3418 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3419 {
3420 	size_t bytes = *_bytes;
3421 	size_t index = 0;
3422 
3423 	while (bytes > 0) {
3424 		size_t length = min_c(vecs[index].iov_len, bytes);
3425 
3426 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3427 			length);
3428 		if (status != B_OK) {
3429 			*_bytes -= bytes;
3430 			return status;
3431 		}
3432 
3433 		bytes -= length;
3434 	}
3435 
3436 	return B_OK;
3437 }
3438 
3439 
3440 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3441 	and calls the file system hooks to read/write the request to disk.
3442 */
3443 static status_t
3444 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3445 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3446 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3447 	bool doWrite)
3448 {
3449 	if (fileVecCount == 0) {
3450 		// There are no file vecs at this offset, so we're obviously trying
3451 		// to access the file outside of its bounds
3452 		return B_BAD_VALUE;
3453 	}
3454 
3455 	size_t numBytes = *_numBytes;
3456 	uint32 fileVecIndex;
3457 	size_t vecOffset = *_vecOffset;
3458 	uint32 vecIndex = *_vecIndex;
3459 	status_t status;
3460 	size_t size;
3461 
3462 	if (!doWrite && vecOffset == 0) {
3463 		// now directly read the data from the device
3464 		// the first file_io_vec can be read directly
3465 
3466 		if (fileVecs[0].length < numBytes)
3467 			size = fileVecs[0].length;
3468 		else
3469 			size = numBytes;
3470 
3471 		if (fileVecs[0].offset >= 0) {
3472 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3473 				&vecs[vecIndex], vecCount - vecIndex, &size);
3474 		} else {
3475 			// sparse read
3476 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3477 		}
3478 		if (status != B_OK)
3479 			return status;
3480 
3481 		// TODO: this is a work-around for buggy device drivers!
3482 		//	When our own drivers honour the length, we can:
3483 		//	a) also use this direct I/O for writes (otherwise, it would
3484 		//	   overwrite precious data)
3485 		//	b) panic if the term below is true (at least for writes)
3486 		if (size > fileVecs[0].length) {
3487 			//dprintf("warning: device driver %p doesn't respect total length "
3488 			//	"in read_pages() call!\n", ref->device);
3489 			size = fileVecs[0].length;
3490 		}
3491 
3492 		ASSERT(size <= fileVecs[0].length);
3493 
3494 		// If the file portion was contiguous, we're already done now
3495 		if (size == numBytes)
3496 			return B_OK;
3497 
3498 		// if we reached the end of the file, we can return as well
3499 		if (size != fileVecs[0].length) {
3500 			*_numBytes = size;
3501 			return B_OK;
3502 		}
3503 
3504 		fileVecIndex = 1;
3505 
3506 		// first, find out where we have to continue in our iovecs
3507 		for (; vecIndex < vecCount; vecIndex++) {
3508 			if (size < vecs[vecIndex].iov_len)
3509 				break;
3510 
3511 			size -= vecs[vecIndex].iov_len;
3512 		}
3513 
3514 		vecOffset = size;
3515 	} else {
3516 		fileVecIndex = 0;
3517 		size = 0;
3518 	}
3519 
3520 	// Too bad, let's process the rest of the file_io_vecs
3521 
3522 	size_t totalSize = size;
3523 	size_t bytesLeft = numBytes - size;
3524 
3525 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3526 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3527 		off_t fileOffset = fileVec.offset;
3528 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3529 
3530 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3531 
3532 		// process the complete fileVec
3533 		while (fileLeft > 0) {
3534 			iovec tempVecs[MAX_TEMP_IO_VECS];
3535 			uint32 tempCount = 0;
3536 
3537 			// size tracks how much of what is left of the current fileVec
3538 			// (fileLeft) has been assigned to tempVecs
3539 			size = 0;
3540 
3541 			// assign what is left of the current fileVec to the tempVecs
3542 			for (size = 0; size < fileLeft && vecIndex < vecCount
3543 					&& tempCount < MAX_TEMP_IO_VECS;) {
3544 				// try to satisfy one iovec per iteration (or as much as
3545 				// possible)
3546 
3547 				// bytes left of the current iovec
3548 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3549 				if (vecLeft == 0) {
3550 					vecOffset = 0;
3551 					vecIndex++;
3552 					continue;
3553 				}
3554 
3555 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3556 					vecIndex, vecOffset, size));
3557 
3558 				// actually available bytes
3559 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3560 
3561 				tempVecs[tempCount].iov_base
3562 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3563 				tempVecs[tempCount].iov_len = tempVecSize;
3564 				tempCount++;
3565 
3566 				size += tempVecSize;
3567 				vecOffset += tempVecSize;
3568 			}
3569 
3570 			size_t bytes = size;
3571 
3572 			if (fileOffset == -1) {
3573 				if (doWrite) {
3574 					panic("sparse write attempt: vnode %p", vnode);
3575 					status = B_IO_ERROR;
3576 				} else {
3577 					// sparse read
3578 					status = zero_pages(tempVecs, tempCount, &bytes);
3579 				}
3580 			} else if (doWrite) {
3581 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3582 					tempVecs, tempCount, &bytes);
3583 			} else {
3584 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3585 					tempVecs, tempCount, &bytes);
3586 			}
3587 			if (status != B_OK)
3588 				return status;
3589 
3590 			totalSize += bytes;
3591 			bytesLeft -= size;
3592 			if (fileOffset >= 0)
3593 				fileOffset += size;
3594 			fileLeft -= size;
3595 			//dprintf("-> file left = %Lu\n", fileLeft);
3596 
3597 			if (size != bytes || vecIndex >= vecCount) {
3598 				// there are no more bytes or iovecs, let's bail out
3599 				*_numBytes = totalSize;
3600 				return B_OK;
3601 			}
3602 		}
3603 	}
3604 
3605 	*_vecIndex = vecIndex;
3606 	*_vecOffset = vecOffset;
3607 	*_numBytes = totalSize;
3608 	return B_OK;
3609 }
3610 
3611 
3612 //	#pragma mark - public API for file systems
3613 
3614 
3615 extern "C" status_t
3616 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3617 	fs_vnode_ops* ops)
3618 {
3619 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3620 		volume, volume->id, vnodeID, privateNode));
3621 
3622 	if (privateNode == NULL)
3623 		return B_BAD_VALUE;
3624 
3625 	// create the node
3626 	bool nodeCreated;
3627 	struct vnode* vnode;
3628 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3629 		nodeCreated);
3630 	if (status != B_OK)
3631 		return status;
3632 
3633 	WriteLocker nodeLocker(sVnodeLock, true);
3634 		// create_new_vnode_and_lock() has locked for us
3635 
3636 	// file system integrity check:
3637 	// test if the vnode already exists and bail out if this is the case!
3638 	if (!nodeCreated) {
3639 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3640 			volume->id, vnodeID, privateNode, vnode->private_node);
3641 		return B_ERROR;
3642 	}
3643 
3644 	vnode->private_node = privateNode;
3645 	vnode->ops = ops;
3646 	vnode->SetUnpublished(true);
3647 
3648 	TRACE(("returns: %s\n", strerror(status)));
3649 
3650 	return status;
3651 }
3652 
3653 
3654 extern "C" status_t
3655 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3656 	fs_vnode_ops* ops, int type, uint32 flags)
3657 {
3658 	FUNCTION(("publish_vnode()\n"));
3659 
3660 	WriteLocker locker(sVnodeLock);
3661 
3662 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3663 
3664 	bool nodeCreated = false;
3665 	if (vnode == NULL) {
3666 		if (privateNode == NULL)
3667 			return B_BAD_VALUE;
3668 
3669 		// create the node
3670 		locker.Unlock();
3671 			// create_new_vnode_and_lock() will re-lock for us on success
3672 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3673 			nodeCreated);
3674 		if (status != B_OK)
3675 			return status;
3676 
3677 		locker.SetTo(sVnodeLock, true);
3678 	}
3679 
3680 	if (nodeCreated) {
3681 		vnode->private_node = privateNode;
3682 		vnode->ops = ops;
3683 		vnode->SetUnpublished(true);
3684 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3685 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3686 		// already known, but not published
3687 	} else
3688 		return B_BAD_VALUE;
3689 
3690 	bool publishSpecialSubNode = false;
3691 
3692 	vnode->SetType(type);
3693 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3694 	publishSpecialSubNode = is_special_node_type(type)
3695 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3696 
3697 	status_t status = B_OK;
3698 
3699 	// create sub vnodes, if necessary
3700 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3701 		locker.Unlock();
3702 
3703 		fs_volume* subVolume = volume;
3704 		if (volume->sub_volume != NULL) {
3705 			while (status == B_OK && subVolume->sub_volume != NULL) {
3706 				subVolume = subVolume->sub_volume;
3707 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3708 					vnode);
3709 			}
3710 		}
3711 
3712 		if (status == B_OK && publishSpecialSubNode)
3713 			status = create_special_sub_node(vnode, flags);
3714 
3715 		if (status != B_OK) {
3716 			// error -- clean up the created sub vnodes
3717 			while (subVolume->super_volume != volume) {
3718 				subVolume = subVolume->super_volume;
3719 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3720 			}
3721 		}
3722 
3723 		if (status == B_OK) {
3724 			ReadLocker vnodesReadLocker(sVnodeLock);
3725 			AutoLocker<Vnode> nodeLocker(vnode);
3726 			vnode->SetBusy(false);
3727 			vnode->SetUnpublished(false);
3728 		} else {
3729 			locker.Lock();
3730 			hash_remove(sVnodeTable, vnode);
3731 			remove_vnode_from_mount_list(vnode, vnode->mount);
3732 			free(vnode);
3733 		}
3734 	} else {
3735 		// we still hold the write lock -- mark the node unbusy and published
3736 		vnode->SetBusy(false);
3737 		vnode->SetUnpublished(false);
3738 	}
3739 
3740 	TRACE(("returns: %s\n", strerror(status)));
3741 
3742 	return status;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	if (volume == NULL)
3752 		return B_BAD_VALUE;
3753 
3754 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3755 	if (status != B_OK)
3756 		return status;
3757 
3758 	// If this is a layered FS, we need to get the node cookie for the requested
3759 	// layer.
3760 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3761 		fs_vnode resolvedNode;
3762 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3763 			&resolvedNode);
3764 		if (status != B_OK) {
3765 			panic("get_vnode(): Failed to get super node for vnode %p, "
3766 				"volume: %p", vnode, volume);
3767 			put_vnode(vnode);
3768 			return status;
3769 		}
3770 
3771 		if (_privateNode != NULL)
3772 			*_privateNode = resolvedNode.private_node;
3773 	} else if (_privateNode != NULL)
3774 		*_privateNode = vnode->private_node;
3775 
3776 	return B_OK;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3782 {
3783 	struct vnode* vnode;
3784 
3785 	rw_lock_read_lock(&sVnodeLock);
3786 	vnode = lookup_vnode(volume->id, vnodeID);
3787 	rw_lock_read_unlock(&sVnodeLock);
3788 
3789 	if (vnode == NULL)
3790 		return B_BAD_VALUE;
3791 
3792 	inc_vnode_ref_count(vnode);
3793 	return B_OK;
3794 }
3795 
3796 
3797 extern "C" status_t
3798 put_vnode(fs_volume* volume, ino_t vnodeID)
3799 {
3800 	struct vnode* vnode;
3801 
3802 	rw_lock_read_lock(&sVnodeLock);
3803 	vnode = lookup_vnode(volume->id, vnodeID);
3804 	rw_lock_read_unlock(&sVnodeLock);
3805 
3806 	if (vnode == NULL)
3807 		return B_BAD_VALUE;
3808 
3809 	dec_vnode_ref_count(vnode, false, true);
3810 	return B_OK;
3811 }
3812 
3813 
3814 extern "C" status_t
3815 remove_vnode(fs_volume* volume, ino_t vnodeID)
3816 {
3817 	ReadLocker locker(sVnodeLock);
3818 
3819 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3820 	if (vnode == NULL)
3821 		return B_ENTRY_NOT_FOUND;
3822 
3823 	if (vnode->covered_by != NULL) {
3824 		// this vnode is in use
3825 		return B_BUSY;
3826 	}
3827 
3828 	vnode->Lock();
3829 
3830 	vnode->SetRemoved(true);
3831 	bool removeUnpublished = false;
3832 
3833 	if (vnode->IsUnpublished()) {
3834 		// prepare the vnode for deletion
3835 		removeUnpublished = true;
3836 		vnode->SetBusy(true);
3837 	}
3838 
3839 	vnode->Unlock();
3840 	locker.Unlock();
3841 
3842 	if (removeUnpublished) {
3843 		// If the vnode hasn't been published yet, we delete it here
3844 		atomic_add(&vnode->ref_count, -1);
3845 		free_vnode(vnode, true);
3846 	}
3847 
3848 	return B_OK;
3849 }
3850 
3851 
3852 extern "C" status_t
3853 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3854 {
3855 	struct vnode* vnode;
3856 
3857 	rw_lock_read_lock(&sVnodeLock);
3858 
3859 	vnode = lookup_vnode(volume->id, vnodeID);
3860 	if (vnode) {
3861 		AutoLocker<Vnode> nodeLocker(vnode);
3862 		vnode->SetRemoved(false);
3863 	}
3864 
3865 	rw_lock_read_unlock(&sVnodeLock);
3866 	return B_OK;
3867 }
3868 
3869 
3870 extern "C" status_t
3871 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3872 {
3873 	ReadLocker _(sVnodeLock);
3874 
3875 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3876 		if (_removed != NULL)
3877 			*_removed = vnode->IsRemoved();
3878 		return B_OK;
3879 	}
3880 
3881 	return B_BAD_VALUE;
3882 }
3883 
3884 
3885 extern "C" fs_volume*
3886 volume_for_vnode(fs_vnode* _vnode)
3887 {
3888 	if (_vnode == NULL)
3889 		return NULL;
3890 
3891 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3892 	return vnode->mount->volume;
3893 }
3894 
3895 
3896 #if 0
3897 extern "C" status_t
3898 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3899 	size_t* _numBytes)
3900 {
3901 	struct file_descriptor* descriptor;
3902 	struct vnode* vnode;
3903 
3904 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3905 	if (descriptor == NULL)
3906 		return B_FILE_ERROR;
3907 
3908 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3909 		count, 0, _numBytes);
3910 
3911 	put_fd(descriptor);
3912 	return status;
3913 }
3914 
3915 
3916 extern "C" status_t
3917 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3918 	size_t* _numBytes)
3919 {
3920 	struct file_descriptor* descriptor;
3921 	struct vnode* vnode;
3922 
3923 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3924 	if (descriptor == NULL)
3925 		return B_FILE_ERROR;
3926 
3927 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3928 		count, 0, _numBytes);
3929 
3930 	put_fd(descriptor);
3931 	return status;
3932 }
3933 #endif
3934 
3935 
3936 extern "C" status_t
3937 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3938 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3939 	size_t* _bytes)
3940 {
3941 	struct file_descriptor* descriptor;
3942 	struct vnode* vnode;
3943 
3944 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3945 	if (descriptor == NULL)
3946 		return B_FILE_ERROR;
3947 
3948 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3949 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3950 		false);
3951 
3952 	put_fd(descriptor);
3953 	return status;
3954 }
3955 
3956 
3957 extern "C" status_t
3958 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3959 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3960 	size_t* _bytes)
3961 {
3962 	struct file_descriptor* descriptor;
3963 	struct vnode* vnode;
3964 
3965 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3966 	if (descriptor == NULL)
3967 		return B_FILE_ERROR;
3968 
3969 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3970 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3971 		true);
3972 
3973 	put_fd(descriptor);
3974 	return status;
3975 }
3976 
3977 
3978 extern "C" status_t
3979 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3980 {
3981 	// lookup mount -- the caller is required to make sure that the mount
3982 	// won't go away
3983 	MutexLocker locker(sMountMutex);
3984 	struct fs_mount* mount = find_mount(mountID);
3985 	if (mount == NULL)
3986 		return B_BAD_VALUE;
3987 	locker.Unlock();
3988 
3989 	return mount->entry_cache.Add(dirID, name, nodeID);
3990 }
3991 
3992 
3993 extern "C" status_t
3994 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3995 {
3996 	// lookup mount -- the caller is required to make sure that the mount
3997 	// won't go away
3998 	MutexLocker locker(sMountMutex);
3999 	struct fs_mount* mount = find_mount(mountID);
4000 	if (mount == NULL)
4001 		return B_BAD_VALUE;
4002 	locker.Unlock();
4003 
4004 	return mount->entry_cache.Remove(dirID, name);
4005 }
4006 
4007 
4008 //	#pragma mark - private VFS API
4009 //	Functions the VFS exports for other parts of the kernel
4010 
4011 
4012 /*! Acquires another reference to the vnode that has to be released
4013 	by calling vfs_put_vnode().
4014 */
4015 void
4016 vfs_acquire_vnode(struct vnode* vnode)
4017 {
4018 	inc_vnode_ref_count(vnode);
4019 }
4020 
4021 
4022 /*! This is currently called from file_cache_create() only.
4023 	It's probably a temporary solution as long as devfs requires that
4024 	fs_read_pages()/fs_write_pages() are called with the standard
4025 	open cookie and not with a device cookie.
4026 	If that's done differently, remove this call; it has no other
4027 	purpose.
4028 */
4029 extern "C" status_t
4030 vfs_get_cookie_from_fd(int fd, void** _cookie)
4031 {
4032 	struct file_descriptor* descriptor;
4033 
4034 	descriptor = get_fd(get_current_io_context(true), fd);
4035 	if (descriptor == NULL)
4036 		return B_FILE_ERROR;
4037 
4038 	*_cookie = descriptor->cookie;
4039 	return B_OK;
4040 }
4041 
4042 
4043 extern "C" status_t
4044 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4045 {
4046 	*vnode = get_vnode_from_fd(fd, kernel);
4047 
4048 	if (*vnode == NULL)
4049 		return B_FILE_ERROR;
4050 
4051 	return B_NO_ERROR;
4052 }
4053 
4054 
4055 extern "C" status_t
4056 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4057 {
4058 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4059 		path, kernel));
4060 
4061 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4062 	if (pathBuffer.InitCheck() != B_OK)
4063 		return B_NO_MEMORY;
4064 
4065 	char* buffer = pathBuffer.LockBuffer();
4066 	strlcpy(buffer, path, pathBuffer.BufferSize());
4067 
4068 	struct vnode* vnode;
4069 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4070 	if (status != B_OK)
4071 		return status;
4072 
4073 	*_vnode = vnode;
4074 	return B_OK;
4075 }
4076 
4077 
4078 extern "C" status_t
4079 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4080 {
4081 	struct vnode* vnode;
4082 
4083 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4084 	if (status != B_OK)
4085 		return status;
4086 
4087 	*_vnode = vnode;
4088 	return B_OK;
4089 }
4090 
4091 
4092 extern "C" status_t
4093 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4094 	const char* name, struct vnode** _vnode)
4095 {
4096 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4097 }
4098 
4099 
4100 extern "C" void
4101 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4102 {
4103 	*_mountID = vnode->device;
4104 	*_vnodeID = vnode->id;
4105 }
4106 
4107 
4108 /*!
4109 	Calls fs_open() on the given vnode and returns a new
4110 	file descriptor for it
4111 */
4112 int
4113 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4114 {
4115 	return open_vnode(vnode, openMode, kernel);
4116 }
4117 
4118 
4119 /*!	Looks up a vnode with the given mount and vnode ID.
4120 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4121 	to the node.
4122 	It's currently only be used by file_cache_create().
4123 */
4124 extern "C" status_t
4125 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4126 {
4127 	rw_lock_read_lock(&sVnodeLock);
4128 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4129 	rw_lock_read_unlock(&sVnodeLock);
4130 
4131 	if (vnode == NULL)
4132 		return B_ERROR;
4133 
4134 	*_vnode = vnode;
4135 	return B_OK;
4136 }
4137 
4138 
4139 extern "C" status_t
4140 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4141 	bool traverseLeafLink, bool kernel, void** _node)
4142 {
4143 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4144 		volume, path, kernel));
4145 
4146 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4147 	if (pathBuffer.InitCheck() != B_OK)
4148 		return B_NO_MEMORY;
4149 
4150 	fs_mount* mount;
4151 	status_t status = get_mount(volume->id, &mount);
4152 	if (status != B_OK)
4153 		return status;
4154 
4155 	char* buffer = pathBuffer.LockBuffer();
4156 	strlcpy(buffer, path, pathBuffer.BufferSize());
4157 
4158 	struct vnode* vnode = mount->root_vnode;
4159 
4160 	if (buffer[0] == '/')
4161 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4162 	else {
4163 		inc_vnode_ref_count(vnode);
4164 			// vnode_path_to_vnode() releases a reference to the starting vnode
4165 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4166 			kernel, &vnode, NULL);
4167 	}
4168 
4169 	put_mount(mount);
4170 
4171 	if (status != B_OK)
4172 		return status;
4173 
4174 	if (vnode->device != volume->id) {
4175 		// wrong mount ID - must not gain access on foreign file system nodes
4176 		put_vnode(vnode);
4177 		return B_BAD_VALUE;
4178 	}
4179 
4180 	// Use get_vnode() to resolve the cookie for the right layer.
4181 	status = get_vnode(volume, vnode->id, _node);
4182 	put_vnode(vnode);
4183 
4184 	return status;
4185 }
4186 
4187 
4188 status_t
4189 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4190 	struct stat* stat, bool kernel)
4191 {
4192 	status_t status;
4193 
4194 	if (path) {
4195 		// path given: get the stat of the node referred to by (fd, path)
4196 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4197 		if (pathBuffer.InitCheck() != B_OK)
4198 			return B_NO_MEMORY;
4199 
4200 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4201 			traverseLeafLink, stat, kernel);
4202 	} else {
4203 		// no path given: get the FD and use the FD operation
4204 		struct file_descriptor* descriptor
4205 			= get_fd(get_current_io_context(kernel), fd);
4206 		if (descriptor == NULL)
4207 			return B_FILE_ERROR;
4208 
4209 		if (descriptor->ops->fd_read_stat)
4210 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4211 		else
4212 			status = B_NOT_SUPPORTED;
4213 
4214 		put_fd(descriptor);
4215 	}
4216 
4217 	return status;
4218 }
4219 
4220 
4221 /*!	Finds the full path to the file that contains the module \a moduleName,
4222 	puts it into \a pathBuffer, and returns B_OK for success.
4223 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4224 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4225 	\a pathBuffer is clobbered in any case and must not be relied on if this
4226 	functions returns unsuccessfully.
4227 	\a basePath and \a pathBuffer must not point to the same space.
4228 */
4229 status_t
4230 vfs_get_module_path(const char* basePath, const char* moduleName,
4231 	char* pathBuffer, size_t bufferSize)
4232 {
4233 	struct vnode* dir;
4234 	struct vnode* file;
4235 	status_t status;
4236 	size_t length;
4237 	char* path;
4238 
4239 	if (bufferSize == 0
4240 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4241 		return B_BUFFER_OVERFLOW;
4242 
4243 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4244 	if (status != B_OK)
4245 		return status;
4246 
4247 	// the path buffer had been clobbered by the above call
4248 	length = strlcpy(pathBuffer, basePath, bufferSize);
4249 	if (pathBuffer[length - 1] != '/')
4250 		pathBuffer[length++] = '/';
4251 
4252 	path = pathBuffer + length;
4253 	bufferSize -= length;
4254 
4255 	while (moduleName) {
4256 		char* nextPath = strchr(moduleName, '/');
4257 		if (nextPath == NULL)
4258 			length = strlen(moduleName);
4259 		else {
4260 			length = nextPath - moduleName;
4261 			nextPath++;
4262 		}
4263 
4264 		if (length + 1 >= bufferSize) {
4265 			status = B_BUFFER_OVERFLOW;
4266 			goto err;
4267 		}
4268 
4269 		memcpy(path, moduleName, length);
4270 		path[length] = '\0';
4271 		moduleName = nextPath;
4272 
4273 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4274 		if (status != B_OK) {
4275 			// vnode_path_to_vnode() has already released the reference to dir
4276 			return status;
4277 		}
4278 
4279 		if (S_ISDIR(file->Type())) {
4280 			// goto the next directory
4281 			path[length] = '/';
4282 			path[length + 1] = '\0';
4283 			path += length + 1;
4284 			bufferSize -= length + 1;
4285 
4286 			dir = file;
4287 		} else if (S_ISREG(file->Type())) {
4288 			// it's a file so it should be what we've searched for
4289 			put_vnode(file);
4290 
4291 			return B_OK;
4292 		} else {
4293 			TRACE(("vfs_get_module_path(): something is strange here: "
4294 				"0x%08lx...\n", file->Type()));
4295 			status = B_ERROR;
4296 			dir = file;
4297 			goto err;
4298 		}
4299 	}
4300 
4301 	// if we got here, the moduleName just pointed to a directory, not to
4302 	// a real module - what should we do in this case?
4303 	status = B_ENTRY_NOT_FOUND;
4304 
4305 err:
4306 	put_vnode(dir);
4307 	return status;
4308 }
4309 
4310 
4311 /*!	\brief Normalizes a given path.
4312 
4313 	The path must refer to an existing or non-existing entry in an existing
4314 	directory, that is chopping off the leaf component the remaining path must
4315 	refer to an existing directory.
4316 
4317 	The returned will be canonical in that it will be absolute, will not
4318 	contain any "." or ".." components or duplicate occurrences of '/'s,
4319 	and none of the directory components will by symbolic links.
4320 
4321 	Any two paths referring to the same entry, will result in the same
4322 	normalized path (well, that is pretty much the definition of `normalized',
4323 	isn't it :-).
4324 
4325 	\param path The path to be normalized.
4326 	\param buffer The buffer into which the normalized path will be written.
4327 		   May be the same one as \a path.
4328 	\param bufferSize The size of \a buffer.
4329 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4330 	\param kernel \c true, if the IO context of the kernel shall be used,
4331 		   otherwise that of the team this thread belongs to. Only relevant,
4332 		   if the path is relative (to get the CWD).
4333 	\return \c B_OK if everything went fine, another error code otherwise.
4334 */
4335 status_t
4336 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4337 	bool traverseLink, bool kernel)
4338 {
4339 	if (!path || !buffer || bufferSize < 1)
4340 		return B_BAD_VALUE;
4341 
4342 	if (path != buffer) {
4343 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4344 			return B_BUFFER_OVERFLOW;
4345 	}
4346 
4347 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4348 }
4349 
4350 
4351 /*!	\brief Creates a special node in the file system.
4352 
4353 	The caller gets a reference to the newly created node (which is passed
4354 	back through \a _createdVnode) and is responsible for releasing it.
4355 
4356 	\param path The path where to create the entry for the node. Can be \c NULL,
4357 		in which case the node is created without an entry in the root FS -- it
4358 		will automatically be deleted when the last reference has been released.
4359 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4360 		the target file system will just create the node with its standard
4361 		operations. Depending on the type of the node a subnode might be created
4362 		automatically, though.
4363 	\param mode The type and permissions for the node to be created.
4364 	\param flags Flags to be passed to the creating FS.
4365 	\param kernel \c true, if called in the kernel context (relevant only if
4366 		\a path is not \c NULL and not absolute).
4367 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4368 		file system creating the node, with the private data pointer and
4369 		operations for the super node. Can be \c NULL.
4370 	\param _createVnode Pointer to pre-allocated storage where to store the
4371 		pointer to the newly created node.
4372 	\return \c B_OK, if everything went fine, another error code otherwise.
4373 */
4374 status_t
4375 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4376 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4377 	struct vnode** _createdVnode)
4378 {
4379 	struct vnode* dirNode;
4380 	char _leaf[B_FILE_NAME_LENGTH];
4381 	char* leaf = NULL;
4382 
4383 	if (path) {
4384 		// We've got a path. Get the dir vnode and the leaf name.
4385 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4386 		if (tmpPathBuffer.InitCheck() != B_OK)
4387 			return B_NO_MEMORY;
4388 
4389 		char* tmpPath = tmpPathBuffer.LockBuffer();
4390 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4391 			return B_NAME_TOO_LONG;
4392 
4393 		// get the dir vnode and the leaf name
4394 		leaf = _leaf;
4395 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4396 		if (error != B_OK)
4397 			return error;
4398 	} else {
4399 		// No path. Create the node in the root FS.
4400 		dirNode = sRoot;
4401 		inc_vnode_ref_count(dirNode);
4402 	}
4403 
4404 	VNodePutter _(dirNode);
4405 
4406 	// check support for creating special nodes
4407 	if (!HAS_FS_CALL(dirNode, create_special_node))
4408 		return B_UNSUPPORTED;
4409 
4410 	// create the node
4411 	fs_vnode superVnode;
4412 	ino_t nodeID;
4413 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4414 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4415 	if (status != B_OK)
4416 		return status;
4417 
4418 	// lookup the node
4419 	rw_lock_read_lock(&sVnodeLock);
4420 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4421 	rw_lock_read_unlock(&sVnodeLock);
4422 
4423 	if (*_createdVnode == NULL) {
4424 		panic("vfs_create_special_node(): lookup of node failed");
4425 		return B_ERROR;
4426 	}
4427 
4428 	return B_OK;
4429 }
4430 
4431 
4432 extern "C" void
4433 vfs_put_vnode(struct vnode* vnode)
4434 {
4435 	put_vnode(vnode);
4436 }
4437 
4438 
4439 extern "C" status_t
4440 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4441 {
4442 	// Get current working directory from io context
4443 	struct io_context* context = get_current_io_context(false);
4444 	status_t status = B_OK;
4445 
4446 	mutex_lock(&context->io_mutex);
4447 
4448 	if (context->cwd != NULL) {
4449 		*_mountID = context->cwd->device;
4450 		*_vnodeID = context->cwd->id;
4451 	} else
4452 		status = B_ERROR;
4453 
4454 	mutex_unlock(&context->io_mutex);
4455 	return status;
4456 }
4457 
4458 
4459 status_t
4460 vfs_unmount(dev_t mountID, uint32 flags)
4461 {
4462 	return fs_unmount(NULL, mountID, flags, true);
4463 }
4464 
4465 
4466 extern "C" status_t
4467 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4468 {
4469 	struct vnode* vnode;
4470 
4471 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4472 	if (status != B_OK)
4473 		return status;
4474 
4475 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4476 	put_vnode(vnode);
4477 	return B_OK;
4478 }
4479 
4480 
4481 extern "C" void
4482 vfs_free_unused_vnodes(int32 level)
4483 {
4484 	vnode_low_resource_handler(NULL,
4485 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4486 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4487 		level);
4488 }
4489 
4490 
4491 extern "C" bool
4492 vfs_can_page(struct vnode* vnode, void* cookie)
4493 {
4494 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4495 
4496 	if (HAS_FS_CALL(vnode, can_page))
4497 		return FS_CALL(vnode, can_page, cookie);
4498 	return false;
4499 }
4500 
4501 
4502 extern "C" status_t
4503 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4504 	const generic_io_vec* vecs, size_t count, uint32 flags,
4505 	generic_size_t* _numBytes)
4506 {
4507 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4508 		pos));
4509 
4510 #if VFS_PAGES_IO_TRACING
4511 	generic_size_t bytesRequested = *_numBytes;
4512 #endif
4513 
4514 	IORequest request;
4515 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4516 	if (status == B_OK) {
4517 		status = vfs_vnode_io(vnode, cookie, &request);
4518 		if (status == B_OK)
4519 			status = request.Wait();
4520 		*_numBytes = request.TransferredBytes();
4521 	}
4522 
4523 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4524 		status, *_numBytes));
4525 
4526 	return status;
4527 }
4528 
4529 
4530 extern "C" status_t
4531 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4532 	const generic_io_vec* vecs, size_t count, uint32 flags,
4533 	generic_size_t* _numBytes)
4534 {
4535 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4536 		pos));
4537 
4538 #if VFS_PAGES_IO_TRACING
4539 	generic_size_t bytesRequested = *_numBytes;
4540 #endif
4541 
4542 	IORequest request;
4543 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4544 	if (status == B_OK) {
4545 		status = vfs_vnode_io(vnode, cookie, &request);
4546 		if (status == B_OK)
4547 			status = request.Wait();
4548 		*_numBytes = request.TransferredBytes();
4549 	}
4550 
4551 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4552 		status, *_numBytes));
4553 
4554 	return status;
4555 }
4556 
4557 
4558 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4559 	created if \a allocate is \c true.
4560 	In case it's successful, it will also grab a reference to the cache
4561 	it returns.
4562 */
4563 extern "C" status_t
4564 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4565 {
4566 	if (vnode->cache != NULL) {
4567 		vnode->cache->AcquireRef();
4568 		*_cache = vnode->cache;
4569 		return B_OK;
4570 	}
4571 
4572 	rw_lock_read_lock(&sVnodeLock);
4573 	vnode->Lock();
4574 
4575 	status_t status = B_OK;
4576 
4577 	// The cache could have been created in the meantime
4578 	if (vnode->cache == NULL) {
4579 		if (allocate) {
4580 			// TODO: actually the vnode needs to be busy already here, or
4581 			//	else this won't work...
4582 			bool wasBusy = vnode->IsBusy();
4583 			vnode->SetBusy(true);
4584 
4585 			vnode->Unlock();
4586 			rw_lock_read_unlock(&sVnodeLock);
4587 
4588 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4589 
4590 			rw_lock_read_lock(&sVnodeLock);
4591 			vnode->Lock();
4592 			vnode->SetBusy(wasBusy);
4593 		} else
4594 			status = B_BAD_VALUE;
4595 	}
4596 
4597 	vnode->Unlock();
4598 	rw_lock_read_unlock(&sVnodeLock);
4599 
4600 	if (status == B_OK) {
4601 		vnode->cache->AcquireRef();
4602 		*_cache = vnode->cache;
4603 	}
4604 
4605 	return status;
4606 }
4607 
4608 
4609 status_t
4610 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4611 	file_io_vec* vecs, size_t* _count)
4612 {
4613 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4614 		vnode, vecs, offset, size));
4615 
4616 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4617 }
4618 
4619 
4620 status_t
4621 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4622 {
4623 	status_t status = FS_CALL(vnode, read_stat, stat);
4624 
4625 	// fill in the st_dev and st_ino fields
4626 	if (status == B_OK) {
4627 		stat->st_dev = vnode->device;
4628 		stat->st_ino = vnode->id;
4629 		stat->st_rdev = -1;
4630 	}
4631 
4632 	return status;
4633 }
4634 
4635 
4636 status_t
4637 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4638 {
4639 	struct vnode* vnode;
4640 	status_t status = get_vnode(device, inode, &vnode, true, false);
4641 	if (status != B_OK)
4642 		return status;
4643 
4644 	status = FS_CALL(vnode, read_stat, stat);
4645 
4646 	// fill in the st_dev and st_ino fields
4647 	if (status == B_OK) {
4648 		stat->st_dev = vnode->device;
4649 		stat->st_ino = vnode->id;
4650 		stat->st_rdev = -1;
4651 	}
4652 
4653 	put_vnode(vnode);
4654 	return status;
4655 }
4656 
4657 
4658 status_t
4659 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4660 {
4661 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4662 }
4663 
4664 
4665 status_t
4666 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4667 	char* path, size_t pathLength)
4668 {
4669 	struct vnode* vnode;
4670 	status_t status;
4671 
4672 	// filter invalid leaf names
4673 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4674 		return B_BAD_VALUE;
4675 
4676 	// get the vnode matching the dir's node_ref
4677 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4678 		// special cases "." and "..": we can directly get the vnode of the
4679 		// referenced directory
4680 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4681 		leaf = NULL;
4682 	} else
4683 		status = get_vnode(device, inode, &vnode, true, false);
4684 	if (status != B_OK)
4685 		return status;
4686 
4687 	// get the directory path
4688 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4689 	put_vnode(vnode);
4690 		// we don't need the vnode anymore
4691 	if (status != B_OK)
4692 		return status;
4693 
4694 	// append the leaf name
4695 	if (leaf) {
4696 		// insert a directory separator if this is not the file system root
4697 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4698 				>= pathLength)
4699 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4700 			return B_NAME_TOO_LONG;
4701 		}
4702 	}
4703 
4704 	return B_OK;
4705 }
4706 
4707 
4708 /*!	If the given descriptor locked its vnode, that lock will be released. */
4709 void
4710 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4711 {
4712 	struct vnode* vnode = fd_vnode(descriptor);
4713 
4714 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4715 		vnode->mandatory_locked_by = NULL;
4716 }
4717 
4718 
4719 /*!	Closes all file descriptors of the specified I/O context that
4720 	have the O_CLOEXEC flag set.
4721 */
4722 void
4723 vfs_exec_io_context(io_context* context)
4724 {
4725 	uint32 i;
4726 
4727 	for (i = 0; i < context->table_size; i++) {
4728 		mutex_lock(&context->io_mutex);
4729 
4730 		struct file_descriptor* descriptor = context->fds[i];
4731 		bool remove = false;
4732 
4733 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4734 			context->fds[i] = NULL;
4735 			context->num_used_fds--;
4736 
4737 			remove = true;
4738 		}
4739 
4740 		mutex_unlock(&context->io_mutex);
4741 
4742 		if (remove) {
4743 			close_fd(descriptor);
4744 			put_fd(descriptor);
4745 		}
4746 	}
4747 }
4748 
4749 
4750 /*! Sets up a new io_control structure, and inherits the properties
4751 	of the parent io_control if it is given.
4752 */
4753 io_context*
4754 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4755 {
4756 	io_context* context = (io_context*)malloc(sizeof(io_context));
4757 	if (context == NULL)
4758 		return NULL;
4759 
4760 	TIOC(NewIOContext(context, parentContext));
4761 
4762 	memset(context, 0, sizeof(io_context));
4763 	context->ref_count = 1;
4764 
4765 	MutexLocker parentLocker;
4766 
4767 	size_t tableSize;
4768 	if (parentContext) {
4769 		parentLocker.SetTo(parentContext->io_mutex, false);
4770 		tableSize = parentContext->table_size;
4771 	} else
4772 		tableSize = DEFAULT_FD_TABLE_SIZE;
4773 
4774 	// allocate space for FDs and their close-on-exec flag
4775 	context->fds = (file_descriptor**)malloc(
4776 		sizeof(struct file_descriptor*) * tableSize
4777 		+ sizeof(struct select_sync*) * tableSize
4778 		+ (tableSize + 7) / 8);
4779 	if (context->fds == NULL) {
4780 		free(context);
4781 		return NULL;
4782 	}
4783 
4784 	context->select_infos = (select_info**)(context->fds + tableSize);
4785 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4786 
4787 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4788 		+ sizeof(struct select_sync*) * tableSize
4789 		+ (tableSize + 7) / 8);
4790 
4791 	mutex_init(&context->io_mutex, "I/O context");
4792 
4793 	// Copy all parent file descriptors
4794 
4795 	if (parentContext) {
4796 		size_t i;
4797 
4798 		mutex_lock(&sIOContextRootLock);
4799 		context->root = parentContext->root;
4800 		if (context->root)
4801 			inc_vnode_ref_count(context->root);
4802 		mutex_unlock(&sIOContextRootLock);
4803 
4804 		context->cwd = parentContext->cwd;
4805 		if (context->cwd)
4806 			inc_vnode_ref_count(context->cwd);
4807 
4808 		for (i = 0; i < tableSize; i++) {
4809 			struct file_descriptor* descriptor = parentContext->fds[i];
4810 
4811 			if (descriptor != NULL) {
4812 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4813 				if (closeOnExec && purgeCloseOnExec)
4814 					continue;
4815 
4816 				TFD(InheritFD(context, i, descriptor, parentContext));
4817 
4818 				context->fds[i] = descriptor;
4819 				context->num_used_fds++;
4820 				atomic_add(&descriptor->ref_count, 1);
4821 				atomic_add(&descriptor->open_count, 1);
4822 
4823 				if (closeOnExec)
4824 					fd_set_close_on_exec(context, i, true);
4825 			}
4826 		}
4827 
4828 		parentLocker.Unlock();
4829 	} else {
4830 		context->root = sRoot;
4831 		context->cwd = sRoot;
4832 
4833 		if (context->root)
4834 			inc_vnode_ref_count(context->root);
4835 
4836 		if (context->cwd)
4837 			inc_vnode_ref_count(context->cwd);
4838 	}
4839 
4840 	context->table_size = tableSize;
4841 
4842 	list_init(&context->node_monitors);
4843 	context->max_monitors = DEFAULT_NODE_MONITORS;
4844 
4845 	return context;
4846 }
4847 
4848 
4849 static status_t
4850 vfs_free_io_context(io_context* context)
4851 {
4852 	uint32 i;
4853 
4854 	TIOC(FreeIOContext(context));
4855 
4856 	if (context->root)
4857 		put_vnode(context->root);
4858 
4859 	if (context->cwd)
4860 		put_vnode(context->cwd);
4861 
4862 	mutex_lock(&context->io_mutex);
4863 
4864 	for (i = 0; i < context->table_size; i++) {
4865 		if (struct file_descriptor* descriptor = context->fds[i]) {
4866 			close_fd(descriptor);
4867 			put_fd(descriptor);
4868 		}
4869 	}
4870 
4871 	mutex_destroy(&context->io_mutex);
4872 
4873 	remove_node_monitors(context);
4874 	free(context->fds);
4875 	free(context);
4876 
4877 	return B_OK;
4878 }
4879 
4880 
4881 void
4882 vfs_get_io_context(io_context* context)
4883 {
4884 	atomic_add(&context->ref_count, 1);
4885 }
4886 
4887 
4888 void
4889 vfs_put_io_context(io_context* context)
4890 {
4891 	if (atomic_add(&context->ref_count, -1) == 1)
4892 		vfs_free_io_context(context);
4893 }
4894 
4895 
4896 static status_t
4897 vfs_resize_fd_table(struct io_context* context, const int newSize)
4898 {
4899 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4900 		return B_BAD_VALUE;
4901 
4902 	TIOC(ResizeIOContext(context, newSize));
4903 
4904 	MutexLocker _(context->io_mutex);
4905 
4906 	int oldSize = context->table_size;
4907 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4908 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4909 
4910 	// If the tables shrink, make sure none of the fds being dropped are in use.
4911 	if (newSize < oldSize) {
4912 		for (int i = oldSize; i-- > newSize;) {
4913 			if (context->fds[i])
4914 				return B_BUSY;
4915 		}
4916 	}
4917 
4918 	// store pointers to the old tables
4919 	file_descriptor** oldFDs = context->fds;
4920 	select_info** oldSelectInfos = context->select_infos;
4921 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4922 
4923 	// allocate new tables
4924 	file_descriptor** newFDs = (file_descriptor**)malloc(
4925 		sizeof(struct file_descriptor*) * newSize
4926 		+ sizeof(struct select_sync*) * newSize
4927 		+ newCloseOnExitBitmapSize);
4928 	if (newFDs == NULL)
4929 		return B_NO_MEMORY;
4930 
4931 	context->fds = newFDs;
4932 	context->select_infos = (select_info**)(context->fds + newSize);
4933 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4934 	context->table_size = newSize;
4935 
4936 	// copy entries from old tables
4937 	int toCopy = min_c(oldSize, newSize);
4938 
4939 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4940 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4941 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4942 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4943 
4944 	// clear additional entries, if the tables grow
4945 	if (newSize > oldSize) {
4946 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4947 		memset(context->select_infos + oldSize, 0,
4948 			sizeof(void*) * (newSize - oldSize));
4949 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4950 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4951 	}
4952 
4953 	free(oldFDs);
4954 
4955 	return B_OK;
4956 }
4957 
4958 
4959 static status_t
4960 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4961 {
4962 	int	status = B_OK;
4963 
4964 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4965 		return B_BAD_VALUE;
4966 
4967 	mutex_lock(&context->io_mutex);
4968 
4969 	if ((size_t)newSize < context->num_monitors) {
4970 		status = B_BUSY;
4971 		goto out;
4972 	}
4973 	context->max_monitors = newSize;
4974 
4975 out:
4976 	mutex_unlock(&context->io_mutex);
4977 	return status;
4978 }
4979 
4980 
4981 int
4982 vfs_getrlimit(int resource, struct rlimit* rlp)
4983 {
4984 	if (!rlp)
4985 		return B_BAD_ADDRESS;
4986 
4987 	switch (resource) {
4988 		case RLIMIT_NOFILE:
4989 		{
4990 			struct io_context* context = get_current_io_context(false);
4991 			MutexLocker _(context->io_mutex);
4992 
4993 			rlp->rlim_cur = context->table_size;
4994 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4995 			return 0;
4996 		}
4997 
4998 		case RLIMIT_NOVMON:
4999 		{
5000 			struct io_context* context = get_current_io_context(false);
5001 			MutexLocker _(context->io_mutex);
5002 
5003 			rlp->rlim_cur = context->max_monitors;
5004 			rlp->rlim_max = MAX_NODE_MONITORS;
5005 			return 0;
5006 		}
5007 
5008 		default:
5009 			return B_BAD_VALUE;
5010 	}
5011 }
5012 
5013 
5014 int
5015 vfs_setrlimit(int resource, const struct rlimit* rlp)
5016 {
5017 	if (!rlp)
5018 		return B_BAD_ADDRESS;
5019 
5020 	switch (resource) {
5021 		case RLIMIT_NOFILE:
5022 			/* TODO: check getuid() */
5023 			if (rlp->rlim_max != RLIM_SAVED_MAX
5024 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5025 				return B_NOT_ALLOWED;
5026 
5027 			return vfs_resize_fd_table(get_current_io_context(false),
5028 				rlp->rlim_cur);
5029 
5030 		case RLIMIT_NOVMON:
5031 			/* TODO: check getuid() */
5032 			if (rlp->rlim_max != RLIM_SAVED_MAX
5033 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5034 				return B_NOT_ALLOWED;
5035 
5036 			return vfs_resize_monitor_table(get_current_io_context(false),
5037 				rlp->rlim_cur);
5038 
5039 		default:
5040 			return B_BAD_VALUE;
5041 	}
5042 }
5043 
5044 
5045 status_t
5046 vfs_init(kernel_args* args)
5047 {
5048 	vnode::StaticInit();
5049 
5050 	struct vnode dummyVnode;
5051 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5052 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5053 	if (sVnodeTable == NULL)
5054 		panic("vfs_init: error creating vnode hash table\n");
5055 
5056 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5057 
5058 	struct fs_mount dummyMount;
5059 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5060 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5061 	if (sMountsTable == NULL)
5062 		panic("vfs_init: error creating mounts hash table\n");
5063 
5064 	node_monitor_init();
5065 
5066 	sRoot = NULL;
5067 
5068 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5069 
5070 	if (block_cache_init() != B_OK)
5071 		return B_ERROR;
5072 
5073 #ifdef ADD_DEBUGGER_COMMANDS
5074 	// add some debugger commands
5075 	add_debugger_command_etc("vnode", &dump_vnode,
5076 		"Print info about the specified vnode",
5077 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5078 		"Prints information about the vnode specified by address <vnode> or\n"
5079 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5080 		"constructed and printed. It might not be possible to construct a\n"
5081 		"complete path, though.\n",
5082 		0);
5083 	add_debugger_command("vnodes", &dump_vnodes,
5084 		"list all vnodes (from the specified device)");
5085 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5086 		"list all vnode caches");
5087 	add_debugger_command("mount", &dump_mount,
5088 		"info about the specified fs_mount");
5089 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5090 	add_debugger_command("io_context", &dump_io_context,
5091 		"info about the I/O context");
5092 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5093 		"info about vnode usage");
5094 #endif
5095 
5096 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5097 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5098 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5099 		0);
5100 
5101 	file_map_init();
5102 
5103 	return file_cache_init();
5104 }
5105 
5106 
5107 //	#pragma mark - fd_ops implementations
5108 
5109 
5110 /*!
5111 	Calls fs_open() on the given vnode and returns a new
5112 	file descriptor for it
5113 */
5114 static int
5115 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5116 {
5117 	void* cookie;
5118 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5119 	if (status != B_OK)
5120 		return status;
5121 
5122 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5123 	if (fd < 0) {
5124 		FS_CALL(vnode, close, cookie);
5125 		FS_CALL(vnode, free_cookie, cookie);
5126 	}
5127 	return fd;
5128 }
5129 
5130 
5131 /*!
5132 	Calls fs_open() on the given vnode and returns a new
5133 	file descriptor for it
5134 */
5135 static int
5136 create_vnode(struct vnode* directory, const char* name, int openMode,
5137 	int perms, bool kernel)
5138 {
5139 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5140 	status_t status = B_ERROR;
5141 	struct vnode* vnode;
5142 	void* cookie;
5143 	ino_t newID;
5144 
5145 	// This is somewhat tricky: If the entry already exists, the FS responsible
5146 	// for the directory might not necessarily also be the one responsible for
5147 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5148 	// we can actually never call the create() hook without O_EXCL. Instead we
5149 	// try to look the entry up first. If it already exists, we just open the
5150 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5151 	// introduces a race condition, since someone else might have created the
5152 	// entry in the meantime. We hope the respective FS returns the correct
5153 	// error code and retry (up to 3 times) again.
5154 
5155 	for (int i = 0; i < 3 && status != B_OK; i++) {
5156 		// look the node up
5157 		status = lookup_dir_entry(directory, name, &vnode);
5158 		if (status == B_OK) {
5159 			VNodePutter putter(vnode);
5160 
5161 			if ((openMode & O_EXCL) != 0)
5162 				return B_FILE_EXISTS;
5163 
5164 			// If the node is a symlink, we have to follow it, unless
5165 			// O_NOTRAVERSE is set.
5166 			if (S_ISLNK(vnode->Type()) && traverse) {
5167 				putter.Put();
5168 				char clonedName[B_FILE_NAME_LENGTH + 1];
5169 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5170 						>= B_FILE_NAME_LENGTH) {
5171 					return B_NAME_TOO_LONG;
5172 				}
5173 
5174 				inc_vnode_ref_count(directory);
5175 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5176 					kernel, &vnode, NULL);
5177 				if (status != B_OK)
5178 					return status;
5179 
5180 				putter.SetTo(vnode);
5181 			}
5182 
5183 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5184 				put_vnode(vnode);
5185 				return B_LINK_LIMIT;
5186 			}
5187 
5188 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5189 			// on success keep the vnode reference for the FD
5190 			if (fd >= 0)
5191 				putter.Detach();
5192 
5193 			return fd;
5194 		}
5195 
5196 		// it doesn't exist yet -- try to create it
5197 
5198 		if (!HAS_FS_CALL(directory, create))
5199 			return B_READ_ONLY_DEVICE;
5200 
5201 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5202 			&cookie, &newID);
5203 		if (status != B_OK
5204 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5205 			return status;
5206 		}
5207 	}
5208 
5209 	if (status != B_OK)
5210 		return status;
5211 
5212 	// the node has been created successfully
5213 
5214 	rw_lock_read_lock(&sVnodeLock);
5215 	vnode = lookup_vnode(directory->device, newID);
5216 	rw_lock_read_unlock(&sVnodeLock);
5217 
5218 	if (vnode == NULL) {
5219 		panic("vfs: fs_create() returned success but there is no vnode, "
5220 			"mount ID %ld!\n", directory->device);
5221 		return B_BAD_VALUE;
5222 	}
5223 
5224 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5225 	if (fd >= 0)
5226 		return fd;
5227 
5228 	status = fd;
5229 
5230 	// something went wrong, clean up
5231 
5232 	FS_CALL(vnode, close, cookie);
5233 	FS_CALL(vnode, free_cookie, cookie);
5234 	put_vnode(vnode);
5235 
5236 	FS_CALL(directory, unlink, name);
5237 
5238 	return status;
5239 }
5240 
5241 
5242 /*! Calls fs open_dir() on the given vnode and returns a new
5243 	file descriptor for it
5244 */
5245 static int
5246 open_dir_vnode(struct vnode* vnode, bool kernel)
5247 {
5248 	void* cookie;
5249 	int status;
5250 
5251 	status = FS_CALL(vnode, open_dir, &cookie);
5252 	if (status != B_OK)
5253 		return status;
5254 
5255 	// directory is opened, create a fd
5256 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5257 	if (status >= 0)
5258 		return status;
5259 
5260 	FS_CALL(vnode, close_dir, cookie);
5261 	FS_CALL(vnode, free_dir_cookie, cookie);
5262 
5263 	return status;
5264 }
5265 
5266 
5267 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5268 	file descriptor for it.
5269 	Used by attr_dir_open(), and attr_dir_open_fd().
5270 */
5271 static int
5272 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5273 {
5274 	void* cookie;
5275 	int status;
5276 
5277 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5278 		return B_NOT_SUPPORTED;
5279 
5280 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5281 	if (status != B_OK)
5282 		return status;
5283 
5284 	// directory is opened, create a fd
5285 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5286 	if (status >= 0)
5287 		return status;
5288 
5289 	FS_CALL(vnode, close_attr_dir, cookie);
5290 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5291 
5292 	return status;
5293 }
5294 
5295 
5296 static int
5297 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5298 	int openMode, int perms, bool kernel)
5299 {
5300 	struct vnode* directory;
5301 	int status;
5302 
5303 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5304 		"kernel %d\n", name, openMode, perms, kernel));
5305 
5306 	// get directory to put the new file in
5307 	status = get_vnode(mountID, directoryID, &directory, true, false);
5308 	if (status != B_OK)
5309 		return status;
5310 
5311 	status = create_vnode(directory, name, openMode, perms, kernel);
5312 	put_vnode(directory);
5313 
5314 	return status;
5315 }
5316 
5317 
5318 static int
5319 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5320 {
5321 	char name[B_FILE_NAME_LENGTH];
5322 	struct vnode* directory;
5323 	int status;
5324 
5325 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5326 		openMode, perms, kernel));
5327 
5328 	// get directory to put the new file in
5329 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5330 	if (status < 0)
5331 		return status;
5332 
5333 	status = create_vnode(directory, name, openMode, perms, kernel);
5334 
5335 	put_vnode(directory);
5336 	return status;
5337 }
5338 
5339 
5340 static int
5341 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5342 	int openMode, bool kernel)
5343 {
5344 	if (name == NULL || *name == '\0')
5345 		return B_BAD_VALUE;
5346 
5347 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5348 		mountID, directoryID, name, openMode));
5349 
5350 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5351 
5352 	// get the vnode matching the entry_ref
5353 	struct vnode* vnode;
5354 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5355 		kernel, &vnode);
5356 	if (status != B_OK)
5357 		return status;
5358 
5359 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5360 		put_vnode(vnode);
5361 		return B_LINK_LIMIT;
5362 	}
5363 
5364 	int fd = open_vnode(vnode, openMode, kernel);
5365 	if (fd < 0)
5366 		put_vnode(vnode);
5367 
5368 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5369 		vnode->id, name);
5370 	return fd;
5371 }
5372 
5373 
5374 static int
5375 file_open(int fd, char* path, int openMode, bool kernel)
5376 {
5377 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5378 
5379 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5380 		fd, path, openMode, kernel));
5381 
5382 	// get the vnode matching the vnode + path combination
5383 	struct vnode* vnode;
5384 	ino_t parentID;
5385 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5386 		&parentID, kernel);
5387 	if (status != B_OK)
5388 		return status;
5389 
5390 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5391 		put_vnode(vnode);
5392 		return B_LINK_LIMIT;
5393 	}
5394 
5395 	// open the vnode
5396 	int newFD = open_vnode(vnode, openMode, kernel);
5397 	// put only on error -- otherwise our reference was transferred to the FD
5398 	if (newFD < 0)
5399 		put_vnode(vnode);
5400 
5401 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5402 		vnode->device, parentID, vnode->id, NULL);
5403 
5404 	return newFD;
5405 }
5406 
5407 
5408 static status_t
5409 file_close(struct file_descriptor* descriptor)
5410 {
5411 	struct vnode* vnode = descriptor->u.vnode;
5412 	status_t status = B_OK;
5413 
5414 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5415 
5416 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5417 		vnode->id);
5418 	if (HAS_FS_CALL(vnode, close)) {
5419 		status = FS_CALL(vnode, close, descriptor->cookie);
5420 	}
5421 
5422 	if (status == B_OK) {
5423 		// remove all outstanding locks for this team
5424 		release_advisory_lock(vnode, NULL);
5425 	}
5426 	return status;
5427 }
5428 
5429 
5430 static void
5431 file_free_fd(struct file_descriptor* descriptor)
5432 {
5433 	struct vnode* vnode = descriptor->u.vnode;
5434 
5435 	if (vnode != NULL) {
5436 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5437 		put_vnode(vnode);
5438 	}
5439 }
5440 
5441 
5442 static status_t
5443 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5444 	size_t* length)
5445 {
5446 	struct vnode* vnode = descriptor->u.vnode;
5447 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5448 		*length));
5449 
5450 	if (S_ISDIR(vnode->Type()))
5451 		return B_IS_A_DIRECTORY;
5452 
5453 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5454 }
5455 
5456 
5457 static status_t
5458 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5459 	size_t* length)
5460 {
5461 	struct vnode* vnode = descriptor->u.vnode;
5462 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5463 
5464 	if (S_ISDIR(vnode->Type()))
5465 		return B_IS_A_DIRECTORY;
5466 	if (!HAS_FS_CALL(vnode, write))
5467 		return B_READ_ONLY_DEVICE;
5468 
5469 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5470 }
5471 
5472 
5473 static off_t
5474 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5475 {
5476 	struct vnode* vnode = descriptor->u.vnode;
5477 	off_t offset;
5478 
5479 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5480 
5481 	// some kinds of files are not seekable
5482 	switch (vnode->Type() & S_IFMT) {
5483 		case S_IFIFO:
5484 		case S_IFSOCK:
5485 			return ESPIPE;
5486 
5487 		// The Open Group Base Specs don't mention any file types besides pipes,
5488 		// fifos, and sockets specially, so we allow seeking them.
5489 		case S_IFREG:
5490 		case S_IFBLK:
5491 		case S_IFDIR:
5492 		case S_IFLNK:
5493 		case S_IFCHR:
5494 			break;
5495 	}
5496 
5497 	switch (seekType) {
5498 		case SEEK_SET:
5499 			offset = 0;
5500 			break;
5501 		case SEEK_CUR:
5502 			offset = descriptor->pos;
5503 			break;
5504 		case SEEK_END:
5505 		{
5506 			// stat() the node
5507 			if (!HAS_FS_CALL(vnode, read_stat))
5508 				return B_NOT_SUPPORTED;
5509 
5510 			struct stat stat;
5511 			status_t status = FS_CALL(vnode, read_stat, &stat);
5512 			if (status != B_OK)
5513 				return status;
5514 
5515 			offset = stat.st_size;
5516 			break;
5517 		}
5518 		default:
5519 			return B_BAD_VALUE;
5520 	}
5521 
5522 	// assumes off_t is 64 bits wide
5523 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5524 		return B_BUFFER_OVERFLOW;
5525 
5526 	pos += offset;
5527 	if (pos < 0)
5528 		return B_BAD_VALUE;
5529 
5530 	return descriptor->pos = pos;
5531 }
5532 
5533 
5534 static status_t
5535 file_select(struct file_descriptor* descriptor, uint8 event,
5536 	struct selectsync* sync)
5537 {
5538 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5539 
5540 	struct vnode* vnode = descriptor->u.vnode;
5541 
5542 	// If the FS has no select() hook, notify select() now.
5543 	if (!HAS_FS_CALL(vnode, select))
5544 		return notify_select_event(sync, event);
5545 
5546 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5547 }
5548 
5549 
5550 static status_t
5551 file_deselect(struct file_descriptor* descriptor, uint8 event,
5552 	struct selectsync* sync)
5553 {
5554 	struct vnode* vnode = descriptor->u.vnode;
5555 
5556 	if (!HAS_FS_CALL(vnode, deselect))
5557 		return B_OK;
5558 
5559 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5560 }
5561 
5562 
5563 static status_t
5564 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5565 	bool kernel)
5566 {
5567 	struct vnode* vnode;
5568 	status_t status;
5569 
5570 	if (name == NULL || *name == '\0')
5571 		return B_BAD_VALUE;
5572 
5573 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5574 		"perms = %d)\n", mountID, parentID, name, perms));
5575 
5576 	status = get_vnode(mountID, parentID, &vnode, true, false);
5577 	if (status != B_OK)
5578 		return status;
5579 
5580 	if (HAS_FS_CALL(vnode, create_dir))
5581 		status = FS_CALL(vnode, create_dir, name, perms);
5582 	else
5583 		status = B_READ_ONLY_DEVICE;
5584 
5585 	put_vnode(vnode);
5586 	return status;
5587 }
5588 
5589 
5590 static status_t
5591 dir_create(int fd, char* path, int perms, bool kernel)
5592 {
5593 	char filename[B_FILE_NAME_LENGTH];
5594 	struct vnode* vnode;
5595 	status_t status;
5596 
5597 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5598 		kernel));
5599 
5600 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5601 	if (status < 0)
5602 		return status;
5603 
5604 	if (HAS_FS_CALL(vnode, create_dir)) {
5605 		status = FS_CALL(vnode, create_dir, filename, perms);
5606 	} else
5607 		status = B_READ_ONLY_DEVICE;
5608 
5609 	put_vnode(vnode);
5610 	return status;
5611 }
5612 
5613 
5614 static int
5615 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5616 {
5617 	struct vnode* vnode;
5618 	int status;
5619 
5620 	FUNCTION(("dir_open_entry_ref()\n"));
5621 
5622 	if (name && *name == '\0')
5623 		return B_BAD_VALUE;
5624 
5625 	// get the vnode matching the entry_ref/node_ref
5626 	if (name) {
5627 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5628 			&vnode);
5629 	} else
5630 		status = get_vnode(mountID, parentID, &vnode, true, false);
5631 	if (status != B_OK)
5632 		return status;
5633 
5634 	int fd = open_dir_vnode(vnode, kernel);
5635 	if (fd < 0)
5636 		put_vnode(vnode);
5637 
5638 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5639 		vnode->id, name);
5640 	return fd;
5641 }
5642 
5643 
5644 static int
5645 dir_open(int fd, char* path, bool kernel)
5646 {
5647 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5648 		kernel));
5649 
5650 	// get the vnode matching the vnode + path combination
5651 	struct vnode* vnode = NULL;
5652 	ino_t parentID;
5653 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5654 		kernel);
5655 	if (status != B_OK)
5656 		return status;
5657 
5658 	// open the dir
5659 	int newFD = open_dir_vnode(vnode, kernel);
5660 	if (newFD < 0)
5661 		put_vnode(vnode);
5662 
5663 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID,
5664 		vnode->id, NULL);
5665 	return newFD;
5666 }
5667 
5668 
5669 static status_t
5670 dir_close(struct file_descriptor* descriptor)
5671 {
5672 	struct vnode* vnode = descriptor->u.vnode;
5673 
5674 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5675 
5676 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5677 		vnode->id);
5678 	if (HAS_FS_CALL(vnode, close_dir))
5679 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5680 
5681 	return B_OK;
5682 }
5683 
5684 
5685 static void
5686 dir_free_fd(struct file_descriptor* descriptor)
5687 {
5688 	struct vnode* vnode = descriptor->u.vnode;
5689 
5690 	if (vnode != NULL) {
5691 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5692 		put_vnode(vnode);
5693 	}
5694 }
5695 
5696 
5697 static status_t
5698 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5699 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5700 {
5701 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5702 		bufferSize, _count);
5703 }
5704 
5705 
5706 static status_t
5707 fix_dirent(struct vnode* parent, struct dirent* entry,
5708 	struct io_context* ioContext)
5709 {
5710 	// set d_pdev and d_pino
5711 	entry->d_pdev = parent->device;
5712 	entry->d_pino = parent->id;
5713 
5714 	// If this is the ".." entry and the directory is the root of a FS,
5715 	// we need to replace d_dev and d_ino with the actual values.
5716 	if (strcmp(entry->d_name, "..") == 0
5717 		&& parent->mount->root_vnode == parent
5718 		&& parent->mount->covers_vnode) {
5719 		inc_vnode_ref_count(parent);
5720 			// vnode_path_to_vnode() puts the node
5721 
5722 		// Make sure the IO context root is not bypassed.
5723 		if (parent == ioContext->root) {
5724 			entry->d_dev = parent->device;
5725 			entry->d_ino = parent->id;
5726 		} else {
5727 			// ".." is guaranteed not to be clobbered by this call
5728 			struct vnode* vnode;
5729 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5730 				ioContext, &vnode, NULL);
5731 
5732 			if (status == B_OK) {
5733 				entry->d_dev = vnode->device;
5734 				entry->d_ino = vnode->id;
5735 			}
5736 		}
5737 	} else {
5738 		// resolve mount points
5739 		ReadLocker _(&sVnodeLock);
5740 
5741 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5742 		if (vnode != NULL) {
5743 			if (vnode->covered_by != NULL) {
5744 				entry->d_dev = vnode->covered_by->device;
5745 				entry->d_ino = vnode->covered_by->id;
5746 			}
5747 		}
5748 	}
5749 
5750 	return B_OK;
5751 }
5752 
5753 
5754 static status_t
5755 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5756 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5757 {
5758 	if (!HAS_FS_CALL(vnode, read_dir))
5759 		return B_NOT_SUPPORTED;
5760 
5761 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5762 		_count);
5763 	if (error != B_OK)
5764 		return error;
5765 
5766 	// we need to adjust the read dirents
5767 	uint32 count = *_count;
5768 	for (uint32 i = 0; i < count; i++) {
5769 		error = fix_dirent(vnode, buffer, ioContext);
5770 		if (error != B_OK)
5771 			return error;
5772 
5773 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5774 	}
5775 
5776 	return error;
5777 }
5778 
5779 
5780 static status_t
5781 dir_rewind(struct file_descriptor* descriptor)
5782 {
5783 	struct vnode* vnode = descriptor->u.vnode;
5784 
5785 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5786 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5787 	}
5788 
5789 	return B_NOT_SUPPORTED;
5790 }
5791 
5792 
5793 static status_t
5794 dir_remove(int fd, char* path, bool kernel)
5795 {
5796 	char name[B_FILE_NAME_LENGTH];
5797 	struct vnode* directory;
5798 	status_t status;
5799 
5800 	if (path != NULL) {
5801 		// we need to make sure our path name doesn't stop with "/", ".",
5802 		// or ".."
5803 		char* lastSlash = strrchr(path, '/');
5804 		if (lastSlash != NULL) {
5805 			char* leaf = lastSlash + 1;
5806 			if (!strcmp(leaf, ".."))
5807 				return B_NOT_ALLOWED;
5808 
5809 			// omit multiple slashes
5810 			while (lastSlash > path && lastSlash[-1] == '/') {
5811 				lastSlash--;
5812 			}
5813 
5814 			if (!leaf[0]
5815 				|| !strcmp(leaf, ".")) {
5816 				// "name/" -> "name", or "name/." -> "name"
5817 				lastSlash[0] = '\0';
5818 			}
5819 		}
5820 
5821 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5822 			return B_NOT_ALLOWED;
5823 	}
5824 
5825 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5826 	if (status != B_OK)
5827 		return status;
5828 
5829 	if (HAS_FS_CALL(directory, remove_dir))
5830 		status = FS_CALL(directory, remove_dir, name);
5831 	else
5832 		status = B_READ_ONLY_DEVICE;
5833 
5834 	put_vnode(directory);
5835 	return status;
5836 }
5837 
5838 
5839 static status_t
5840 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5841 	size_t length)
5842 {
5843 	struct vnode* vnode = descriptor->u.vnode;
5844 
5845 	if (HAS_FS_CALL(vnode, ioctl))
5846 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5847 
5848 	return B_NOT_SUPPORTED;
5849 }
5850 
5851 
5852 static status_t
5853 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5854 {
5855 	struct flock flock;
5856 
5857 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5858 		fd, op, argument, kernel ? "kernel" : "user"));
5859 
5860 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5861 		fd);
5862 	if (descriptor == NULL)
5863 		return B_FILE_ERROR;
5864 
5865 	struct vnode* vnode = fd_vnode(descriptor);
5866 
5867 	status_t status = B_OK;
5868 
5869 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5870 		if (descriptor->type != FDTYPE_FILE)
5871 			status = B_BAD_VALUE;
5872 		else if (user_memcpy(&flock, (struct flock*)argument,
5873 				sizeof(struct flock)) != B_OK)
5874 			status = B_BAD_ADDRESS;
5875 
5876 		if (status != B_OK) {
5877 			put_fd(descriptor);
5878 			return status;
5879 		}
5880 	}
5881 
5882 	switch (op) {
5883 		case F_SETFD:
5884 		{
5885 			struct io_context* context = get_current_io_context(kernel);
5886 			// Set file descriptor flags
5887 
5888 			// O_CLOEXEC is the only flag available at this time
5889 			mutex_lock(&context->io_mutex);
5890 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5891 			mutex_unlock(&context->io_mutex);
5892 
5893 			status = B_OK;
5894 			break;
5895 		}
5896 
5897 		case F_GETFD:
5898 		{
5899 			struct io_context* context = get_current_io_context(kernel);
5900 
5901 			// Get file descriptor flags
5902 			mutex_lock(&context->io_mutex);
5903 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5904 			mutex_unlock(&context->io_mutex);
5905 			break;
5906 		}
5907 
5908 		case F_SETFL:
5909 			// Set file descriptor open mode
5910 
5911 			// we only accept changes to O_APPEND and O_NONBLOCK
5912 			argument &= O_APPEND | O_NONBLOCK;
5913 			if (descriptor->ops->fd_set_flags != NULL) {
5914 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5915 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5916 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5917 					(int)argument);
5918 			} else
5919 				status = B_NOT_SUPPORTED;
5920 
5921 			if (status == B_OK) {
5922 				// update this descriptor's open_mode field
5923 				descriptor->open_mode = (descriptor->open_mode
5924 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5925 			}
5926 
5927 			break;
5928 
5929 		case F_GETFL:
5930 			// Get file descriptor open mode
5931 			status = descriptor->open_mode;
5932 			break;
5933 
5934 		case F_DUPFD:
5935 		{
5936 			struct io_context* context = get_current_io_context(kernel);
5937 
5938 			status = new_fd_etc(context, descriptor, (int)argument);
5939 			if (status >= 0) {
5940 				mutex_lock(&context->io_mutex);
5941 				fd_set_close_on_exec(context, fd, false);
5942 				mutex_unlock(&context->io_mutex);
5943 
5944 				atomic_add(&descriptor->ref_count, 1);
5945 			}
5946 			break;
5947 		}
5948 
5949 		case F_GETLK:
5950 			if (vnode != NULL) {
5951 				status = get_advisory_lock(vnode, &flock);
5952 				if (status == B_OK) {
5953 					// copy back flock structure
5954 					status = user_memcpy((struct flock*)argument, &flock,
5955 						sizeof(struct flock));
5956 				}
5957 			} else
5958 				status = B_BAD_VALUE;
5959 			break;
5960 
5961 		case F_SETLK:
5962 		case F_SETLKW:
5963 			status = normalize_flock(descriptor, &flock);
5964 			if (status != B_OK)
5965 				break;
5966 
5967 			if (vnode == NULL) {
5968 				status = B_BAD_VALUE;
5969 			} else if (flock.l_type == F_UNLCK) {
5970 				status = release_advisory_lock(vnode, &flock);
5971 			} else {
5972 				// the open mode must match the lock type
5973 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5974 						&& flock.l_type == F_WRLCK)
5975 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5976 						&& flock.l_type == F_RDLCK))
5977 					status = B_FILE_ERROR;
5978 				else {
5979 					status = acquire_advisory_lock(vnode, -1,
5980 						&flock, op == F_SETLKW);
5981 				}
5982 			}
5983 			break;
5984 
5985 		// ToDo: add support for more ops?
5986 
5987 		default:
5988 			status = B_BAD_VALUE;
5989 	}
5990 
5991 	put_fd(descriptor);
5992 	return status;
5993 }
5994 
5995 
5996 static status_t
5997 common_sync(int fd, bool kernel)
5998 {
5999 	struct file_descriptor* descriptor;
6000 	struct vnode* vnode;
6001 	status_t status;
6002 
6003 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6004 
6005 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6006 	if (descriptor == NULL)
6007 		return B_FILE_ERROR;
6008 
6009 	if (HAS_FS_CALL(vnode, fsync))
6010 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6011 	else
6012 		status = B_NOT_SUPPORTED;
6013 
6014 	put_fd(descriptor);
6015 	return status;
6016 }
6017 
6018 
6019 static status_t
6020 common_lock_node(int fd, bool kernel)
6021 {
6022 	struct file_descriptor* descriptor;
6023 	struct vnode* vnode;
6024 
6025 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6026 	if (descriptor == NULL)
6027 		return B_FILE_ERROR;
6028 
6029 	status_t status = B_OK;
6030 
6031 	// We need to set the locking atomically - someone
6032 	// else might set one at the same time
6033 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6034 			(file_descriptor*)NULL) != NULL)
6035 		status = B_BUSY;
6036 
6037 	put_fd(descriptor);
6038 	return status;
6039 }
6040 
6041 
6042 static status_t
6043 common_unlock_node(int fd, bool kernel)
6044 {
6045 	struct file_descriptor* descriptor;
6046 	struct vnode* vnode;
6047 
6048 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6049 	if (descriptor == NULL)
6050 		return B_FILE_ERROR;
6051 
6052 	status_t status = B_OK;
6053 
6054 	// We need to set the locking atomically - someone
6055 	// else might set one at the same time
6056 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6057 			(file_descriptor*)NULL, descriptor) != descriptor)
6058 		status = B_BAD_VALUE;
6059 
6060 	put_fd(descriptor);
6061 	return status;
6062 }
6063 
6064 
6065 static status_t
6066 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6067 	bool kernel)
6068 {
6069 	struct vnode* vnode;
6070 	status_t status;
6071 
6072 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6073 	if (status != B_OK)
6074 		return status;
6075 
6076 	if (HAS_FS_CALL(vnode, read_symlink)) {
6077 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6078 	} else
6079 		status = B_BAD_VALUE;
6080 
6081 	put_vnode(vnode);
6082 	return status;
6083 }
6084 
6085 
6086 static status_t
6087 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6088 	bool kernel)
6089 {
6090 	// path validity checks have to be in the calling function!
6091 	char name[B_FILE_NAME_LENGTH];
6092 	struct vnode* vnode;
6093 	status_t status;
6094 
6095 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6096 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6097 
6098 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6099 	if (status != B_OK)
6100 		return status;
6101 
6102 	if (HAS_FS_CALL(vnode, create_symlink))
6103 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6104 	else {
6105 		status = HAS_FS_CALL(vnode, write)
6106 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
6107 	}
6108 
6109 	put_vnode(vnode);
6110 
6111 	return status;
6112 }
6113 
6114 
6115 static status_t
6116 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6117 	bool traverseLeafLink, bool kernel)
6118 {
6119 	// path validity checks have to be in the calling function!
6120 
6121 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6122 		toPath, kernel));
6123 
6124 	char name[B_FILE_NAME_LENGTH];
6125 	struct vnode* directory;
6126 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6127 		kernel);
6128 	if (status != B_OK)
6129 		return status;
6130 
6131 	struct vnode* vnode;
6132 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6133 		kernel);
6134 	if (status != B_OK)
6135 		goto err;
6136 
6137 	if (directory->mount != vnode->mount) {
6138 		status = B_CROSS_DEVICE_LINK;
6139 		goto err1;
6140 	}
6141 
6142 	if (HAS_FS_CALL(directory, link))
6143 		status = FS_CALL(directory, link, name, vnode);
6144 	else
6145 		status = B_READ_ONLY_DEVICE;
6146 
6147 err1:
6148 	put_vnode(vnode);
6149 err:
6150 	put_vnode(directory);
6151 
6152 	return status;
6153 }
6154 
6155 
6156 static status_t
6157 common_unlink(int fd, char* path, bool kernel)
6158 {
6159 	char filename[B_FILE_NAME_LENGTH];
6160 	struct vnode* vnode;
6161 	status_t status;
6162 
6163 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6164 		kernel));
6165 
6166 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6167 	if (status < 0)
6168 		return status;
6169 
6170 	if (HAS_FS_CALL(vnode, unlink))
6171 		status = FS_CALL(vnode, unlink, filename);
6172 	else
6173 		status = B_READ_ONLY_DEVICE;
6174 
6175 	put_vnode(vnode);
6176 
6177 	return status;
6178 }
6179 
6180 
6181 static status_t
6182 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6183 {
6184 	struct vnode* vnode;
6185 	status_t status;
6186 
6187 	// TODO: honor effectiveUserGroup argument
6188 
6189 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6190 	if (status != B_OK)
6191 		return status;
6192 
6193 	if (HAS_FS_CALL(vnode, access))
6194 		status = FS_CALL(vnode, access, mode);
6195 	else
6196 		status = B_OK;
6197 
6198 	put_vnode(vnode);
6199 
6200 	return status;
6201 }
6202 
6203 
6204 static status_t
6205 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6206 {
6207 	struct vnode* fromVnode;
6208 	struct vnode* toVnode;
6209 	char fromName[B_FILE_NAME_LENGTH];
6210 	char toName[B_FILE_NAME_LENGTH];
6211 	status_t status;
6212 
6213 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6214 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6215 
6216 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6217 	if (status != B_OK)
6218 		return status;
6219 
6220 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6221 	if (status != B_OK)
6222 		goto err1;
6223 
6224 	if (fromVnode->device != toVnode->device) {
6225 		status = B_CROSS_DEVICE_LINK;
6226 		goto err2;
6227 	}
6228 
6229 	if (fromName[0] == '\0' || toName == '\0'
6230 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6231 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6232 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6233 		status = B_BAD_VALUE;
6234 		goto err2;
6235 	}
6236 
6237 	if (HAS_FS_CALL(fromVnode, rename))
6238 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6239 	else
6240 		status = B_READ_ONLY_DEVICE;
6241 
6242 err2:
6243 	put_vnode(toVnode);
6244 err1:
6245 	put_vnode(fromVnode);
6246 
6247 	return status;
6248 }
6249 
6250 
6251 static status_t
6252 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6253 {
6254 	struct vnode* vnode = descriptor->u.vnode;
6255 
6256 	FUNCTION(("common_read_stat: stat %p\n", stat));
6257 
6258 	// TODO: remove this once all file systems properly set them!
6259 	stat->st_crtim.tv_nsec = 0;
6260 	stat->st_ctim.tv_nsec = 0;
6261 	stat->st_mtim.tv_nsec = 0;
6262 	stat->st_atim.tv_nsec = 0;
6263 
6264 	status_t status = FS_CALL(vnode, read_stat, stat);
6265 
6266 	// fill in the st_dev and st_ino fields
6267 	if (status == B_OK) {
6268 		stat->st_dev = vnode->device;
6269 		stat->st_ino = vnode->id;
6270 		stat->st_rdev = -1;
6271 	}
6272 
6273 	return status;
6274 }
6275 
6276 
6277 static status_t
6278 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6279 	int statMask)
6280 {
6281 	struct vnode* vnode = descriptor->u.vnode;
6282 
6283 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6284 		vnode, stat, statMask));
6285 
6286 	if (!HAS_FS_CALL(vnode, write_stat))
6287 		return B_READ_ONLY_DEVICE;
6288 
6289 	return FS_CALL(vnode, write_stat, stat, statMask);
6290 }
6291 
6292 
6293 static status_t
6294 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6295 	struct stat* stat, bool kernel)
6296 {
6297 	struct vnode* vnode;
6298 	status_t status;
6299 
6300 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6301 		stat));
6302 
6303 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6304 		kernel);
6305 	if (status < 0)
6306 		return status;
6307 
6308 	status = FS_CALL(vnode, read_stat, stat);
6309 
6310 	// fill in the st_dev and st_ino fields
6311 	if (status == B_OK) {
6312 		stat->st_dev = vnode->device;
6313 		stat->st_ino = vnode->id;
6314 		stat->st_rdev = -1;
6315 	}
6316 
6317 	put_vnode(vnode);
6318 	return status;
6319 }
6320 
6321 
6322 static status_t
6323 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6324 	const struct stat* stat, int statMask, bool kernel)
6325 {
6326 	struct vnode* vnode;
6327 	status_t status;
6328 
6329 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6330 		"kernel %d\n", fd, path, stat, statMask, kernel));
6331 
6332 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6333 		kernel);
6334 	if (status < 0)
6335 		return status;
6336 
6337 	if (HAS_FS_CALL(vnode, write_stat))
6338 		status = FS_CALL(vnode, write_stat, stat, statMask);
6339 	else
6340 		status = B_READ_ONLY_DEVICE;
6341 
6342 	put_vnode(vnode);
6343 
6344 	return status;
6345 }
6346 
6347 
6348 static int
6349 attr_dir_open(int fd, char* path, bool kernel)
6350 {
6351 	struct vnode* vnode;
6352 	int status;
6353 
6354 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6355 		kernel));
6356 
6357 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6358 	if (status != B_OK)
6359 		return status;
6360 
6361 	status = open_attr_dir_vnode(vnode, kernel);
6362 	if (status < 0)
6363 		put_vnode(vnode);
6364 
6365 	return status;
6366 }
6367 
6368 
6369 static status_t
6370 attr_dir_close(struct file_descriptor* descriptor)
6371 {
6372 	struct vnode* vnode = descriptor->u.vnode;
6373 
6374 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6375 
6376 	if (HAS_FS_CALL(vnode, close_attr_dir))
6377 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6378 
6379 	return B_OK;
6380 }
6381 
6382 
6383 static void
6384 attr_dir_free_fd(struct file_descriptor* descriptor)
6385 {
6386 	struct vnode* vnode = descriptor->u.vnode;
6387 
6388 	if (vnode != NULL) {
6389 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6390 		put_vnode(vnode);
6391 	}
6392 }
6393 
6394 
6395 static status_t
6396 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6397 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6398 {
6399 	struct vnode* vnode = descriptor->u.vnode;
6400 
6401 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6402 
6403 	if (HAS_FS_CALL(vnode, read_attr_dir))
6404 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6405 			bufferSize, _count);
6406 
6407 	return B_NOT_SUPPORTED;
6408 }
6409 
6410 
6411 static status_t
6412 attr_dir_rewind(struct file_descriptor* descriptor)
6413 {
6414 	struct vnode* vnode = descriptor->u.vnode;
6415 
6416 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6417 
6418 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6419 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6420 
6421 	return B_NOT_SUPPORTED;
6422 }
6423 
6424 
6425 static int
6426 attr_create(int fd, char* path, const char* name, uint32 type,
6427 	int openMode, bool kernel)
6428 {
6429 	if (name == NULL || *name == '\0')
6430 		return B_BAD_VALUE;
6431 
6432 	struct vnode* vnode;
6433 	status_t status = fd_and_path_to_vnode(fd, path,
6434 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6435 	if (status != B_OK)
6436 		return status;
6437 
6438 	if (!HAS_FS_CALL(vnode, create_attr)) {
6439 		status = B_READ_ONLY_DEVICE;
6440 		goto err;
6441 	}
6442 
6443 	void* cookie;
6444 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6445 	if (status != B_OK)
6446 		goto err;
6447 
6448 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6449 	if (fd >= 0)
6450 		return fd;
6451 
6452 	status = fd;
6453 
6454 	FS_CALL(vnode, close_attr, cookie);
6455 	FS_CALL(vnode, free_attr_cookie, cookie);
6456 
6457 	FS_CALL(vnode, remove_attr, name);
6458 
6459 err:
6460 	put_vnode(vnode);
6461 
6462 	return status;
6463 }
6464 
6465 
6466 static int
6467 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6468 {
6469 	if (name == NULL || *name == '\0')
6470 		return B_BAD_VALUE;
6471 
6472 	struct vnode* vnode;
6473 	status_t status = fd_and_path_to_vnode(fd, path,
6474 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6475 	if (status != B_OK)
6476 		return status;
6477 
6478 	if (!HAS_FS_CALL(vnode, open_attr)) {
6479 		status = B_NOT_SUPPORTED;
6480 		goto err;
6481 	}
6482 
6483 	void* cookie;
6484 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6485 	if (status != B_OK)
6486 		goto err;
6487 
6488 	// now we only need a file descriptor for this attribute and we're done
6489 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6490 	if (fd >= 0)
6491 		return fd;
6492 
6493 	status = fd;
6494 
6495 	FS_CALL(vnode, close_attr, cookie);
6496 	FS_CALL(vnode, free_attr_cookie, cookie);
6497 
6498 err:
6499 	put_vnode(vnode);
6500 
6501 	return status;
6502 }
6503 
6504 
6505 static status_t
6506 attr_close(struct file_descriptor* descriptor)
6507 {
6508 	struct vnode* vnode = descriptor->u.vnode;
6509 
6510 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6511 
6512 	if (HAS_FS_CALL(vnode, close_attr))
6513 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6514 
6515 	return B_OK;
6516 }
6517 
6518 
6519 static void
6520 attr_free_fd(struct file_descriptor* descriptor)
6521 {
6522 	struct vnode* vnode = descriptor->u.vnode;
6523 
6524 	if (vnode != NULL) {
6525 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6526 		put_vnode(vnode);
6527 	}
6528 }
6529 
6530 
6531 static status_t
6532 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6533 	size_t* length)
6534 {
6535 	struct vnode* vnode = descriptor->u.vnode;
6536 
6537 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6538 		*length));
6539 
6540 	if (!HAS_FS_CALL(vnode, read_attr))
6541 		return B_NOT_SUPPORTED;
6542 
6543 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6544 }
6545 
6546 
6547 static status_t
6548 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6549 	size_t* length)
6550 {
6551 	struct vnode* vnode = descriptor->u.vnode;
6552 
6553 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6554 	if (!HAS_FS_CALL(vnode, write_attr))
6555 		return B_NOT_SUPPORTED;
6556 
6557 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6558 }
6559 
6560 
6561 static off_t
6562 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6563 {
6564 	off_t offset;
6565 
6566 	switch (seekType) {
6567 		case SEEK_SET:
6568 			offset = 0;
6569 			break;
6570 		case SEEK_CUR:
6571 			offset = descriptor->pos;
6572 			break;
6573 		case SEEK_END:
6574 		{
6575 			struct vnode* vnode = descriptor->u.vnode;
6576 			if (!HAS_FS_CALL(vnode, read_stat))
6577 				return B_NOT_SUPPORTED;
6578 
6579 			struct stat stat;
6580 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6581 				&stat);
6582 			if (status != B_OK)
6583 				return status;
6584 
6585 			offset = stat.st_size;
6586 			break;
6587 		}
6588 		default:
6589 			return B_BAD_VALUE;
6590 	}
6591 
6592 	// assumes off_t is 64 bits wide
6593 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6594 		return B_BUFFER_OVERFLOW;
6595 
6596 	pos += offset;
6597 	if (pos < 0)
6598 		return B_BAD_VALUE;
6599 
6600 	return descriptor->pos = pos;
6601 }
6602 
6603 
6604 static status_t
6605 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6606 {
6607 	struct vnode* vnode = descriptor->u.vnode;
6608 
6609 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6610 
6611 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6612 		return B_NOT_SUPPORTED;
6613 
6614 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6615 }
6616 
6617 
6618 static status_t
6619 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6620 	int statMask)
6621 {
6622 	struct vnode* vnode = descriptor->u.vnode;
6623 
6624 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6625 
6626 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6627 		return B_READ_ONLY_DEVICE;
6628 
6629 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6630 }
6631 
6632 
6633 static status_t
6634 attr_remove(int fd, const char* name, bool kernel)
6635 {
6636 	struct file_descriptor* descriptor;
6637 	struct vnode* vnode;
6638 	status_t status;
6639 
6640 	if (name == NULL || *name == '\0')
6641 		return B_BAD_VALUE;
6642 
6643 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6644 		kernel));
6645 
6646 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6647 	if (descriptor == NULL)
6648 		return B_FILE_ERROR;
6649 
6650 	if (HAS_FS_CALL(vnode, remove_attr))
6651 		status = FS_CALL(vnode, remove_attr, name);
6652 	else
6653 		status = B_READ_ONLY_DEVICE;
6654 
6655 	put_fd(descriptor);
6656 
6657 	return status;
6658 }
6659 
6660 
6661 static status_t
6662 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6663 	bool kernel)
6664 {
6665 	struct file_descriptor* fromDescriptor;
6666 	struct file_descriptor* toDescriptor;
6667 	struct vnode* fromVnode;
6668 	struct vnode* toVnode;
6669 	status_t status;
6670 
6671 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6672 		|| *toName == '\0')
6673 		return B_BAD_VALUE;
6674 
6675 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6676 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6677 
6678 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6679 	if (fromDescriptor == NULL)
6680 		return B_FILE_ERROR;
6681 
6682 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6683 	if (toDescriptor == NULL) {
6684 		status = B_FILE_ERROR;
6685 		goto err;
6686 	}
6687 
6688 	// are the files on the same volume?
6689 	if (fromVnode->device != toVnode->device) {
6690 		status = B_CROSS_DEVICE_LINK;
6691 		goto err1;
6692 	}
6693 
6694 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6695 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6696 	} else
6697 		status = B_READ_ONLY_DEVICE;
6698 
6699 err1:
6700 	put_fd(toDescriptor);
6701 err:
6702 	put_fd(fromDescriptor);
6703 
6704 	return status;
6705 }
6706 
6707 
6708 static int
6709 index_dir_open(dev_t mountID, bool kernel)
6710 {
6711 	struct fs_mount* mount;
6712 	void* cookie;
6713 
6714 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6715 
6716 	status_t status = get_mount(mountID, &mount);
6717 	if (status != B_OK)
6718 		return status;
6719 
6720 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6721 		status = B_NOT_SUPPORTED;
6722 		goto error;
6723 	}
6724 
6725 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6726 	if (status != B_OK)
6727 		goto error;
6728 
6729 	// get fd for the index directory
6730 	int fd;
6731 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6732 	if (fd >= 0)
6733 		return fd;
6734 
6735 	// something went wrong
6736 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6737 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6738 
6739 	status = fd;
6740 
6741 error:
6742 	put_mount(mount);
6743 	return status;
6744 }
6745 
6746 
6747 static status_t
6748 index_dir_close(struct file_descriptor* descriptor)
6749 {
6750 	struct fs_mount* mount = descriptor->u.mount;
6751 
6752 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6753 
6754 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6755 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6756 
6757 	return B_OK;
6758 }
6759 
6760 
6761 static void
6762 index_dir_free_fd(struct file_descriptor* descriptor)
6763 {
6764 	struct fs_mount* mount = descriptor->u.mount;
6765 
6766 	if (mount != NULL) {
6767 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6768 		put_mount(mount);
6769 	}
6770 }
6771 
6772 
6773 static status_t
6774 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6775 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6776 {
6777 	struct fs_mount* mount = descriptor->u.mount;
6778 
6779 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6780 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6781 			bufferSize, _count);
6782 	}
6783 
6784 	return B_NOT_SUPPORTED;
6785 }
6786 
6787 
6788 static status_t
6789 index_dir_rewind(struct file_descriptor* descriptor)
6790 {
6791 	struct fs_mount* mount = descriptor->u.mount;
6792 
6793 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6794 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6795 
6796 	return B_NOT_SUPPORTED;
6797 }
6798 
6799 
6800 static status_t
6801 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6802 	bool kernel)
6803 {
6804 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6805 		name, kernel));
6806 
6807 	struct fs_mount* mount;
6808 	status_t status = get_mount(mountID, &mount);
6809 	if (status != B_OK)
6810 		return status;
6811 
6812 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6813 		status = B_READ_ONLY_DEVICE;
6814 		goto out;
6815 	}
6816 
6817 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6818 
6819 out:
6820 	put_mount(mount);
6821 	return status;
6822 }
6823 
6824 
6825 #if 0
6826 static status_t
6827 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6828 {
6829 	struct vnode* vnode = descriptor->u.vnode;
6830 
6831 	// ToDo: currently unused!
6832 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6833 	if (!HAS_FS_CALL(vnode, read_index_stat))
6834 		return B_NOT_SUPPORTED;
6835 
6836 	return B_NOT_SUPPORTED;
6837 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6838 }
6839 
6840 
6841 static void
6842 index_free_fd(struct file_descriptor* descriptor)
6843 {
6844 	struct vnode* vnode = descriptor->u.vnode;
6845 
6846 	if (vnode != NULL) {
6847 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6848 		put_vnode(vnode);
6849 	}
6850 }
6851 #endif
6852 
6853 
6854 static status_t
6855 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6856 	bool kernel)
6857 {
6858 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6859 		name, kernel));
6860 
6861 	struct fs_mount* mount;
6862 	status_t status = get_mount(mountID, &mount);
6863 	if (status != B_OK)
6864 		return status;
6865 
6866 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6867 		status = B_NOT_SUPPORTED;
6868 		goto out;
6869 	}
6870 
6871 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6872 
6873 out:
6874 	put_mount(mount);
6875 	return status;
6876 }
6877 
6878 
6879 static status_t
6880 index_remove(dev_t mountID, const char* name, bool kernel)
6881 {
6882 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6883 		name, kernel));
6884 
6885 	struct fs_mount* mount;
6886 	status_t status = get_mount(mountID, &mount);
6887 	if (status != B_OK)
6888 		return status;
6889 
6890 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6891 		status = B_READ_ONLY_DEVICE;
6892 		goto out;
6893 	}
6894 
6895 	status = FS_MOUNT_CALL(mount, remove_index, name);
6896 
6897 out:
6898 	put_mount(mount);
6899 	return status;
6900 }
6901 
6902 
6903 /*!	TODO: the query FS API is still the pretty much the same as in R5.
6904 		It would be nice if the FS would find some more kernel support
6905 		for them.
6906 		For example, query parsing should be moved into the kernel.
6907 */
6908 static int
6909 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6910 	int32 token, bool kernel)
6911 {
6912 	struct fs_mount* mount;
6913 	void* cookie;
6914 
6915 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6916 		query, kernel));
6917 
6918 	status_t status = get_mount(device, &mount);
6919 	if (status != B_OK)
6920 		return status;
6921 
6922 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6923 		status = B_NOT_SUPPORTED;
6924 		goto error;
6925 	}
6926 
6927 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6928 		&cookie);
6929 	if (status != B_OK)
6930 		goto error;
6931 
6932 	// get fd for the index directory
6933 	int fd;
6934 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6935 	if (fd >= 0)
6936 		return fd;
6937 
6938 	status = fd;
6939 
6940 	// something went wrong
6941 	FS_MOUNT_CALL(mount, close_query, cookie);
6942 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6943 
6944 error:
6945 	put_mount(mount);
6946 	return status;
6947 }
6948 
6949 
6950 static status_t
6951 query_close(struct file_descriptor* descriptor)
6952 {
6953 	struct fs_mount* mount = descriptor->u.mount;
6954 
6955 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6956 
6957 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6958 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6959 
6960 	return B_OK;
6961 }
6962 
6963 
6964 static void
6965 query_free_fd(struct file_descriptor* descriptor)
6966 {
6967 	struct fs_mount* mount = descriptor->u.mount;
6968 
6969 	if (mount != NULL) {
6970 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6971 		put_mount(mount);
6972 	}
6973 }
6974 
6975 
6976 static status_t
6977 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6978 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6979 {
6980 	struct fs_mount* mount = descriptor->u.mount;
6981 
6982 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6983 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6984 			bufferSize, _count);
6985 	}
6986 
6987 	return B_NOT_SUPPORTED;
6988 }
6989 
6990 
6991 static status_t
6992 query_rewind(struct file_descriptor* descriptor)
6993 {
6994 	struct fs_mount* mount = descriptor->u.mount;
6995 
6996 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6997 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6998 
6999 	return B_NOT_SUPPORTED;
7000 }
7001 
7002 
7003 //	#pragma mark - General File System functions
7004 
7005 
7006 static dev_t
7007 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7008 	const char* args, bool kernel)
7009 {
7010 	struct ::fs_mount* mount;
7011 	status_t status = B_OK;
7012 	fs_volume* volume = NULL;
7013 	int32 layer = 0;
7014 
7015 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7016 
7017 	// The path is always safe, we just have to make sure that fsName is
7018 	// almost valid - we can't make any assumptions about args, though.
7019 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7020 	// We'll get it from the DDM later.
7021 	if (fsName == NULL) {
7022 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7023 			return B_BAD_VALUE;
7024 	} else if (fsName[0] == '\0')
7025 		return B_BAD_VALUE;
7026 
7027 	RecursiveLocker mountOpLocker(sMountOpLock);
7028 
7029 	// Helper to delete a newly created file device on failure.
7030 	// Not exactly beautiful, but helps to keep the code below cleaner.
7031 	struct FileDeviceDeleter {
7032 		FileDeviceDeleter() : id(-1) {}
7033 		~FileDeviceDeleter()
7034 		{
7035 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7036 		}
7037 
7038 		partition_id id;
7039 	} fileDeviceDeleter;
7040 
7041 	// If the file system is not a "virtual" one, the device argument should
7042 	// point to a real file/device (if given at all).
7043 	// get the partition
7044 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7045 	KPartition* partition = NULL;
7046 	KPath normalizedDevice;
7047 	bool newlyCreatedFileDevice = false;
7048 
7049 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7050 		// normalize the device path
7051 		status = normalizedDevice.SetTo(device, true);
7052 		if (status != B_OK)
7053 			return status;
7054 
7055 		// get a corresponding partition from the DDM
7056 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7057 		if (partition == NULL) {
7058 			// Partition not found: This either means, the user supplied
7059 			// an invalid path, or the path refers to an image file. We try
7060 			// to let the DDM create a file device for the path.
7061 			partition_id deviceID = ddm->CreateFileDevice(
7062 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7063 			if (deviceID >= 0) {
7064 				partition = ddm->RegisterPartition(deviceID);
7065 				if (newlyCreatedFileDevice)
7066 					fileDeviceDeleter.id = deviceID;
7067 			}
7068 		}
7069 
7070 		if (!partition) {
7071 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7072 				normalizedDevice.Path()));
7073 			return B_ENTRY_NOT_FOUND;
7074 		}
7075 
7076 		device = normalizedDevice.Path();
7077 			// correct path to file device
7078 	}
7079 	PartitionRegistrar partitionRegistrar(partition, true);
7080 
7081 	// Write lock the partition's device. For the time being, we keep the lock
7082 	// until we're done mounting -- not nice, but ensure, that no-one is
7083 	// interfering.
7084 	// TODO: Just mark the partition busy while mounting!
7085 	KDiskDevice* diskDevice = NULL;
7086 	if (partition) {
7087 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7088 		if (!diskDevice) {
7089 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7090 			return B_ERROR;
7091 		}
7092 	}
7093 
7094 	DeviceWriteLocker writeLocker(diskDevice, true);
7095 		// this takes over the write lock acquired before
7096 
7097 	if (partition != NULL) {
7098 		// make sure, that the partition is not busy
7099 		if (partition->IsBusy()) {
7100 			TRACE(("fs_mount(): Partition is busy.\n"));
7101 			return B_BUSY;
7102 		}
7103 
7104 		// if no FS name had been supplied, we get it from the partition
7105 		if (fsName == NULL) {
7106 			KDiskSystem* diskSystem = partition->DiskSystem();
7107 			if (!diskSystem) {
7108 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7109 					"recognize it.\n"));
7110 				return B_BAD_VALUE;
7111 			}
7112 
7113 			if (!diskSystem->IsFileSystem()) {
7114 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7115 					"partitioning system.\n"));
7116 				return B_BAD_VALUE;
7117 			}
7118 
7119 			// The disk system name will not change, and the KDiskSystem
7120 			// object will not go away while the disk device is locked (and
7121 			// the partition has a reference to it), so this is safe.
7122 			fsName = diskSystem->Name();
7123 		}
7124 	}
7125 
7126 	mount = new(std::nothrow) (struct ::fs_mount);
7127 	if (mount == NULL)
7128 		return B_NO_MEMORY;
7129 
7130 	mount->device_name = strdup(device);
7131 		// "device" can be NULL
7132 
7133 	status = mount->entry_cache.Init();
7134 	if (status != B_OK)
7135 		goto err1;
7136 
7137 	// initialize structure
7138 	mount->id = sNextMountID++;
7139 	mount->partition = NULL;
7140 	mount->root_vnode = NULL;
7141 	mount->covers_vnode = NULL;
7142 	mount->unmounting = false;
7143 	mount->owns_file_device = false;
7144 	mount->volume = NULL;
7145 
7146 	// build up the volume(s)
7147 	while (true) {
7148 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7149 		if (layerFSName == NULL) {
7150 			if (layer == 0) {
7151 				status = B_NO_MEMORY;
7152 				goto err1;
7153 			}
7154 
7155 			break;
7156 		}
7157 
7158 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7159 		if (volume == NULL) {
7160 			status = B_NO_MEMORY;
7161 			free(layerFSName);
7162 			goto err1;
7163 		}
7164 
7165 		volume->id = mount->id;
7166 		volume->partition = partition != NULL ? partition->ID() : -1;
7167 		volume->layer = layer++;
7168 		volume->private_volume = NULL;
7169 		volume->ops = NULL;
7170 		volume->sub_volume = NULL;
7171 		volume->super_volume = NULL;
7172 		volume->file_system = NULL;
7173 		volume->file_system_name = NULL;
7174 
7175 		volume->file_system_name = get_file_system_name(layerFSName);
7176 		if (volume->file_system_name == NULL) {
7177 			status = B_NO_MEMORY;
7178 			free(layerFSName);
7179 			free(volume);
7180 			goto err1;
7181 		}
7182 
7183 		volume->file_system = get_file_system(layerFSName);
7184 		if (volume->file_system == NULL) {
7185 			status = B_DEVICE_NOT_FOUND;
7186 			free(layerFSName);
7187 			free(volume->file_system_name);
7188 			free(volume);
7189 			goto err1;
7190 		}
7191 
7192 		if (mount->volume == NULL)
7193 			mount->volume = volume;
7194 		else {
7195 			volume->super_volume = mount->volume;
7196 			mount->volume->sub_volume = volume;
7197 			mount->volume = volume;
7198 		}
7199 	}
7200 
7201 	// insert mount struct into list before we call FS's mount() function
7202 	// so that vnodes can be created for this mount
7203 	mutex_lock(&sMountMutex);
7204 	hash_insert(sMountsTable, mount);
7205 	mutex_unlock(&sMountMutex);
7206 
7207 	ino_t rootID;
7208 
7209 	if (!sRoot) {
7210 		// we haven't mounted anything yet
7211 		if (strcmp(path, "/") != 0) {
7212 			status = B_ERROR;
7213 			goto err2;
7214 		}
7215 
7216 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7217 			args, &rootID);
7218 		if (status != 0)
7219 			goto err2;
7220 	} else {
7221 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7222 		if (status != B_OK)
7223 			goto err2;
7224 
7225 		// make sure covered_vnode is a directory
7226 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7227 			status = B_NOT_A_DIRECTORY;
7228 			goto err3;
7229 		}
7230 
7231 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7232 			// this is already a mount point
7233 			status = B_BUSY;
7234 			goto err3;
7235 		}
7236 
7237 		// mount it/them
7238 		fs_volume* volume = mount->volume;
7239 		while (volume) {
7240 			status = volume->file_system->mount(volume, device, flags, args,
7241 				&rootID);
7242 			if (status != B_OK) {
7243 				if (volume->sub_volume)
7244 					goto err4;
7245 				goto err3;
7246 			}
7247 
7248 			volume = volume->super_volume;
7249 		}
7250 
7251 		volume = mount->volume;
7252 		while (volume) {
7253 			if (volume->ops->all_layers_mounted != NULL)
7254 				volume->ops->all_layers_mounted(volume);
7255 			volume = volume->super_volume;
7256 		}
7257 	}
7258 
7259 	// the root node is supposed to be owned by the file system - it must
7260 	// exist at this point
7261 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7262 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7263 		panic("fs_mount: file system does not own its root node!\n");
7264 		status = B_ERROR;
7265 		goto err4;
7266 	}
7267 
7268 	// No race here, since fs_mount() is the only function changing
7269 	// covers_vnode (and holds sMountOpLock at that time).
7270 	rw_lock_write_lock(&sVnodeLock);
7271 	if (mount->covers_vnode)
7272 		mount->covers_vnode->covered_by = mount->root_vnode;
7273 	rw_lock_write_unlock(&sVnodeLock);
7274 
7275 	if (!sRoot) {
7276 		sRoot = mount->root_vnode;
7277 		mutex_lock(&sIOContextRootLock);
7278 		get_current_io_context(true)->root = sRoot;
7279 		mutex_unlock(&sIOContextRootLock);
7280 		inc_vnode_ref_count(sRoot);
7281 	}
7282 
7283 	// supply the partition (if any) with the mount cookie and mark it mounted
7284 	if (partition) {
7285 		partition->SetMountCookie(mount->volume->private_volume);
7286 		partition->SetVolumeID(mount->id);
7287 
7288 		// keep a partition reference as long as the partition is mounted
7289 		partitionRegistrar.Detach();
7290 		mount->partition = partition;
7291 		mount->owns_file_device = newlyCreatedFileDevice;
7292 		fileDeviceDeleter.id = -1;
7293 	}
7294 
7295 	notify_mount(mount->id,
7296 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7297 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7298 
7299 	return mount->id;
7300 
7301 err4:
7302 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7303 err3:
7304 	if (mount->covers_vnode != NULL)
7305 		put_vnode(mount->covers_vnode);
7306 err2:
7307 	mutex_lock(&sMountMutex);
7308 	hash_remove(sMountsTable, mount);
7309 	mutex_unlock(&sMountMutex);
7310 err1:
7311 	delete mount;
7312 
7313 	return status;
7314 }
7315 
7316 
7317 static status_t
7318 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7319 {
7320 	struct fs_mount* mount;
7321 	status_t err;
7322 
7323 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7324 		kernel));
7325 
7326 	struct vnode* pathVnode = NULL;
7327 	if (path != NULL) {
7328 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7329 		if (err != B_OK)
7330 			return B_ENTRY_NOT_FOUND;
7331 	}
7332 
7333 	RecursiveLocker mountOpLocker(sMountOpLock);
7334 
7335 	// this lock is not strictly necessary, but here in case of KDEBUG
7336 	// to keep the ASSERT in find_mount() working.
7337 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7338 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7339 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7340 	if (mount == NULL) {
7341 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7342 			pathVnode);
7343 	}
7344 
7345 	if (path != NULL) {
7346 		put_vnode(pathVnode);
7347 
7348 		if (mount->root_vnode != pathVnode) {
7349 			// not mountpoint
7350 			return B_BAD_VALUE;
7351 		}
7352 	}
7353 
7354 	// if the volume is associated with a partition, lock the device of the
7355 	// partition as long as we are unmounting
7356 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7357 	KPartition* partition = mount->partition;
7358 	KDiskDevice* diskDevice = NULL;
7359 	if (partition != NULL) {
7360 		if (partition->Device() == NULL) {
7361 			dprintf("fs_unmount(): There is no device!\n");
7362 			return B_ERROR;
7363 		}
7364 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7365 		if (!diskDevice) {
7366 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7367 			return B_ERROR;
7368 		}
7369 	}
7370 	DeviceWriteLocker writeLocker(diskDevice, true);
7371 
7372 	// make sure, that the partition is not busy
7373 	if (partition != NULL) {
7374 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7375 			TRACE(("fs_unmount(): Partition is busy.\n"));
7376 			return B_BUSY;
7377 		}
7378 	}
7379 
7380 	// grab the vnode master mutex to keep someone from creating
7381 	// a vnode while we're figuring out if we can continue
7382 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7383 
7384 	bool disconnectedDescriptors = false;
7385 
7386 	while (true) {
7387 		bool busy = false;
7388 
7389 		// cycle through the list of vnodes associated with this mount and
7390 		// make sure all of them are not busy or have refs on them
7391 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7392 		while (struct vnode* vnode = iterator.Next()) {
7393 			// The root vnode ref_count needs to be 1 here (the mount has a
7394 			// reference).
7395 			if (vnode->IsBusy()
7396 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7397 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7398 				// there are still vnodes in use on this mount, so we cannot
7399 				// unmount yet
7400 				busy = true;
7401 				break;
7402 			}
7403 		}
7404 
7405 		if (!busy)
7406 			break;
7407 
7408 		if ((flags & B_FORCE_UNMOUNT) == 0)
7409 			return B_BUSY;
7410 
7411 		if (disconnectedDescriptors) {
7412 			// wait a bit until the last access is finished, and then try again
7413 			vnodesWriteLocker.Unlock();
7414 			snooze(100000);
7415 			// TODO: if there is some kind of bug that prevents the ref counts
7416 			// from getting back to zero, this will fall into an endless loop...
7417 			vnodesWriteLocker.Lock();
7418 			continue;
7419 		}
7420 
7421 		// the file system is still busy - but we're forced to unmount it,
7422 		// so let's disconnect all open file descriptors
7423 
7424 		mount->unmounting = true;
7425 			// prevent new vnodes from being created
7426 
7427 		vnodesWriteLocker.Unlock();
7428 
7429 		disconnect_mount_or_vnode_fds(mount, NULL);
7430 		disconnectedDescriptors = true;
7431 
7432 		vnodesWriteLocker.Lock();
7433 	}
7434 
7435 	// we can safely continue, mark all of the vnodes busy and this mount
7436 	// structure in unmounting state
7437 	mount->unmounting = true;
7438 
7439 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7440 	while (struct vnode* vnode = iterator.Next()) {
7441 		vnode->SetBusy(true);
7442 		vnode_to_be_freed(vnode);
7443 	}
7444 
7445 	// The ref_count of the root node is 1 at this point, see above why this is
7446 	mount->root_vnode->ref_count--;
7447 	vnode_to_be_freed(mount->root_vnode);
7448 
7449 	mount->covers_vnode->covered_by = NULL;
7450 
7451 	vnodesWriteLocker.Unlock();
7452 
7453 	put_vnode(mount->covers_vnode);
7454 
7455 	// Free all vnodes associated with this mount.
7456 	// They will be removed from the mount list by free_vnode(), so
7457 	// we don't have to do this.
7458 	while (struct vnode* vnode = mount->vnodes.Head())
7459 		free_vnode(vnode, false);
7460 
7461 	// remove the mount structure from the hash table
7462 	mutex_lock(&sMountMutex);
7463 	hash_remove(sMountsTable, mount);
7464 	mutex_unlock(&sMountMutex);
7465 
7466 	mountOpLocker.Unlock();
7467 
7468 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7469 	notify_unmount(mount->id);
7470 
7471 	// dereference the partition and mark it unmounted
7472 	if (partition) {
7473 		partition->SetVolumeID(-1);
7474 		partition->SetMountCookie(NULL);
7475 
7476 		if (mount->owns_file_device)
7477 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7478 		partition->Unregister();
7479 	}
7480 
7481 	delete mount;
7482 	return B_OK;
7483 }
7484 
7485 
7486 static status_t
7487 fs_sync(dev_t device)
7488 {
7489 	struct fs_mount* mount;
7490 	status_t status = get_mount(device, &mount);
7491 	if (status != B_OK)
7492 		return status;
7493 
7494 	struct vnode marker;
7495 	memset(&marker, 0, sizeof(marker));
7496 	marker.SetBusy(true);
7497 	marker.SetRemoved(true);
7498 
7499 	// First, synchronize all file caches
7500 
7501 	while (true) {
7502 		WriteLocker locker(sVnodeLock);
7503 			// Note: That's the easy way. Which is probably OK for sync(),
7504 			// since it's a relatively rare call and doesn't need to allow for
7505 			// a lot of concurrency. Using a read lock would be possible, but
7506 			// also more involved, since we had to lock the individual nodes
7507 			// and take care of the locking order, which we might not want to
7508 			// do while holding fs_mount::rlock.
7509 
7510 		// synchronize access to vnode list
7511 		recursive_lock_lock(&mount->rlock);
7512 
7513 		struct vnode* vnode;
7514 		if (!marker.IsRemoved()) {
7515 			vnode = mount->vnodes.GetNext(&marker);
7516 			mount->vnodes.Remove(&marker);
7517 			marker.SetRemoved(true);
7518 		} else
7519 			vnode = mount->vnodes.First();
7520 
7521 		while (vnode != NULL && (vnode->cache == NULL
7522 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7523 			// TODO: we could track writes (and writable mapped vnodes)
7524 			//	and have a simple flag that we could test for here
7525 			vnode = mount->vnodes.GetNext(vnode);
7526 		}
7527 
7528 		if (vnode != NULL) {
7529 			// insert marker vnode again
7530 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7531 			marker.SetRemoved(false);
7532 		}
7533 
7534 		recursive_lock_unlock(&mount->rlock);
7535 
7536 		if (vnode == NULL)
7537 			break;
7538 
7539 		vnode = lookup_vnode(mount->id, vnode->id);
7540 		if (vnode == NULL || vnode->IsBusy())
7541 			continue;
7542 
7543 		if (vnode->ref_count == 0) {
7544 			// this vnode has been unused before
7545 			vnode_used(vnode);
7546 		}
7547 		inc_vnode_ref_count(vnode);
7548 
7549 		locker.Unlock();
7550 
7551 		if (vnode->cache != NULL && !vnode->IsRemoved())
7552 			vnode->cache->WriteModified();
7553 
7554 		put_vnode(vnode);
7555 	}
7556 
7557 	// And then, let the file systems do their synchronizing work
7558 
7559 	if (HAS_FS_MOUNT_CALL(mount, sync))
7560 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7561 
7562 	put_mount(mount);
7563 	return status;
7564 }
7565 
7566 
7567 static status_t
7568 fs_read_info(dev_t device, struct fs_info* info)
7569 {
7570 	struct fs_mount* mount;
7571 	status_t status = get_mount(device, &mount);
7572 	if (status != B_OK)
7573 		return status;
7574 
7575 	memset(info, 0, sizeof(struct fs_info));
7576 
7577 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7578 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7579 
7580 	// fill in info the file system doesn't (have to) know about
7581 	if (status == B_OK) {
7582 		info->dev = mount->id;
7583 		info->root = mount->root_vnode->id;
7584 
7585 		fs_volume* volume = mount->volume;
7586 		while (volume->super_volume != NULL)
7587 			volume = volume->super_volume;
7588 
7589 		strlcpy(info->fsh_name, volume->file_system_name,
7590 			sizeof(info->fsh_name));
7591 		if (mount->device_name != NULL) {
7592 			strlcpy(info->device_name, mount->device_name,
7593 				sizeof(info->device_name));
7594 		}
7595 	}
7596 
7597 	// if the call is not supported by the file system, there are still
7598 	// the parts that we filled out ourselves
7599 
7600 	put_mount(mount);
7601 	return status;
7602 }
7603 
7604 
7605 static status_t
7606 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7607 {
7608 	struct fs_mount* mount;
7609 	status_t status = get_mount(device, &mount);
7610 	if (status != B_OK)
7611 		return status;
7612 
7613 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7614 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7615 	else
7616 		status = B_READ_ONLY_DEVICE;
7617 
7618 	put_mount(mount);
7619 	return status;
7620 }
7621 
7622 
7623 static dev_t
7624 fs_next_device(int32* _cookie)
7625 {
7626 	struct fs_mount* mount = NULL;
7627 	dev_t device = *_cookie;
7628 
7629 	mutex_lock(&sMountMutex);
7630 
7631 	// Since device IDs are assigned sequentially, this algorithm
7632 	// does work good enough. It makes sure that the device list
7633 	// returned is sorted, and that no device is skipped when an
7634 	// already visited device got unmounted.
7635 
7636 	while (device < sNextMountID) {
7637 		mount = find_mount(device++);
7638 		if (mount != NULL && mount->volume->private_volume != NULL)
7639 			break;
7640 	}
7641 
7642 	*_cookie = device;
7643 
7644 	if (mount != NULL)
7645 		device = mount->id;
7646 	else
7647 		device = B_BAD_VALUE;
7648 
7649 	mutex_unlock(&sMountMutex);
7650 
7651 	return device;
7652 }
7653 
7654 
7655 ssize_t
7656 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7657 	void *buffer, size_t readBytes)
7658 {
7659 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7660 	if (attrFD < 0)
7661 		return attrFD;
7662 
7663 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7664 
7665 	_kern_close(attrFD);
7666 
7667 	return bytesRead;
7668 }
7669 
7670 
7671 static status_t
7672 get_cwd(char* buffer, size_t size, bool kernel)
7673 {
7674 	// Get current working directory from io context
7675 	struct io_context* context = get_current_io_context(kernel);
7676 	status_t status;
7677 
7678 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7679 
7680 	mutex_lock(&context->io_mutex);
7681 
7682 	struct vnode* vnode = context->cwd;
7683 	if (vnode)
7684 		inc_vnode_ref_count(vnode);
7685 
7686 	mutex_unlock(&context->io_mutex);
7687 
7688 	if (vnode) {
7689 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7690 		put_vnode(vnode);
7691 	} else
7692 		status = B_ERROR;
7693 
7694 	return status;
7695 }
7696 
7697 
7698 static status_t
7699 set_cwd(int fd, char* path, bool kernel)
7700 {
7701 	struct io_context* context;
7702 	struct vnode* vnode = NULL;
7703 	struct vnode* oldDirectory;
7704 	status_t status;
7705 
7706 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7707 
7708 	// Get vnode for passed path, and bail if it failed
7709 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7710 	if (status < 0)
7711 		return status;
7712 
7713 	if (!S_ISDIR(vnode->Type())) {
7714 		// nope, can't cwd to here
7715 		status = B_NOT_A_DIRECTORY;
7716 		goto err;
7717 	}
7718 
7719 	// Get current io context and lock
7720 	context = get_current_io_context(kernel);
7721 	mutex_lock(&context->io_mutex);
7722 
7723 	// save the old current working directory first
7724 	oldDirectory = context->cwd;
7725 	context->cwd = vnode;
7726 
7727 	mutex_unlock(&context->io_mutex);
7728 
7729 	if (oldDirectory)
7730 		put_vnode(oldDirectory);
7731 
7732 	return B_NO_ERROR;
7733 
7734 err:
7735 	put_vnode(vnode);
7736 	return status;
7737 }
7738 
7739 
7740 //	#pragma mark - kernel mirrored syscalls
7741 
7742 
7743 dev_t
7744 _kern_mount(const char* path, const char* device, const char* fsName,
7745 	uint32 flags, const char* args, size_t argsLength)
7746 {
7747 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7748 	if (pathBuffer.InitCheck() != B_OK)
7749 		return B_NO_MEMORY;
7750 
7751 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7752 }
7753 
7754 
7755 status_t
7756 _kern_unmount(const char* path, uint32 flags)
7757 {
7758 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7759 	if (pathBuffer.InitCheck() != B_OK)
7760 		return B_NO_MEMORY;
7761 
7762 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7763 }
7764 
7765 
7766 status_t
7767 _kern_read_fs_info(dev_t device, struct fs_info* info)
7768 {
7769 	if (info == NULL)
7770 		return B_BAD_VALUE;
7771 
7772 	return fs_read_info(device, info);
7773 }
7774 
7775 
7776 status_t
7777 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7778 {
7779 	if (info == NULL)
7780 		return B_BAD_VALUE;
7781 
7782 	return fs_write_info(device, info, mask);
7783 }
7784 
7785 
7786 status_t
7787 _kern_sync(void)
7788 {
7789 	// Note: _kern_sync() is also called from _user_sync()
7790 	int32 cookie = 0;
7791 	dev_t device;
7792 	while ((device = next_dev(&cookie)) >= 0) {
7793 		status_t status = fs_sync(device);
7794 		if (status != B_OK && status != B_BAD_VALUE) {
7795 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7796 				strerror(status));
7797 		}
7798 	}
7799 
7800 	return B_OK;
7801 }
7802 
7803 
7804 dev_t
7805 _kern_next_device(int32* _cookie)
7806 {
7807 	return fs_next_device(_cookie);
7808 }
7809 
7810 
7811 status_t
7812 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7813 	size_t infoSize)
7814 {
7815 	if (infoSize != sizeof(fd_info))
7816 		return B_BAD_VALUE;
7817 
7818 	struct io_context* context = NULL;
7819 	struct team* team = NULL;
7820 
7821 	cpu_status state = disable_interrupts();
7822 	GRAB_TEAM_LOCK();
7823 
7824 	bool contextLocked = false;
7825 	team = team_get_team_struct_locked(teamID);
7826 	if (team) {
7827 		// We cannot lock the IO context while holding the team lock, nor can
7828 		// we just drop the team lock, since it might be deleted in the
7829 		// meantime. team_remove_team() acquires the thread lock when removing
7830 		// the team from the team hash table, though. Hence we switch to the
7831 		// thread lock and use mutex_lock_threads_locked().
7832 		context = (io_context*)team->io_context;
7833 
7834 		GRAB_THREAD_LOCK();
7835 		RELEASE_TEAM_LOCK();
7836 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7837 		RELEASE_THREAD_LOCK();
7838 	} else
7839 		RELEASE_TEAM_LOCK();
7840 
7841 	restore_interrupts(state);
7842 
7843 	if (!contextLocked) {
7844 		// team doesn't exit or seems to be gone
7845 		return B_BAD_TEAM_ID;
7846 	}
7847 
7848 	// the team cannot be deleted completely while we're owning its
7849 	// io_context mutex, so we can safely play with it now
7850 
7851 	uint32 slot = *_cookie;
7852 
7853 	struct file_descriptor* descriptor;
7854 	while (slot < context->table_size
7855 		&& (descriptor = context->fds[slot]) == NULL) {
7856 		slot++;
7857 	}
7858 
7859 	if (slot >= context->table_size) {
7860 		mutex_unlock(&context->io_mutex);
7861 		return B_ENTRY_NOT_FOUND;
7862 	}
7863 
7864 	info->number = slot;
7865 	info->open_mode = descriptor->open_mode;
7866 
7867 	struct vnode* vnode = fd_vnode(descriptor);
7868 	if (vnode != NULL) {
7869 		info->device = vnode->device;
7870 		info->node = vnode->id;
7871 	} else if (descriptor->u.mount != NULL) {
7872 		info->device = descriptor->u.mount->id;
7873 		info->node = -1;
7874 	}
7875 
7876 	mutex_unlock(&context->io_mutex);
7877 
7878 	*_cookie = slot + 1;
7879 	return B_OK;
7880 }
7881 
7882 
7883 int
7884 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7885 	int perms)
7886 {
7887 	if ((openMode & O_CREAT) != 0) {
7888 		return file_create_entry_ref(device, inode, name, openMode, perms,
7889 			true);
7890 	}
7891 
7892 	return file_open_entry_ref(device, inode, name, openMode, true);
7893 }
7894 
7895 
7896 /*!	\brief Opens a node specified by a FD + path pair.
7897 
7898 	At least one of \a fd and \a path must be specified.
7899 	If only \a fd is given, the function opens the node identified by this
7900 	FD. If only a path is given, this path is opened. If both are given and
7901 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7902 	of the directory (!) identified by \a fd.
7903 
7904 	\param fd The FD. May be < 0.
7905 	\param path The absolute or relative path. May be \c NULL.
7906 	\param openMode The open mode.
7907 	\return A FD referring to the newly opened node, or an error code,
7908 			if an error occurs.
7909 */
7910 int
7911 _kern_open(int fd, const char* path, int openMode, int perms)
7912 {
7913 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7914 	if (pathBuffer.InitCheck() != B_OK)
7915 		return B_NO_MEMORY;
7916 
7917 	if (openMode & O_CREAT)
7918 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7919 
7920 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7921 }
7922 
7923 
7924 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7925 
7926 	The supplied name may be \c NULL, in which case directory identified
7927 	by \a device and \a inode will be opened. Otherwise \a device and
7928 	\a inode identify the parent directory of the directory to be opened
7929 	and \a name its entry name.
7930 
7931 	\param device If \a name is specified the ID of the device the parent
7932 		   directory of the directory to be opened resides on, otherwise
7933 		   the device of the directory itself.
7934 	\param inode If \a name is specified the node ID of the parent
7935 		   directory of the directory to be opened, otherwise node ID of the
7936 		   directory itself.
7937 	\param name The entry name of the directory to be opened. If \c NULL,
7938 		   the \a device + \a inode pair identify the node to be opened.
7939 	\return The FD of the newly opened directory or an error code, if
7940 			something went wrong.
7941 */
7942 int
7943 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7944 {
7945 	return dir_open_entry_ref(device, inode, name, true);
7946 }
7947 
7948 
7949 /*!	\brief Opens a directory specified by a FD + path pair.
7950 
7951 	At least one of \a fd and \a path must be specified.
7952 	If only \a fd is given, the function opens the directory identified by this
7953 	FD. If only a path is given, this path is opened. If both are given and
7954 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7955 	of the directory (!) identified by \a fd.
7956 
7957 	\param fd The FD. May be < 0.
7958 	\param path The absolute or relative path. May be \c NULL.
7959 	\return A FD referring to the newly opened directory, or an error code,
7960 			if an error occurs.
7961 */
7962 int
7963 _kern_open_dir(int fd, const char* path)
7964 {
7965 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7966 	if (pathBuffer.InitCheck() != B_OK)
7967 		return B_NO_MEMORY;
7968 
7969 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7970 }
7971 
7972 
7973 status_t
7974 _kern_fcntl(int fd, int op, uint32 argument)
7975 {
7976 	return common_fcntl(fd, op, argument, true);
7977 }
7978 
7979 
7980 status_t
7981 _kern_fsync(int fd)
7982 {
7983 	return common_sync(fd, true);
7984 }
7985 
7986 
7987 status_t
7988 _kern_lock_node(int fd)
7989 {
7990 	return common_lock_node(fd, true);
7991 }
7992 
7993 
7994 status_t
7995 _kern_unlock_node(int fd)
7996 {
7997 	return common_unlock_node(fd, true);
7998 }
7999 
8000 
8001 status_t
8002 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8003 	int perms)
8004 {
8005 	return dir_create_entry_ref(device, inode, name, perms, true);
8006 }
8007 
8008 
8009 /*!	\brief Creates a directory specified by a FD + path pair.
8010 
8011 	\a path must always be specified (it contains the name of the new directory
8012 	at least). If only a path is given, this path identifies the location at
8013 	which the directory shall be created. If both \a fd and \a path are given
8014 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8015 	of the directory (!) identified by \a fd.
8016 
8017 	\param fd The FD. May be < 0.
8018 	\param path The absolute or relative path. Must not be \c NULL.
8019 	\param perms The access permissions the new directory shall have.
8020 	\return \c B_OK, if the directory has been created successfully, another
8021 			error code otherwise.
8022 */
8023 status_t
8024 _kern_create_dir(int fd, const char* path, int perms)
8025 {
8026 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8027 	if (pathBuffer.InitCheck() != B_OK)
8028 		return B_NO_MEMORY;
8029 
8030 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8031 }
8032 
8033 
8034 status_t
8035 _kern_remove_dir(int fd, const char* path)
8036 {
8037 	if (path) {
8038 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8039 		if (pathBuffer.InitCheck() != B_OK)
8040 			return B_NO_MEMORY;
8041 
8042 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8043 	}
8044 
8045 	return dir_remove(fd, NULL, true);
8046 }
8047 
8048 
8049 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8050 
8051 	At least one of \a fd and \a path must be specified.
8052 	If only \a fd is given, the function the symlink to be read is the node
8053 	identified by this FD. If only a path is given, this path identifies the
8054 	symlink to be read. If both are given and the path is absolute, \a fd is
8055 	ignored; a relative path is reckoned off of the directory (!) identified
8056 	by \a fd.
8057 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8058 	will still be updated to reflect the required buffer size.
8059 
8060 	\param fd The FD. May be < 0.
8061 	\param path The absolute or relative path. May be \c NULL.
8062 	\param buffer The buffer into which the contents of the symlink shall be
8063 		   written.
8064 	\param _bufferSize A pointer to the size of the supplied buffer.
8065 	\return The length of the link on success or an appropriate error code
8066 */
8067 status_t
8068 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8069 {
8070 	if (path) {
8071 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8072 		if (pathBuffer.InitCheck() != B_OK)
8073 			return B_NO_MEMORY;
8074 
8075 		return common_read_link(fd, pathBuffer.LockBuffer(),
8076 			buffer, _bufferSize, true);
8077 	}
8078 
8079 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8080 }
8081 
8082 
8083 /*!	\brief Creates a symlink specified by a FD + path pair.
8084 
8085 	\a path must always be specified (it contains the name of the new symlink
8086 	at least). If only a path is given, this path identifies the location at
8087 	which the symlink shall be created. If both \a fd and \a path are given and
8088 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8089 	of the directory (!) identified by \a fd.
8090 
8091 	\param fd The FD. May be < 0.
8092 	\param toPath The absolute or relative path. Must not be \c NULL.
8093 	\param mode The access permissions the new symlink shall have.
8094 	\return \c B_OK, if the symlink has been created successfully, another
8095 			error code otherwise.
8096 */
8097 status_t
8098 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8099 {
8100 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8101 	if (pathBuffer.InitCheck() != B_OK)
8102 		return B_NO_MEMORY;
8103 
8104 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8105 		toPath, mode, true);
8106 }
8107 
8108 
8109 status_t
8110 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8111 	bool traverseLeafLink)
8112 {
8113 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8114 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8115 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8116 		return B_NO_MEMORY;
8117 
8118 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8119 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8120 }
8121 
8122 
8123 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8124 
8125 	\a path must always be specified (it contains at least the name of the entry
8126 	to be deleted). If only a path is given, this path identifies the entry
8127 	directly. If both \a fd and \a path are given and the path is absolute,
8128 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8129 	identified by \a fd.
8130 
8131 	\param fd The FD. May be < 0.
8132 	\param path The absolute or relative path. Must not be \c NULL.
8133 	\return \c B_OK, if the entry has been removed successfully, another
8134 			error code otherwise.
8135 */
8136 status_t
8137 _kern_unlink(int fd, const char* path)
8138 {
8139 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8140 	if (pathBuffer.InitCheck() != B_OK)
8141 		return B_NO_MEMORY;
8142 
8143 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8144 }
8145 
8146 
8147 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8148 		   by another FD + path pair.
8149 
8150 	\a oldPath and \a newPath must always be specified (they contain at least
8151 	the name of the entry). If only a path is given, this path identifies the
8152 	entry directly. If both a FD and a path are given and the path is absolute,
8153 	the FD is ignored; a relative path is reckoned off of the directory (!)
8154 	identified by the respective FD.
8155 
8156 	\param oldFD The FD of the old location. May be < 0.
8157 	\param oldPath The absolute or relative path of the old location. Must not
8158 		   be \c NULL.
8159 	\param newFD The FD of the new location. May be < 0.
8160 	\param newPath The absolute or relative path of the new location. Must not
8161 		   be \c NULL.
8162 	\return \c B_OK, if the entry has been moved successfully, another
8163 			error code otherwise.
8164 */
8165 status_t
8166 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8167 {
8168 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8169 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8170 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8171 		return B_NO_MEMORY;
8172 
8173 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8174 		newFD, newPathBuffer.LockBuffer(), true);
8175 }
8176 
8177 
8178 status_t
8179 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8180 {
8181 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8182 	if (pathBuffer.InitCheck() != B_OK)
8183 		return B_NO_MEMORY;
8184 
8185 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8186 		true);
8187 }
8188 
8189 
8190 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8191 
8192 	If only \a fd is given, the stat operation associated with the type
8193 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8194 	given, this path identifies the entry for whose node to retrieve the
8195 	stat data. If both \a fd and \a path are given and the path is absolute,
8196 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8197 	identified by \a fd and specifies the entry whose stat data shall be
8198 	retrieved.
8199 
8200 	\param fd The FD. May be < 0.
8201 	\param path The absolute or relative path. Must not be \c NULL.
8202 	\param traverseLeafLink If \a path is given, \c true specifies that the
8203 		   function shall not stick to symlinks, but traverse them.
8204 	\param stat The buffer the stat data shall be written into.
8205 	\param statSize The size of the supplied stat buffer.
8206 	\return \c B_OK, if the the stat data have been read successfully, another
8207 			error code otherwise.
8208 */
8209 status_t
8210 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8211 	struct stat* stat, size_t statSize)
8212 {
8213 	struct stat completeStat;
8214 	struct stat* originalStat = NULL;
8215 	status_t status;
8216 
8217 	if (statSize > sizeof(struct stat))
8218 		return B_BAD_VALUE;
8219 
8220 	// this supports different stat extensions
8221 	if (statSize < sizeof(struct stat)) {
8222 		originalStat = stat;
8223 		stat = &completeStat;
8224 	}
8225 
8226 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8227 
8228 	if (status == B_OK && originalStat != NULL)
8229 		memcpy(originalStat, stat, statSize);
8230 
8231 	return status;
8232 }
8233 
8234 
8235 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8236 
8237 	If only \a fd is given, the stat operation associated with the type
8238 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8239 	given, this path identifies the entry for whose node to write the
8240 	stat data. If both \a fd and \a path are given and the path is absolute,
8241 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8242 	identified by \a fd and specifies the entry whose stat data shall be
8243 	written.
8244 
8245 	\param fd The FD. May be < 0.
8246 	\param path The absolute or relative path. Must not be \c NULL.
8247 	\param traverseLeafLink If \a path is given, \c true specifies that the
8248 		   function shall not stick to symlinks, but traverse them.
8249 	\param stat The buffer containing the stat data to be written.
8250 	\param statSize The size of the supplied stat buffer.
8251 	\param statMask A mask specifying which parts of the stat data shall be
8252 		   written.
8253 	\return \c B_OK, if the the stat data have been written successfully,
8254 			another error code otherwise.
8255 */
8256 status_t
8257 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8258 	const struct stat* stat, size_t statSize, int statMask)
8259 {
8260 	struct stat completeStat;
8261 
8262 	if (statSize > sizeof(struct stat))
8263 		return B_BAD_VALUE;
8264 
8265 	// this supports different stat extensions
8266 	if (statSize < sizeof(struct stat)) {
8267 		memset((uint8*)&completeStat + statSize, 0,
8268 			sizeof(struct stat) - statSize);
8269 		memcpy(&completeStat, stat, statSize);
8270 		stat = &completeStat;
8271 	}
8272 
8273 	status_t status;
8274 
8275 	if (path) {
8276 		// path given: write the stat of the node referred to by (fd, path)
8277 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8278 		if (pathBuffer.InitCheck() != B_OK)
8279 			return B_NO_MEMORY;
8280 
8281 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8282 			traverseLeafLink, stat, statMask, true);
8283 	} else {
8284 		// no path given: get the FD and use the FD operation
8285 		struct file_descriptor* descriptor
8286 			= get_fd(get_current_io_context(true), fd);
8287 		if (descriptor == NULL)
8288 			return B_FILE_ERROR;
8289 
8290 		if (descriptor->ops->fd_write_stat)
8291 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8292 		else
8293 			status = B_NOT_SUPPORTED;
8294 
8295 		put_fd(descriptor);
8296 	}
8297 
8298 	return status;
8299 }
8300 
8301 
8302 int
8303 _kern_open_attr_dir(int fd, const char* path)
8304 {
8305 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8306 	if (pathBuffer.InitCheck() != B_OK)
8307 		return B_NO_MEMORY;
8308 
8309 	if (path != NULL)
8310 		pathBuffer.SetTo(path);
8311 
8312 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8313 }
8314 
8315 
8316 int
8317 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8318 	int openMode)
8319 {
8320 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8321 	if (pathBuffer.InitCheck() != B_OK)
8322 		return B_NO_MEMORY;
8323 
8324 	if ((openMode & O_CREAT) != 0) {
8325 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8326 			true);
8327 	}
8328 
8329 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8330 }
8331 
8332 
8333 status_t
8334 _kern_remove_attr(int fd, const char* name)
8335 {
8336 	return attr_remove(fd, name, true);
8337 }
8338 
8339 
8340 status_t
8341 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8342 	const char* toName)
8343 {
8344 	return attr_rename(fromFile, fromName, toFile, toName, true);
8345 }
8346 
8347 
8348 int
8349 _kern_open_index_dir(dev_t device)
8350 {
8351 	return index_dir_open(device, true);
8352 }
8353 
8354 
8355 status_t
8356 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8357 {
8358 	return index_create(device, name, type, flags, true);
8359 }
8360 
8361 
8362 status_t
8363 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8364 {
8365 	return index_name_read_stat(device, name, stat, true);
8366 }
8367 
8368 
8369 status_t
8370 _kern_remove_index(dev_t device, const char* name)
8371 {
8372 	return index_remove(device, name, true);
8373 }
8374 
8375 
8376 status_t
8377 _kern_getcwd(char* buffer, size_t size)
8378 {
8379 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8380 
8381 	// Call vfs to get current working directory
8382 	return get_cwd(buffer, size, true);
8383 }
8384 
8385 
8386 status_t
8387 _kern_setcwd(int fd, const char* path)
8388 {
8389 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8390 	if (pathBuffer.InitCheck() != B_OK)
8391 		return B_NO_MEMORY;
8392 
8393 	if (path != NULL)
8394 		pathBuffer.SetTo(path);
8395 
8396 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8397 }
8398 
8399 
8400 //	#pragma mark - userland syscalls
8401 
8402 
8403 dev_t
8404 _user_mount(const char* userPath, const char* userDevice,
8405 	const char* userFileSystem, uint32 flags, const char* userArgs,
8406 	size_t argsLength)
8407 {
8408 	char fileSystem[B_FILE_NAME_LENGTH];
8409 	KPath path, device;
8410 	char* args = NULL;
8411 	status_t status;
8412 
8413 	if (!IS_USER_ADDRESS(userPath)
8414 		|| !IS_USER_ADDRESS(userFileSystem)
8415 		|| !IS_USER_ADDRESS(userDevice))
8416 		return B_BAD_ADDRESS;
8417 
8418 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8419 		return B_NO_MEMORY;
8420 
8421 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8422 		return B_BAD_ADDRESS;
8423 
8424 	if (userFileSystem != NULL
8425 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8426 		return B_BAD_ADDRESS;
8427 
8428 	if (userDevice != NULL
8429 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8430 			< B_OK)
8431 		return B_BAD_ADDRESS;
8432 
8433 	if (userArgs != NULL && argsLength > 0) {
8434 		// this is a safety restriction
8435 		if (argsLength >= 65536)
8436 			return B_NAME_TOO_LONG;
8437 
8438 		args = (char*)malloc(argsLength + 1);
8439 		if (args == NULL)
8440 			return B_NO_MEMORY;
8441 
8442 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8443 			free(args);
8444 			return B_BAD_ADDRESS;
8445 		}
8446 	}
8447 	path.UnlockBuffer();
8448 	device.UnlockBuffer();
8449 
8450 	status = fs_mount(path.LockBuffer(),
8451 		userDevice != NULL ? device.Path() : NULL,
8452 		userFileSystem ? fileSystem : NULL, flags, args, false);
8453 
8454 	free(args);
8455 	return status;
8456 }
8457 
8458 
8459 status_t
8460 _user_unmount(const char* userPath, uint32 flags)
8461 {
8462 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8463 	if (pathBuffer.InitCheck() != B_OK)
8464 		return B_NO_MEMORY;
8465 
8466 	char* path = pathBuffer.LockBuffer();
8467 
8468 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8469 		return B_BAD_ADDRESS;
8470 
8471 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8472 }
8473 
8474 
8475 status_t
8476 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8477 {
8478 	struct fs_info info;
8479 	status_t status;
8480 
8481 	if (userInfo == NULL)
8482 		return B_BAD_VALUE;
8483 
8484 	if (!IS_USER_ADDRESS(userInfo))
8485 		return B_BAD_ADDRESS;
8486 
8487 	status = fs_read_info(device, &info);
8488 	if (status != B_OK)
8489 		return status;
8490 
8491 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8492 		return B_BAD_ADDRESS;
8493 
8494 	return B_OK;
8495 }
8496 
8497 
8498 status_t
8499 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8500 {
8501 	struct fs_info info;
8502 
8503 	if (userInfo == NULL)
8504 		return B_BAD_VALUE;
8505 
8506 	if (!IS_USER_ADDRESS(userInfo)
8507 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8508 		return B_BAD_ADDRESS;
8509 
8510 	return fs_write_info(device, &info, mask);
8511 }
8512 
8513 
8514 dev_t
8515 _user_next_device(int32* _userCookie)
8516 {
8517 	int32 cookie;
8518 	dev_t device;
8519 
8520 	if (!IS_USER_ADDRESS(_userCookie)
8521 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8522 		return B_BAD_ADDRESS;
8523 
8524 	device = fs_next_device(&cookie);
8525 
8526 	if (device >= B_OK) {
8527 		// update user cookie
8528 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8529 			return B_BAD_ADDRESS;
8530 	}
8531 
8532 	return device;
8533 }
8534 
8535 
8536 status_t
8537 _user_sync(void)
8538 {
8539 	return _kern_sync();
8540 }
8541 
8542 
8543 status_t
8544 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8545 	size_t infoSize)
8546 {
8547 	struct fd_info info;
8548 	uint32 cookie;
8549 
8550 	// only root can do this (or should root's group be enough?)
8551 	if (geteuid() != 0)
8552 		return B_NOT_ALLOWED;
8553 
8554 	if (infoSize != sizeof(fd_info))
8555 		return B_BAD_VALUE;
8556 
8557 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8558 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8559 		return B_BAD_ADDRESS;
8560 
8561 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8562 	if (status != B_OK)
8563 		return status;
8564 
8565 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8566 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8567 		return B_BAD_ADDRESS;
8568 
8569 	return status;
8570 }
8571 
8572 
8573 status_t
8574 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8575 	char* userPath, size_t pathLength)
8576 {
8577 	if (!IS_USER_ADDRESS(userPath))
8578 		return B_BAD_ADDRESS;
8579 
8580 	KPath path(B_PATH_NAME_LENGTH + 1);
8581 	if (path.InitCheck() != B_OK)
8582 		return B_NO_MEMORY;
8583 
8584 	// copy the leaf name onto the stack
8585 	char stackLeaf[B_FILE_NAME_LENGTH];
8586 	if (leaf) {
8587 		if (!IS_USER_ADDRESS(leaf))
8588 			return B_BAD_ADDRESS;
8589 
8590 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8591 		if (length < 0)
8592 			return length;
8593 		if (length >= B_FILE_NAME_LENGTH)
8594 			return B_NAME_TOO_LONG;
8595 
8596 		leaf = stackLeaf;
8597 	}
8598 
8599 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8600 		path.LockBuffer(), path.BufferSize());
8601 	if (status != B_OK)
8602 		return status;
8603 
8604 	path.UnlockBuffer();
8605 
8606 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8607 	if (length < 0)
8608 		return length;
8609 	if (length >= (int)pathLength)
8610 		return B_BUFFER_OVERFLOW;
8611 
8612 	return B_OK;
8613 }
8614 
8615 
8616 status_t
8617 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8618 {
8619 	if (userPath == NULL || buffer == NULL)
8620 		return B_BAD_VALUE;
8621 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8622 		return B_BAD_ADDRESS;
8623 
8624 	// copy path from userland
8625 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8626 	if (pathBuffer.InitCheck() != B_OK)
8627 		return B_NO_MEMORY;
8628 	char* path = pathBuffer.LockBuffer();
8629 
8630 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8631 		return B_BAD_ADDRESS;
8632 
8633 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8634 		false);
8635 	if (error != B_OK)
8636 		return error;
8637 
8638 	// copy back to userland
8639 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8640 	if (len < 0)
8641 		return len;
8642 	if (len >= B_PATH_NAME_LENGTH)
8643 		return B_BUFFER_OVERFLOW;
8644 
8645 	return B_OK;
8646 }
8647 
8648 
8649 int
8650 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8651 	int openMode, int perms)
8652 {
8653 	char name[B_FILE_NAME_LENGTH];
8654 
8655 	if (userName == NULL || device < 0 || inode < 0)
8656 		return B_BAD_VALUE;
8657 	if (!IS_USER_ADDRESS(userName)
8658 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8659 		return B_BAD_ADDRESS;
8660 
8661 	if ((openMode & O_CREAT) != 0) {
8662 		return file_create_entry_ref(device, inode, name, openMode, perms,
8663 		 false);
8664 	}
8665 
8666 	return file_open_entry_ref(device, inode, name, openMode, false);
8667 }
8668 
8669 
8670 int
8671 _user_open(int fd, const char* userPath, int openMode, int perms)
8672 {
8673 	KPath path(B_PATH_NAME_LENGTH + 1);
8674 	if (path.InitCheck() != B_OK)
8675 		return B_NO_MEMORY;
8676 
8677 	char* buffer = path.LockBuffer();
8678 
8679 	if (!IS_USER_ADDRESS(userPath)
8680 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8681 		return B_BAD_ADDRESS;
8682 
8683 	if ((openMode & O_CREAT) != 0)
8684 		return file_create(fd, buffer, openMode, perms, false);
8685 
8686 	return file_open(fd, buffer, openMode, false);
8687 }
8688 
8689 
8690 int
8691 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8692 {
8693 	if (userName != NULL) {
8694 		char name[B_FILE_NAME_LENGTH];
8695 
8696 		if (!IS_USER_ADDRESS(userName)
8697 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8698 			return B_BAD_ADDRESS;
8699 
8700 		return dir_open_entry_ref(device, inode, name, false);
8701 	}
8702 	return dir_open_entry_ref(device, inode, NULL, false);
8703 }
8704 
8705 
8706 int
8707 _user_open_dir(int fd, const char* userPath)
8708 {
8709 	if (userPath == NULL)
8710 		return dir_open(fd, NULL, false);
8711 
8712 	KPath path(B_PATH_NAME_LENGTH + 1);
8713 	if (path.InitCheck() != B_OK)
8714 		return B_NO_MEMORY;
8715 
8716 	char* buffer = path.LockBuffer();
8717 
8718 	if (!IS_USER_ADDRESS(userPath)
8719 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8720 		return B_BAD_ADDRESS;
8721 
8722 	return dir_open(fd, buffer, false);
8723 }
8724 
8725 
8726 /*!	\brief Opens a directory's parent directory and returns the entry name
8727 		   of the former.
8728 
8729 	Aside from that is returns the directory's entry name, this method is
8730 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8731 	equivalent, if \a userName is \c NULL.
8732 
8733 	If a name buffer is supplied and the name does not fit the buffer, the
8734 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8735 
8736 	\param fd A FD referring to a directory.
8737 	\param userName Buffer the directory's entry name shall be written into.
8738 		   May be \c NULL.
8739 	\param nameLength Size of the name buffer.
8740 	\return The file descriptor of the opened parent directory, if everything
8741 			went fine, an error code otherwise.
8742 */
8743 int
8744 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8745 {
8746 	bool kernel = false;
8747 
8748 	if (userName && !IS_USER_ADDRESS(userName))
8749 		return B_BAD_ADDRESS;
8750 
8751 	// open the parent dir
8752 	int parentFD = dir_open(fd, (char*)"..", kernel);
8753 	if (parentFD < 0)
8754 		return parentFD;
8755 	FDCloser fdCloser(parentFD, kernel);
8756 
8757 	if (userName) {
8758 		// get the vnodes
8759 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8760 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8761 		VNodePutter parentVNodePutter(parentVNode);
8762 		VNodePutter dirVNodePutter(dirVNode);
8763 		if (!parentVNode || !dirVNode)
8764 			return B_FILE_ERROR;
8765 
8766 		// get the vnode name
8767 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8768 		struct dirent* buffer = (struct dirent*)_buffer;
8769 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8770 			sizeof(_buffer), get_current_io_context(false));
8771 		if (status != B_OK)
8772 			return status;
8773 
8774 		// copy the name to the userland buffer
8775 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8776 		if (len < 0)
8777 			return len;
8778 		if (len >= (int)nameLength)
8779 			return B_BUFFER_OVERFLOW;
8780 	}
8781 
8782 	return fdCloser.Detach();
8783 }
8784 
8785 
8786 status_t
8787 _user_fcntl(int fd, int op, uint32 argument)
8788 {
8789 	status_t status = common_fcntl(fd, op, argument, false);
8790 	if (op == F_SETLKW)
8791 		syscall_restart_handle_post(status);
8792 
8793 	return status;
8794 }
8795 
8796 
8797 status_t
8798 _user_fsync(int fd)
8799 {
8800 	return common_sync(fd, false);
8801 }
8802 
8803 
8804 status_t
8805 _user_flock(int fd, int operation)
8806 {
8807 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8808 
8809 	// Check if the operation is valid
8810 	switch (operation & ~LOCK_NB) {
8811 		case LOCK_UN:
8812 		case LOCK_SH:
8813 		case LOCK_EX:
8814 			break;
8815 
8816 		default:
8817 			return B_BAD_VALUE;
8818 	}
8819 
8820 	struct file_descriptor* descriptor;
8821 	struct vnode* vnode;
8822 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8823 	if (descriptor == NULL)
8824 		return B_FILE_ERROR;
8825 
8826 	if (descriptor->type != FDTYPE_FILE) {
8827 		put_fd(descriptor);
8828 		return B_BAD_VALUE;
8829 	}
8830 
8831 	struct flock flock;
8832 	flock.l_start = 0;
8833 	flock.l_len = OFF_MAX;
8834 	flock.l_whence = 0;
8835 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8836 
8837 	status_t status;
8838 	if ((operation & LOCK_UN) != 0)
8839 		status = release_advisory_lock(vnode, &flock);
8840 	else {
8841 		status = acquire_advisory_lock(vnode,
8842 			thread_get_current_thread()->team->session_id, &flock,
8843 			(operation & LOCK_NB) == 0);
8844 	}
8845 
8846 	syscall_restart_handle_post(status);
8847 
8848 	put_fd(descriptor);
8849 	return status;
8850 }
8851 
8852 
8853 status_t
8854 _user_lock_node(int fd)
8855 {
8856 	return common_lock_node(fd, false);
8857 }
8858 
8859 
8860 status_t
8861 _user_unlock_node(int fd)
8862 {
8863 	return common_unlock_node(fd, false);
8864 }
8865 
8866 
8867 status_t
8868 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8869 	int perms)
8870 {
8871 	char name[B_FILE_NAME_LENGTH];
8872 	status_t status;
8873 
8874 	if (!IS_USER_ADDRESS(userName))
8875 		return B_BAD_ADDRESS;
8876 
8877 	status = user_strlcpy(name, userName, sizeof(name));
8878 	if (status < 0)
8879 		return status;
8880 
8881 	return dir_create_entry_ref(device, inode, name, perms, false);
8882 }
8883 
8884 
8885 status_t
8886 _user_create_dir(int fd, const char* userPath, int perms)
8887 {
8888 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8889 	if (pathBuffer.InitCheck() != B_OK)
8890 		return B_NO_MEMORY;
8891 
8892 	char* path = pathBuffer.LockBuffer();
8893 
8894 	if (!IS_USER_ADDRESS(userPath)
8895 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8896 		return B_BAD_ADDRESS;
8897 
8898 	return dir_create(fd, path, perms, false);
8899 }
8900 
8901 
8902 status_t
8903 _user_remove_dir(int fd, const char* userPath)
8904 {
8905 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8906 	if (pathBuffer.InitCheck() != B_OK)
8907 		return B_NO_MEMORY;
8908 
8909 	char* path = pathBuffer.LockBuffer();
8910 
8911 	if (userPath != NULL) {
8912 		if (!IS_USER_ADDRESS(userPath)
8913 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8914 			return B_BAD_ADDRESS;
8915 	}
8916 
8917 	return dir_remove(fd, userPath ? path : NULL, false);
8918 }
8919 
8920 
8921 status_t
8922 _user_read_link(int fd, const char* userPath, char* userBuffer,
8923 	size_t* userBufferSize)
8924 {
8925 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8926 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8927 		return B_NO_MEMORY;
8928 
8929 	size_t bufferSize;
8930 
8931 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8932 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8933 		return B_BAD_ADDRESS;
8934 
8935 	char* path = pathBuffer.LockBuffer();
8936 	char* buffer = linkBuffer.LockBuffer();
8937 
8938 	if (userPath) {
8939 		if (!IS_USER_ADDRESS(userPath)
8940 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8941 			return B_BAD_ADDRESS;
8942 
8943 		if (bufferSize > B_PATH_NAME_LENGTH)
8944 			bufferSize = B_PATH_NAME_LENGTH;
8945 	}
8946 
8947 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8948 		&bufferSize, false);
8949 
8950 	// we also update the bufferSize in case of errors
8951 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8952 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8953 		return B_BAD_ADDRESS;
8954 
8955 	if (status != B_OK)
8956 		return status;
8957 
8958 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	return B_OK;
8962 }
8963 
8964 
8965 status_t
8966 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8967 	int mode)
8968 {
8969 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8970 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8971 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8972 		return B_NO_MEMORY;
8973 
8974 	char* path = pathBuffer.LockBuffer();
8975 	char* toPath = toPathBuffer.LockBuffer();
8976 
8977 	if (!IS_USER_ADDRESS(userPath)
8978 		|| !IS_USER_ADDRESS(userToPath)
8979 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8980 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8981 		return B_BAD_ADDRESS;
8982 
8983 	return common_create_symlink(fd, path, toPath, mode, false);
8984 }
8985 
8986 
8987 status_t
8988 _user_create_link(int pathFD, const char* userPath, int toFD,
8989 	const char* userToPath, bool traverseLeafLink)
8990 {
8991 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8992 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8993 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8994 		return B_NO_MEMORY;
8995 
8996 	char* path = pathBuffer.LockBuffer();
8997 	char* toPath = toPathBuffer.LockBuffer();
8998 
8999 	if (!IS_USER_ADDRESS(userPath)
9000 		|| !IS_USER_ADDRESS(userToPath)
9001 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9002 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9003 		return B_BAD_ADDRESS;
9004 
9005 	status_t status = check_path(toPath);
9006 	if (status != B_OK)
9007 		return status;
9008 
9009 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9010 		false);
9011 }
9012 
9013 
9014 status_t
9015 _user_unlink(int fd, const char* userPath)
9016 {
9017 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9018 	if (pathBuffer.InitCheck() != B_OK)
9019 		return B_NO_MEMORY;
9020 
9021 	char* path = pathBuffer.LockBuffer();
9022 
9023 	if (!IS_USER_ADDRESS(userPath)
9024 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9025 		return B_BAD_ADDRESS;
9026 
9027 	return common_unlink(fd, path, false);
9028 }
9029 
9030 
9031 status_t
9032 _user_rename(int oldFD, const char* userOldPath, int newFD,
9033 	const char* userNewPath)
9034 {
9035 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9036 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9037 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9038 		return B_NO_MEMORY;
9039 
9040 	char* oldPath = oldPathBuffer.LockBuffer();
9041 	char* newPath = newPathBuffer.LockBuffer();
9042 
9043 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9044 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9045 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9046 		return B_BAD_ADDRESS;
9047 
9048 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9049 }
9050 
9051 
9052 status_t
9053 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9054 {
9055 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9056 	if (pathBuffer.InitCheck() != B_OK)
9057 		return B_NO_MEMORY;
9058 
9059 	char* path = pathBuffer.LockBuffer();
9060 
9061 	if (!IS_USER_ADDRESS(userPath)
9062 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9063 		return B_BAD_ADDRESS;
9064 	}
9065 
9066 	// split into directory vnode and filename path
9067 	char filename[B_FILE_NAME_LENGTH];
9068 	struct vnode* dir;
9069 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9070 	if (status != B_OK)
9071 		return status;
9072 
9073 	VNodePutter _(dir);
9074 
9075 	// the underlying FS needs to support creating FIFOs
9076 	if (!HAS_FS_CALL(dir, create_special_node))
9077 		return B_UNSUPPORTED;
9078 
9079 	// create the entry	-- the FIFO sub node is set up automatically
9080 	fs_vnode superVnode;
9081 	ino_t nodeID;
9082 	status = FS_CALL(dir, create_special_node, filename, NULL,
9083 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9084 
9085 	// create_special_node() acquired a reference for us that we don't need.
9086 	if (status == B_OK)
9087 		put_vnode(dir->mount->volume, nodeID);
9088 
9089 	return status;
9090 }
9091 
9092 
9093 status_t
9094 _user_create_pipe(int* userFDs)
9095 {
9096 	// rootfs should support creating FIFOs, but let's be sure
9097 	if (!HAS_FS_CALL(sRoot, create_special_node))
9098 		return B_UNSUPPORTED;
9099 
9100 	// create the node	-- the FIFO sub node is set up automatically
9101 	fs_vnode superVnode;
9102 	ino_t nodeID;
9103 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9104 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9105 	if (status != B_OK)
9106 		return status;
9107 
9108 	// We've got one reference to the node and need another one.
9109 	struct vnode* vnode;
9110 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9111 	if (status != B_OK) {
9112 		// that should not happen
9113 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9114 			sRoot->mount->id, sRoot->id);
9115 		return status;
9116 	}
9117 
9118 	// Everything looks good so far. Open two FDs for reading respectively
9119 	// writing.
9120 	int fds[2];
9121 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9122 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9123 
9124 	FDCloser closer0(fds[0], false);
9125 	FDCloser closer1(fds[1], false);
9126 
9127 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9128 
9129 	// copy FDs to userland
9130 	if (status == B_OK) {
9131 		if (!IS_USER_ADDRESS(userFDs)
9132 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9133 			status = B_BAD_ADDRESS;
9134 		}
9135 	}
9136 
9137 	// keep FDs, if everything went fine
9138 	if (status == B_OK) {
9139 		closer0.Detach();
9140 		closer1.Detach();
9141 	}
9142 
9143 	return status;
9144 }
9145 
9146 
9147 status_t
9148 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9149 {
9150 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9151 	if (pathBuffer.InitCheck() != B_OK)
9152 		return B_NO_MEMORY;
9153 
9154 	char* path = pathBuffer.LockBuffer();
9155 
9156 	if (!IS_USER_ADDRESS(userPath)
9157 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9158 		return B_BAD_ADDRESS;
9159 
9160 	return common_access(fd, path, mode, effectiveUserGroup, false);
9161 }
9162 
9163 
9164 status_t
9165 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9166 	struct stat* userStat, size_t statSize)
9167 {
9168 	struct stat stat;
9169 	status_t status;
9170 
9171 	if (statSize > sizeof(struct stat))
9172 		return B_BAD_VALUE;
9173 
9174 	if (!IS_USER_ADDRESS(userStat))
9175 		return B_BAD_ADDRESS;
9176 
9177 	if (userPath) {
9178 		// path given: get the stat of the node referred to by (fd, path)
9179 		if (!IS_USER_ADDRESS(userPath))
9180 			return B_BAD_ADDRESS;
9181 
9182 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9183 		if (pathBuffer.InitCheck() != B_OK)
9184 			return B_NO_MEMORY;
9185 
9186 		char* path = pathBuffer.LockBuffer();
9187 
9188 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9189 		if (length < B_OK)
9190 			return length;
9191 		if (length >= B_PATH_NAME_LENGTH)
9192 			return B_NAME_TOO_LONG;
9193 
9194 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9195 	} else {
9196 		// no path given: get the FD and use the FD operation
9197 		struct file_descriptor* descriptor
9198 			= get_fd(get_current_io_context(false), fd);
9199 		if (descriptor == NULL)
9200 			return B_FILE_ERROR;
9201 
9202 		if (descriptor->ops->fd_read_stat)
9203 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9204 		else
9205 			status = B_NOT_SUPPORTED;
9206 
9207 		put_fd(descriptor);
9208 	}
9209 
9210 	if (status != B_OK)
9211 		return status;
9212 
9213 	return user_memcpy(userStat, &stat, statSize);
9214 }
9215 
9216 
9217 status_t
9218 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9219 	const struct stat* userStat, size_t statSize, int statMask)
9220 {
9221 	if (statSize > sizeof(struct stat))
9222 		return B_BAD_VALUE;
9223 
9224 	struct stat stat;
9225 
9226 	if (!IS_USER_ADDRESS(userStat)
9227 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9228 		return B_BAD_ADDRESS;
9229 
9230 	// clear additional stat fields
9231 	if (statSize < sizeof(struct stat))
9232 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9233 
9234 	status_t status;
9235 
9236 	if (userPath) {
9237 		// path given: write the stat of the node referred to by (fd, path)
9238 		if (!IS_USER_ADDRESS(userPath))
9239 			return B_BAD_ADDRESS;
9240 
9241 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9242 		if (pathBuffer.InitCheck() != B_OK)
9243 			return B_NO_MEMORY;
9244 
9245 		char* path = pathBuffer.LockBuffer();
9246 
9247 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9248 		if (length < B_OK)
9249 			return length;
9250 		if (length >= B_PATH_NAME_LENGTH)
9251 			return B_NAME_TOO_LONG;
9252 
9253 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9254 			statMask, false);
9255 	} else {
9256 		// no path given: get the FD and use the FD operation
9257 		struct file_descriptor* descriptor
9258 			= get_fd(get_current_io_context(false), fd);
9259 		if (descriptor == NULL)
9260 			return B_FILE_ERROR;
9261 
9262 		if (descriptor->ops->fd_write_stat) {
9263 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9264 				statMask);
9265 		} else
9266 			status = B_NOT_SUPPORTED;
9267 
9268 		put_fd(descriptor);
9269 	}
9270 
9271 	return status;
9272 }
9273 
9274 
9275 int
9276 _user_open_attr_dir(int fd, const char* userPath)
9277 {
9278 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9279 	if (pathBuffer.InitCheck() != B_OK)
9280 		return B_NO_MEMORY;
9281 
9282 	char* path = pathBuffer.LockBuffer();
9283 
9284 	if (userPath != NULL) {
9285 		if (!IS_USER_ADDRESS(userPath)
9286 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9287 			return B_BAD_ADDRESS;
9288 	}
9289 
9290 	return attr_dir_open(fd, userPath ? path : NULL, false);
9291 }
9292 
9293 
9294 ssize_t
9295 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9296 	size_t readBytes)
9297 {
9298 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9299 	if (attr < 0)
9300 		return attr;
9301 
9302 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9303 	_user_close(attr);
9304 
9305 	return bytes;
9306 }
9307 
9308 
9309 ssize_t
9310 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9311 	const void* buffer, size_t writeBytes)
9312 {
9313 	// Try to support the BeOS typical truncation as well as the position
9314 	// argument
9315 	int attr = attr_create(fd, NULL, attribute, type,
9316 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9317 	if (attr < 0)
9318 		return attr;
9319 
9320 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9321 	_user_close(attr);
9322 
9323 	return bytes;
9324 }
9325 
9326 
9327 status_t
9328 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9329 {
9330 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9331 	if (attr < 0)
9332 		return attr;
9333 
9334 	struct file_descriptor* descriptor
9335 		= get_fd(get_current_io_context(false), attr);
9336 	if (descriptor == NULL) {
9337 		_user_close(attr);
9338 		return B_FILE_ERROR;
9339 	}
9340 
9341 	struct stat stat;
9342 	status_t status;
9343 	if (descriptor->ops->fd_read_stat)
9344 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9345 	else
9346 		status = B_NOT_SUPPORTED;
9347 
9348 	put_fd(descriptor);
9349 	_user_close(attr);
9350 
9351 	if (status == B_OK) {
9352 		attr_info info;
9353 		info.type = stat.st_type;
9354 		info.size = stat.st_size;
9355 
9356 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9357 			return B_BAD_ADDRESS;
9358 	}
9359 
9360 	return status;
9361 }
9362 
9363 
9364 int
9365 _user_open_attr(int fd, const char* userPath, const char* userName,
9366 	uint32 type, int openMode)
9367 {
9368 	char name[B_FILE_NAME_LENGTH];
9369 
9370 	if (!IS_USER_ADDRESS(userName)
9371 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9372 		return B_BAD_ADDRESS;
9373 
9374 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9375 	if (pathBuffer.InitCheck() != B_OK)
9376 		return B_NO_MEMORY;
9377 
9378 	char* path = pathBuffer.LockBuffer();
9379 
9380 	if (userPath != NULL) {
9381 		if (!IS_USER_ADDRESS(userPath)
9382 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9383 			return B_BAD_ADDRESS;
9384 	}
9385 
9386 	if ((openMode & O_CREAT) != 0) {
9387 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9388 			false);
9389 	}
9390 
9391 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9392 }
9393 
9394 
9395 status_t
9396 _user_remove_attr(int fd, const char* userName)
9397 {
9398 	char name[B_FILE_NAME_LENGTH];
9399 
9400 	if (!IS_USER_ADDRESS(userName)
9401 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9402 		return B_BAD_ADDRESS;
9403 
9404 	return attr_remove(fd, name, false);
9405 }
9406 
9407 
9408 status_t
9409 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9410 	const char* userToName)
9411 {
9412 	if (!IS_USER_ADDRESS(userFromName)
9413 		|| !IS_USER_ADDRESS(userToName))
9414 		return B_BAD_ADDRESS;
9415 
9416 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9417 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9418 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9419 		return B_NO_MEMORY;
9420 
9421 	char* fromName = fromNameBuffer.LockBuffer();
9422 	char* toName = toNameBuffer.LockBuffer();
9423 
9424 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9425 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9426 		return B_BAD_ADDRESS;
9427 
9428 	return attr_rename(fromFile, fromName, toFile, toName, false);
9429 }
9430 
9431 
9432 int
9433 _user_open_index_dir(dev_t device)
9434 {
9435 	return index_dir_open(device, false);
9436 }
9437 
9438 
9439 status_t
9440 _user_create_index(dev_t device, const char* userName, uint32 type,
9441 	uint32 flags)
9442 {
9443 	char name[B_FILE_NAME_LENGTH];
9444 
9445 	if (!IS_USER_ADDRESS(userName)
9446 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9447 		return B_BAD_ADDRESS;
9448 
9449 	return index_create(device, name, type, flags, false);
9450 }
9451 
9452 
9453 status_t
9454 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9455 {
9456 	char name[B_FILE_NAME_LENGTH];
9457 	struct stat stat;
9458 	status_t status;
9459 
9460 	if (!IS_USER_ADDRESS(userName)
9461 		|| !IS_USER_ADDRESS(userStat)
9462 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9463 		return B_BAD_ADDRESS;
9464 
9465 	status = index_name_read_stat(device, name, &stat, false);
9466 	if (status == B_OK) {
9467 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9468 			return B_BAD_ADDRESS;
9469 	}
9470 
9471 	return status;
9472 }
9473 
9474 
9475 status_t
9476 _user_remove_index(dev_t device, const char* userName)
9477 {
9478 	char name[B_FILE_NAME_LENGTH];
9479 
9480 	if (!IS_USER_ADDRESS(userName)
9481 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9482 		return B_BAD_ADDRESS;
9483 
9484 	return index_remove(device, name, false);
9485 }
9486 
9487 
9488 status_t
9489 _user_getcwd(char* userBuffer, size_t size)
9490 {
9491 	if (size == 0)
9492 		return B_BAD_VALUE;
9493 	if (!IS_USER_ADDRESS(userBuffer))
9494 		return B_BAD_ADDRESS;
9495 
9496 	if (size > kMaxPathLength)
9497 		size = kMaxPathLength;
9498 
9499 	KPath pathBuffer(size);
9500 	if (pathBuffer.InitCheck() != B_OK)
9501 		return B_NO_MEMORY;
9502 
9503 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9504 
9505 	char* path = pathBuffer.LockBuffer();
9506 
9507 	status_t status = get_cwd(path, size, false);
9508 	if (status != B_OK)
9509 		return status;
9510 
9511 	// Copy back the result
9512 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9513 		return B_BAD_ADDRESS;
9514 
9515 	return status;
9516 }
9517 
9518 
9519 status_t
9520 _user_setcwd(int fd, const char* userPath)
9521 {
9522 	TRACE(("user_setcwd: path = %p\n", userPath));
9523 
9524 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9525 	if (pathBuffer.InitCheck() != B_OK)
9526 		return B_NO_MEMORY;
9527 
9528 	char* path = pathBuffer.LockBuffer();
9529 
9530 	if (userPath != NULL) {
9531 		if (!IS_USER_ADDRESS(userPath)
9532 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9533 			return B_BAD_ADDRESS;
9534 	}
9535 
9536 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9537 }
9538 
9539 
9540 status_t
9541 _user_change_root(const char* userPath)
9542 {
9543 	// only root is allowed to chroot()
9544 	if (geteuid() != 0)
9545 		return B_NOT_ALLOWED;
9546 
9547 	// alloc path buffer
9548 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9549 	if (pathBuffer.InitCheck() != B_OK)
9550 		return B_NO_MEMORY;
9551 
9552 	// copy userland path to kernel
9553 	char* path = pathBuffer.LockBuffer();
9554 	if (userPath != NULL) {
9555 		if (!IS_USER_ADDRESS(userPath)
9556 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9557 			return B_BAD_ADDRESS;
9558 	}
9559 
9560 	// get the vnode
9561 	struct vnode* vnode;
9562 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9563 	if (status != B_OK)
9564 		return status;
9565 
9566 	// set the new root
9567 	struct io_context* context = get_current_io_context(false);
9568 	mutex_lock(&sIOContextRootLock);
9569 	struct vnode* oldRoot = context->root;
9570 	context->root = vnode;
9571 	mutex_unlock(&sIOContextRootLock);
9572 
9573 	put_vnode(oldRoot);
9574 
9575 	return B_OK;
9576 }
9577 
9578 
9579 int
9580 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9581 	uint32 flags, port_id port, int32 token)
9582 {
9583 	char* query;
9584 
9585 	if (device < 0 || userQuery == NULL || queryLength == 0)
9586 		return B_BAD_VALUE;
9587 
9588 	// this is a safety restriction
9589 	if (queryLength >= 65536)
9590 		return B_NAME_TOO_LONG;
9591 
9592 	query = (char*)malloc(queryLength + 1);
9593 	if (query == NULL)
9594 		return B_NO_MEMORY;
9595 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9596 		free(query);
9597 		return B_BAD_ADDRESS;
9598 	}
9599 
9600 	int fd = query_open(device, query, flags, port, token, false);
9601 
9602 	free(query);
9603 	return fd;
9604 }
9605 
9606 
9607 #include "vfs_request_io.cpp"
9608