/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
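
// Illustrative (not from the original source): the usual calling pattern
// checks for a hook with HAS_FS_CALL() before dispatching through FS_CALL(),
// as normalize_flock() below does for read_stat:
//
//	if (!HAS_FS_CALL(vnode, read_stat))
//		return B_UNSUPPORTED;
//	status = FS_CALL(vnode, read_stat, &stat);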


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() -- this does not
	// depend on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		recursive_lock_init(&rlock, "mount rlock");
	}

	~fs_mount()
	{
		recursive_lock_destroy(&rlock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	recursive_lock	rlock;	// guards the vnodes list
		// TODO: Make this a mutex! It is never used recursively.
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, holding the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type can also
	be write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
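
// Illustrative sketch of the rule above, as used by dec_vnode_ref_count()
// below: a read lock on sVnodeLock combined with the vnode's own lock is
// enough to write the mutable flags, e.g.:
//
//	ReadLocker locker(sVnodeLock);
//	AutoLocker<Vnode> nodeLocker(vnode);
//	vnode->SetBusy(true);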

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
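// Worked example: device 3 and vnode ID 0x100000002 hash to
// ((0x1 + 0x2) ^ 0x3) == 0; both 32-bit halves of the 64-bit node ID are
// folded together, then XOR'd with the mount ID.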

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000
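// A busy vnode is retried up to BUSY_VNODE_RETRIES times, sleeping
// BUSY_VNODE_DELAY microseconds between attempts: 2000 * 5000 us = 10 s.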

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};
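
// Typical use (illustrative only): binding a vnode reference to a scope so
// that put_vnode() runs on every exit path, e.g.:
//
//	VNodePutter putter(vnode);
//	if (someErrorOccurred)		// hypothetical condition
//		return B_ERROR;			// putter releases the reference
//	return B_OK;				// likewise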


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
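
// FDCloser plays the same role for file descriptors (illustrative only):
//
//	FDCloser fdCloser(fd, kernel);
//	if (status != B_OK)
//		return status;			// the descriptor is closed automatically
//	return fdCloser.Detach();	// success: hand the fd to the caller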

} // namespace


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
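
// For instance (illustrative values): given the layered name
// "bfs:write_overlay", layer 0 yields "bfs", layer 1 yields "write_overlay",
// and any higher layer yields NULL.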


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if the caller
	should keep waiting for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count ever has a chance
	// to drop to 0. Deleting the file cache now will cause the next-to-last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
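
// Worked example (illustrative values): a flock with l_start 10 and l_len 5
// covers offsets 10..14; an advisory_lock spanning 5..10 intersects it
// (5 <= 10 - 1 + 5 and 10 >= 10), while one spanning 15..20 does not.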


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				// Save the original end before truncating the first lock, so
				// the second lock covers the part beyond the released range.
				const off_t oldEnd = lock->end;
				lock->end = flock->l_start - 1;

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = oldEnd;
				secondLock->shared = lock->shared;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return B_UNSUPPORTED;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
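
// Worked example (illustrative values): with descriptor->pos == 100, a flock
// of l_whence == SEEK_CUR, l_start == 20, l_len == -10 first becomes
// l_start == 120, and the negative length then reverses the region to
// l_start == 110, l_len == 10.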


static void
replace_vnode_if_disconnected(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
	struct vnode* fallBack, bool lockRootLock)
{
	struct vnode* givenVnode = vnode;
	bool vnodeReplaced = false;

	ReadLocker vnodeReadLocker(sVnodeLock);

	if (lockRootLock)
		mutex_lock(&sIOContextRootLock);

	while (vnode != NULL && vnode->mount == mount
		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
		if (vnode->covers != NULL) {
			// redirect the vnode to the covered vnode
			vnode = vnode->covers;
		} else
			vnode = fallBack;

		vnodeReplaced = true;
	}

	// If we've replaced the node, grab a reference for the new one.
	if (vnodeReplaced && vnode != NULL)
		inc_vnode_ref_count(vnode);

	if (lockRootLock)
		mutex_unlock(&sIOContextRootLock);

	vnodeReadLocker.Unlock();

	if (vnodeReplaced)
		put_vnode(givenVnode);
}
1914 
1915 
1916 /*!	Disconnects all file descriptors that are associated with the
1917 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1918 	\a mount object.
1919 
1920 	Note that there might still be ongoing accesses after this function has
1921 	been called - accesses already in progress won't be interrupted.
1922 	However, any subsequent access will fail.
1923 
1924 	This is not a cheap function and should be used with care and rarely.
1925 	TODO: there is currently no means to stop a blocking read/write!
1926 */
1927 static void
1928 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1929 	struct vnode* vnodeToDisconnect)
1930 {
1931 	// iterate over all teams and peek into their file descriptors
1932 	TeamListIterator teamIterator;
1933 	while (Team* team = teamIterator.Next()) {
1934 		BReference<Team> teamReference(team, true);
1935 		TeamLocker teamLocker(team);
1936 
1937 		// lock the I/O context
1938 		io_context* context = team->io_context;
1939 		if (context == NULL)
1940 			continue;
1941 		MutexLocker contextLocker(context->io_mutex);
1942 
1943 		teamLocker.Unlock();
1944 
1945 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1946 			sRoot, true);
1947 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1948 			sRoot, false);
1949 
1950 		for (uint32 i = 0; i < context->table_size; i++) {
1951 			if (struct file_descriptor* descriptor = context->fds[i]) {
1952 				inc_fd_ref_count(descriptor);
1953 
1954 				// if this descriptor points at this mount, we
1955 				// need to disconnect it to be able to unmount
1956 				struct vnode* vnode = fd_vnode(descriptor);
1957 				if (vnodeToDisconnect != NULL) {
1958 					if (vnode == vnodeToDisconnect)
1959 						disconnect_fd(descriptor);
1960 				} else if ((vnode != NULL && vnode->mount == mount)
1961 					|| (vnode == NULL && descriptor->u.mount == mount))
1962 					disconnect_fd(descriptor);
1963 
1964 				put_fd(descriptor);
1965 			}
1966 		}
1967 	}
1968 }
1969 
1970 
1971 /*!	\brief Gets the root node of the current IO context.
1972 	If \a kernel is \c true, the kernel IO context will be used.
1973 	The caller obtains a reference to the returned node.
1974 */
1975 struct vnode*
1976 get_root_vnode(bool kernel)
1977 {
1978 	if (!kernel) {
1979 		// Get the root of the current IO context
1980 		struct io_context* context = get_current_io_context(kernel);
1981 
1982 		mutex_lock(&sIOContextRootLock);
1983 
1984 		struct vnode* root = context->root;
1985 		if (root != NULL)
1986 			inc_vnode_ref_count(root);
1987 
1988 		mutex_unlock(&sIOContextRootLock);
1989 
1990 		if (root != NULL)
1991 			return root;
1992 
1993 		// That should never happen.
1994 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1995 			"have a root\n", team_get_current_team_id());
1996 	}
1997 
1998 	inc_vnode_ref_count(sRoot);
1999 	return sRoot;
2000 }
2001 
2002 
2003 /*!	\brief Gets the directory path and leaf name for a given path.
2004 
2005 	The supplied \a path is transformed to refer to the directory part of
2006 	the entry identified by the original path, and the leaf name of the
2007 	original entry is written into the buffer \a filename.
2008 	Neither the returned path nor the leaf name can be expected to be
2009 	canonical.
2010 
2011 	\param path The path to be analyzed. Must be able to store at least one
2012 		   additional character.
2013 	\param filename The buffer into which the leaf name will be written.
2014 		   Must be of size B_FILE_NAME_LENGTH at least.
2015 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2016 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2017 		   if the given path name is empty.
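
	A sketch of the transformation performed (hypothetical buffers):
	\code
	char path[B_PATH_NAME_LENGTH] = "/boot/home/file.txt";
	char filename[B_FILE_NAME_LENGTH];
	status_t status = get_dir_path_and_leaf(path, filename);
	// on success: path == "/boot/home/.", filename == "file.txt"
	\endcode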
2018 */
2019 static status_t
2020 get_dir_path_and_leaf(char* path, char* filename)
2021 {
2022 	if (*path == '\0')
2023 		return B_ENTRY_NOT_FOUND;
2024 
2025 	char* last = strrchr(path, '/');
2026 		// '/' is not allowed in file names!
2027 
2028 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2029 
2030 	if (last == NULL) {
2031 		// this path is a single segment with no '/' in it
2032 		// e.g. "foo"
2033 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2034 			return B_NAME_TOO_LONG;
2035 
2036 		strcpy(path, ".");
2037 	} else {
2038 		last++;
2039 		if (last[0] == '\0') {
2040 			// special case: the path ends in one or more '/' - remove them
2041 			while (*--last == '/' && last != path);
2042 			last[1] = '\0';
2043 
2044 			if (last == path && last[0] == '/') {
2045 				// This path points to the root of the file system
2046 				strcpy(filename, ".");
2047 				return B_OK;
2048 			}
2049 			for (; last != path && *(last - 1) != '/'; last--);
2050 				// rewind to the start of the leaf before the '/'
2051 		}
2052 
2053 		// normal leaf: replace the leaf portion of the path with a '.'
2054 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2055 			return B_NAME_TOO_LONG;
2056 
2057 		last[0] = '.';
2058 		last[1] = '\0';
2059 	}
2060 	return B_OK;
2061 }
2062 
2063 
2064 static status_t
2065 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2066 	bool traverse, bool kernel, struct vnode** _vnode)
2067 {
2068 	char clonedName[B_FILE_NAME_LENGTH + 1];
2069 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2070 		return B_NAME_TOO_LONG;
2071 
2072 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2073 	struct vnode* directory;
2074 
2075 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2076 	if (status < 0)
2077 		return status;
2078 
2079 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2080 		_vnode, NULL);
2081 }
2082 
2083 
2084 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2085 	and returns the respective vnode.
2086 	On success a reference to the vnode is acquired for the caller.
2087 */
2088 static status_t
2089 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2090 {
2091 	ino_t id;
2092 	bool missing;
2093 
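	// Check the entry cache first; it caches both positive entries and
	// negative ones (entries known to be missing).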
2094 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2095 		return missing ? B_ENTRY_NOT_FOUND
2096 			: get_vnode(dir->device, id, _vnode, true, false);
2097 	}
2098 
2099 	status_t status = FS_CALL(dir, lookup, name, &id);
2100 	if (status != B_OK)
2101 		return status;
2102 
2103 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2104 	// have a reference and just need to look the node up.
2105 	rw_lock_read_lock(&sVnodeLock);
2106 	*_vnode = lookup_vnode(dir->device, id);
2107 	rw_lock_read_unlock(&sVnodeLock);
2108 
2109 	if (*_vnode == NULL) {
2110 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2111 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2112 		return B_ENTRY_NOT_FOUND;
2113 	}
2114 
2115 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2116 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2117 //		(*_vnode)->mount->id, (*_vnode)->id);
2118 
2119 	return B_OK;
2120 }
2121 
2122 
2123 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2124 	\a path must not be NULL.
2125 	If it returns successfully, \a path contains the name of the last path
2126 	component. This function clobbers the buffer pointed to by \a path only
2127 	if it does contain more than one component.
2128 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2129 	it is successful or not!
2130 */
2131 static status_t
2132 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2133 	int count, struct io_context* ioContext, struct vnode** _vnode,
2134 	ino_t* _parentID)
2135 {
2136 	status_t status = B_OK;
2137 	ino_t lastParentID = vnode->id;
2138 
2139 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2140 
2141 	if (path == NULL) {
2142 		put_vnode(vnode);
2143 		return B_BAD_VALUE;
2144 	}
2145 
2146 	if (*path == '\0') {
2147 		put_vnode(vnode);
2148 		return B_ENTRY_NOT_FOUND;
2149 	}
2150 
2151 	while (true) {
2152 		struct vnode* nextVnode;
2153 		char* nextPath;
2154 
2155 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2156 			path));
2157 
2158 		// done?
2159 		if (path[0] == '\0')
2160 			break;
2161 
2162 		// walk to find the next path component ("path" will point to a single
2163 		// path component), and filter out multiple slashes
2164 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2165 				nextPath++);
2166 
2167 		if (*nextPath == '/') {
2168 			*nextPath = '\0';
2169 			do
2170 				nextPath++;
2171 			while (*nextPath == '/');
2172 		}
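		// (e.g. for "foo//bar", path now points to "foo" and nextPath to
		// "bar")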
2173 
2174 		// If the '..' is at a covering vnode, move to the covered vnode,
2175 		// so we pass the '..' path to the underlying file system.
2176 		// Also prevent escaping the root of the IO context.
2177 		if (strcmp("..", path) == 0) {
2178 			if (vnode == ioContext->root) {
2179 				// Attempted prison break! Keep it contained.
2180 				path = nextPath;
2181 				continue;
2182 			}
2183 
2184 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2185 				nextVnode = coveredVnode;
2186 				put_vnode(vnode);
2187 				vnode = nextVnode;
2188 			}
2189 		}
2190 
2191 		// check if vnode is really a directory
2192 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2193 			status = B_NOT_A_DIRECTORY;
2194 
2195 		// Check if we have the right to search the current directory vnode.
2196 		// If a file system doesn't have the access() function, we assume that
2197 		// searching a directory is always allowed
2198 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2199 			status = FS_CALL(vnode, access, X_OK);
2200 
2201 		// Tell the filesystem to get the vnode of this path component (if we
2202 		// got the permission from the call above)
2203 		if (status == B_OK)
2204 			status = lookup_dir_entry(vnode, path, &nextVnode);
2205 
2206 		if (status != B_OK) {
2207 			put_vnode(vnode);
2208 			return status;
2209 		}
2210 
2211 		// If the new node is a symbolic link, resolve it (if we've been told
2212 		// to do it)
2213 		if (S_ISLNK(nextVnode->Type())
2214 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2215 			size_t bufferSize;
2216 			char* buffer;
2217 
2218 			TRACE(("traverse link\n"));
2219 
2220 			// it's not exactly nice style using goto in this way, but hey,
2221 			// it works :-/
2222 			if (count + 1 > B_MAX_SYMLINKS) {
2223 				status = B_LINK_LIMIT;
2224 				goto resolve_link_error;
2225 			}
2226 
2227 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2228 			if (buffer == NULL) {
2229 				status = B_NO_MEMORY;
2230 				goto resolve_link_error;
2231 			}
2232 
2233 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2234 				bufferSize--;
2235 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2236 				// null-terminate
2237 				if (status >= 0)
2238 					buffer[bufferSize] = '\0';
2239 			} else
2240 				status = B_BAD_VALUE;
2241 
2242 			if (status != B_OK) {
2243 				free(buffer);
2244 
2245 		resolve_link_error:
2246 				put_vnode(vnode);
2247 				put_vnode(nextVnode);
2248 
2249 				return status;
2250 			}
2251 			put_vnode(nextVnode);
2252 
2253 			// Check if we start from the root directory or the current
2254 			// directory ("vnode" still points to that one).
2255 			// Cut off all leading slashes if it's the root directory
2256 			path = buffer;
2257 			bool absoluteSymlink = false;
2258 			if (path[0] == '/') {
2259 				// we don't need the old directory anymore
2260 				put_vnode(vnode);
2261 
2262 				while (*++path == '/')
2263 					;
2264 
2265 				mutex_lock(&sIOContextRootLock);
2266 				vnode = ioContext->root;
2267 				inc_vnode_ref_count(vnode);
2268 				mutex_unlock(&sIOContextRootLock);
2269 
2270 				absoluteSymlink = true;
2271 			}
2272 
2273 			inc_vnode_ref_count(vnode);
2274 				// balance the next recursion - we will decrement the
2275 				// ref_count of the vnode, no matter if we succeeded or not
2276 
2277 			if (absoluteSymlink && *path == '\0') {
2278 				// symlink was just "/"
2279 				nextVnode = vnode;
2280 			} else {
2281 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2282 					ioContext, &nextVnode, &lastParentID);
2283 			}
2284 
2285 			free(buffer);
2286 
2287 			if (status != B_OK) {
2288 				put_vnode(vnode);
2289 				return status;
2290 			}
2291 		} else
2292 			lastParentID = vnode->id;
2293 
2294 		// decrease the ref count on the old dir we just looked up into
2295 		put_vnode(vnode);
2296 
2297 		path = nextPath;
2298 		vnode = nextVnode;
2299 
2300 		// see if we hit a covered node
2301 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2302 			put_vnode(vnode);
2303 			vnode = coveringNode;
2304 		}
2305 	}
2306 
2307 	*_vnode = vnode;
2308 	if (_parentID)
2309 		*_parentID = lastParentID;
2310 
2311 	return B_OK;
2312 }
2313 
2314 
2315 static status_t
2316 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2317 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2318 {
2319 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2320 		get_current_io_context(kernel), _vnode, _parentID);
2321 }
2322 
2323 
2324 static status_t
2325 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2326 	ino_t* _parentID, bool kernel)
2327 {
2328 	struct vnode* start = NULL;
2329 
2330 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2331 
2332 	if (!path)
2333 		return B_BAD_VALUE;
2334 
2335 	if (*path == '\0')
2336 		return B_ENTRY_NOT_FOUND;
2337 
2338 	// figure out if we need to start at root or at cwd
2339 	if (*path == '/') {
2340 		if (sRoot == NULL) {
2341 			// we're a bit early, aren't we?
2342 			return B_ERROR;
2343 		}
2344 
2345 		while (*++path == '/')
2346 			;
2347 		start = get_root_vnode(kernel);
2348 
2349 		if (*path == '\0') {
2350 			*_vnode = start;
2351 			return B_OK;
2352 		}
2353 
2354 	} else {
2355 		struct io_context* context = get_current_io_context(kernel);
2356 
2357 		mutex_lock(&context->io_mutex);
2358 		start = context->cwd;
2359 		if (start != NULL)
2360 			inc_vnode_ref_count(start);
2361 		mutex_unlock(&context->io_mutex);
2362 
2363 		if (start == NULL)
2364 			return B_ERROR;
2365 	}
2366 
2367 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2368 		_parentID);
2369 }
2370 
2371 
2372 /*!	Returns the vnode for the next-to-last segment of the path, and returns
2373 	the last path component in \a filename.
2374 	The path buffer must be able to store at least one additional character.
2375 */
2376 static status_t
2377 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2378 	bool kernel)
2379 {
2380 	status_t status = get_dir_path_and_leaf(path, filename);
2381 	if (status != B_OK)
2382 		return status;
2383 
2384 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2385 }
2386 
2387 
2388 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2389 		   to by a FD + path pair.
2390 
2391 	\a path must be given in either case. \a fd might be omitted, in which
2392 	case \a path is either an absolute path or one relative to the current
2393 	directory. If both are supplied and \a path is relative, it is reckoned
2394 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2395 	is ignored.
2396 
2397 	The caller has the responsibility to call put_vnode() on the returned
2398 	directory vnode.
2399 
2400 	\param fd The FD. May be < 0.
2401 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2402 	       is modified by this function. It must have at least room for a
2403 	       string one character longer than the path it contains.
2404 	\param _vnode A pointer to a variable the directory vnode shall be written
2405 		   into.
2406 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2407 		   the leaf name of the specified entry will be written.
2408 	\param kernel \c true, if invoked from inside the kernel, \c false if
2409 		   invoked from userland.
2410 	\return \c B_OK, if everything went fine, another error code otherwise.
2411 */
2412 static status_t
2413 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2414 	char* filename, bool kernel)
2415 {
2416 	if (!path)
2417 		return B_BAD_VALUE;
2418 	if (*path == '\0')
2419 		return B_ENTRY_NOT_FOUND;
2420 	if (fd < 0)
2421 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2422 
2423 	status_t status = get_dir_path_and_leaf(path, filename);
2424 	if (status != B_OK)
2425 		return status;
2426 
2427 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2428 }
2429 
2430 
2431 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2432 		   to by a vnode + path pair.
2433 
2434 	\a path must be given in either case. \a vnode might be omitted, in which
2435 	case \a path is either an absolute path or one relative to the current
2436 	directory. If both are supplied and \a path is relative, it is reckoned
2437 	off of the directory referred to by \a vnode. If \a path is absolute,
2438 	\a vnode is ignored.
2439 
2440 	The caller has the responsibility to call put_vnode() on the returned
2441 	directory vnode.
2442 
2443 	\param vnode The vnode. May be \c NULL.
2444 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2445 	       is modified by this function. It must have at least room for a
2446 	       string one character longer than the path it contains.
2447 	\param _vnode A pointer to a variable the directory vnode shall be written
2448 		   into.
2449 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2450 		   the leaf name of the specified entry will be written.
2451 	\param kernel \c true, if invoked from inside the kernel, \c false if
2452 		   invoked from userland.
2453 	\return \c B_OK, if everything went fine, another error code otherwise.
2454 */
2455 static status_t
2456 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2457 	struct vnode** _vnode, char* filename, bool kernel)
2458 {
2459 	if (!path)
2460 		return B_BAD_VALUE;
2461 	if (*path == '\0')
2462 		return B_ENTRY_NOT_FOUND;
2463 	if (vnode == NULL || path[0] == '/')
2464 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2465 
2466 	status_t status = get_dir_path_and_leaf(path, filename);
2467 	if (status != B_OK)
2468 		return status;
2469 
2470 	inc_vnode_ref_count(vnode);
2471 		// vnode_path_to_vnode() always decrements the ref count
2472 
2473 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2474 }
2475 
2476 
2477 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2478 */
2479 static status_t
2480 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2481 	size_t bufferSize, struct io_context* ioContext)
2482 {
2483 	if (bufferSize < sizeof(struct dirent))
2484 		return B_BAD_VALUE;
2485 
2486 	// See if the vnode is covering another vnode and move to the covered
2487 	// vnode so we get the underlying file system
2488 	VNodePutter vnodePutter;
2489 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2490 		vnode = coveredVnode;
2491 		vnodePutter.SetTo(vnode);
2492 	}
2493 
2494 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2495 		// The FS supports getting the name of a vnode.
2496 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2497 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2498 			return B_OK;
2499 	}
2500 
2501 	// The FS doesn't support getting the name of a vnode. So we search the
2502 	// parent directory for the vnode, if the caller let us.
2503 
2504 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2505 		return B_UNSUPPORTED;
2506 
2507 	void* cookie;
2508 
2509 	status_t status = FS_CALL(parent, open_dir, &cookie);
2510 	if (status >= B_OK) {
2511 		while (true) {
2512 			uint32 num = 1;
2513 			// We use the FS hook directly instead of dir_read(), since we don't
2514 			// want the entries to be fixed. We have already resolved vnode to
2515 			// the covered node.
2516 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2517 				&num);
2518 			if (status != B_OK)
2519 				break;
2520 			if (num == 0) {
2521 				status = B_ENTRY_NOT_FOUND;
2522 				break;
2523 			}
2524 
2525 			if (vnode->id == buffer->d_ino) {
2526 				// found correct entry!
2527 				break;
2528 			}
2529 		}
2530 
2531 		FS_CALL(parent, close_dir, cookie);
2532 		FS_CALL(parent, free_dir_cookie, cookie);
2533 	}
2534 	return status;
2535 }
2536 
2537 
2538 static status_t
2539 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2540 	size_t nameSize, bool kernel)
2541 {
2542 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2543 	struct dirent* dirent = (struct dirent*)buffer;
2544 
2545 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2546 		get_current_io_context(kernel));
2547 	if (status != B_OK)
2548 		return status;
2549 
2550 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2551 		return B_BUFFER_OVERFLOW;
2552 
2553 	return B_OK;
2554 }
2555 
2556 
2557 /*!	Gets the full path to a given directory vnode.
2558 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2559 	file system doesn't support this call, it will fall back to iterating
2560 	through the parent directory to get the name of the child.
2561 
2562 	To protect against circular loops, it supports a maximum tree depth
2563 	of 256 levels.
2564 
2565 	Note that the path may no longer be correct by the time this function
2566 	returns! No locking is used to ensure the returned path stays valid, as
2567 	paths aren't safe anyway: the path to a file can change at any time.
2568 
2569 	It might be a good idea, though, to check in the calling function
2570 	whether the returned path exists (it's not done here for efficiency).
2571 */
2572 static status_t
2573 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2574 	bool kernel)
2575 {
2576 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2577 
2578 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2579 		return B_BAD_VALUE;
2580 
2581 	if (!S_ISDIR(vnode->Type()))
2582 		return B_NOT_A_DIRECTORY;
2583 
2584 	char* path = buffer;
2585 	int32 insert = bufferSize;
2586 	int32 maxLevel = 256;
2587 	int32 length;
2588 	status_t status = B_OK;
2589 	struct io_context* ioContext = get_current_io_context(kernel);
2590 
2591 	// we don't use get_vnode() here because this call is more
2592 	// efficient and does all we need from get_vnode()
2593 	inc_vnode_ref_count(vnode);
2594 
2595 	path[--insert] = '\0';
2596 		// the path is filled right to left
2597 
2598 	while (true) {
2599 		// If the node is the context's root, bail out. Otherwise resolve mount
2600 		// points.
2601 		if (vnode == ioContext->root)
2602 			break;
2603 
2604 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2605 			put_vnode(vnode);
2606 			vnode = coveredVnode;
2607 		}
2608 
2609 		// lookup the parent vnode
2610 		struct vnode* parentVnode;
2611 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2612 		if (status != B_OK)
2613 			goto out;
2614 
2615 		if (parentVnode == vnode) {
2616 			// The caller apparently got their hands on a node outside of their
2617 			// context's root. Now we've hit the global root.
2618 			put_vnode(parentVnode);
2619 			break;
2620 		}
2621 
2622 		// get the node's name
2623 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2624 			// also used for fs_read_dir()
2625 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2626 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2627 			sizeof(nameBuffer), ioContext);
2628 
2629 		// release the current vnode, we only need its parent from now on
2630 		put_vnode(vnode);
2631 		vnode = parentVnode;
2632 
2633 		if (status != B_OK)
2634 			goto out;
2635 
2636 		// TODO: add an explicit check for loops in about 10 levels to do
2637 		// real loop detection
2638 
2639 		// don't go deeper than 'maxLevel' to prevent circular loops
2640 		if (maxLevel-- < 0) {
2641 			status = B_LINK_LIMIT;
2642 			goto out;
2643 		}
2644 
2645 		// add the name in front of the current path
2646 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2647 		length = strlen(name);
2648 		insert -= length;
2649 		if (insert <= 0) {
2650 			status = B_RESULT_NOT_REPRESENTABLE;
2651 			goto out;
2652 		}
2653 		memcpy(path + insert, name, length);
2654 		path[--insert] = '/';
2655 	}
2656 
2657 	// the root dir will result in an empty path: fix it
2658 	if (path[insert] == '\0')
2659 		path[--insert] = '/';
2660 
2661 	TRACE(("  path is: %s\n", path + insert));
2662 
2663 	// move the path to the start of the buffer
2664 	length = bufferSize - insert;
2665 	memmove(buffer, path + insert, length);
2666 
2667 out:
2668 	put_vnode(vnode);
2669 	return status;
2670 }
2671 
2672 
2673 /*!	Checks the length of every path component, and adds a '.'
2674 	if the path ends in a slash.
2675 	The given path buffer must be able to store at least one
2676 	additional character.
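
	For example, "/boot/home/" is rewritten in place to "/boot/home/.".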
2677 */
2678 static status_t
2679 check_path(char* to)
2680 {
2681 	int32 length = 0;
2682 
2683 	// check length of every path component
2684 
2685 	while (*to) {
2686 		char* begin;
2687 		if (*to == '/')
2688 			to++, length++;
2689 
2690 		begin = to;
2691 		while (*to != '/' && *to)
2692 			to++, length++;
2693 
2694 		if (to - begin > B_FILE_NAME_LENGTH)
2695 			return B_NAME_TOO_LONG;
2696 	}
2697 
2698 	if (length == 0)
2699 		return B_ENTRY_NOT_FOUND;
2700 
2701 	// complete path if there is a slash at the end
2702 
2703 	if (*(to - 1) == '/') {
2704 		if (length > B_PATH_NAME_LENGTH - 2)
2705 			return B_NAME_TOO_LONG;
2706 
2707 		to[0] = '.';
2708 		to[1] = '\0';
2709 	}
2710 
2711 	return B_OK;
2712 }
2713 
2714 
2715 static struct file_descriptor*
2716 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2717 {
2718 	struct file_descriptor* descriptor
2719 		= get_fd(get_current_io_context(kernel), fd);
2720 	if (descriptor == NULL)
2721 		return NULL;
2722 
2723 	struct vnode* vnode = fd_vnode(descriptor);
2724 	if (vnode == NULL) {
2725 		put_fd(descriptor);
2726 		return NULL;
2727 	}
2728 
2729 	// ToDo: when we can close a file descriptor at any point, investigate
2730 	//	if this is still valid to do (accessing the vnode without ref_count
2731 	//	or locking)
2732 	*_vnode = vnode;
2733 	return descriptor;
2734 }
2735 
2736 
2737 static struct vnode*
2738 get_vnode_from_fd(int fd, bool kernel)
2739 {
2740 	struct file_descriptor* descriptor;
2741 	struct vnode* vnode;
2742 
2743 	descriptor = get_fd(get_current_io_context(kernel), fd);
2744 	if (descriptor == NULL)
2745 		return NULL;
2746 
2747 	vnode = fd_vnode(descriptor);
2748 	if (vnode != NULL)
2749 		inc_vnode_ref_count(vnode);
2750 
2751 	put_fd(descriptor);
2752 	return vnode;
2753 }
2754 
2755 
2756 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2757 	only the path will be considered. In this case, the \a path must not be
2758 	NULL.
2759 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2760 	and should be NULL for files.
2761 */
2762 static status_t
2763 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2764 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2765 {
2766 	if (fd < 0 && !path)
2767 		return B_BAD_VALUE;
2768 
2769 	if (path != NULL && *path == '\0')
2770 		return B_ENTRY_NOT_FOUND;
2771 
2772 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2773 		// no FD or absolute path
2774 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2775 	}
2776 
2777 	// FD only, or FD + relative path
2778 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2779 	if (vnode == NULL)
2780 		return B_FILE_ERROR;
2781 
2782 	if (path != NULL) {
2783 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2784 			_vnode, _parentID);
2785 	}
2786 
2787 	// there is no relative path to take into account
2788 
2789 	*_vnode = vnode;
2790 	if (_parentID)
2791 		*_parentID = -1;
2792 
2793 	return B_OK;
2794 }
2795 
2796 
2797 static int
2798 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2799 	void* cookie, int openMode, bool kernel)
2800 {
2801 	struct file_descriptor* descriptor;
2802 	int fd;
2803 
2804 	// If the vnode is mandatory-locked, we don't allow creating a new
2805 	// file/directory file_descriptor for it
2806 	if (vnode && vnode->mandatory_locked_by != NULL
2807 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2808 		return B_BUSY;
2809 
2810 	descriptor = alloc_fd();
2811 	if (!descriptor)
2812 		return B_NO_MEMORY;
2813 
2814 	if (vnode)
2815 		descriptor->u.vnode = vnode;
2816 	else
2817 		descriptor->u.mount = mount;
2818 	descriptor->cookie = cookie;
2819 
2820 	switch (type) {
2821 		// vnode types
2822 		case FDTYPE_FILE:
2823 			descriptor->ops = &sFileOps;
2824 			break;
2825 		case FDTYPE_DIR:
2826 			descriptor->ops = &sDirectoryOps;
2827 			break;
2828 		case FDTYPE_ATTR:
2829 			descriptor->ops = &sAttributeOps;
2830 			break;
2831 		case FDTYPE_ATTR_DIR:
2832 			descriptor->ops = &sAttributeDirectoryOps;
2833 			break;
2834 
2835 		// mount types
2836 		case FDTYPE_INDEX_DIR:
2837 			descriptor->ops = &sIndexDirectoryOps;
2838 			break;
2839 		case FDTYPE_QUERY:
2840 			descriptor->ops = &sQueryOps;
2841 			break;
2842 
2843 		default:
2844 			panic("get_new_fd() called with unknown type %d\n", type);
2845 			break;
2846 	}
2847 	descriptor->type = type;
2848 	descriptor->open_mode = openMode;
2849 
2850 	io_context* context = get_current_io_context(kernel);
2851 	fd = new_fd(context, descriptor);
2852 	if (fd < 0) {
2853 		free(descriptor);
2854 		return B_NO_MORE_FDS;
2855 	}
2856 
2857 	mutex_lock(&context->io_mutex);
2858 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2859 	mutex_unlock(&context->io_mutex);
2860 
2861 	return fd;
2862 }
2863 
2864 
2865 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2866 	vfs_normalize_path(). See there for more documentation.
2867 */
2868 static status_t
2869 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2870 {
2871 	VNodePutter dirPutter;
2872 	struct vnode* dir = NULL;
2873 	status_t error;
2874 
2875 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
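		// Each iteration resolves the leaf of the current path. If the leaf
		// is a symlink (and we shall traverse it), the link's contents become
		// the new path for the next round; the loop bound guards against
		// symlink cycles.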
2876 		// get dir vnode + leaf name
2877 		struct vnode* nextDir;
2878 		char leaf[B_FILE_NAME_LENGTH];
2879 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2880 		if (error != B_OK)
2881 			return error;
2882 
2883 		dir = nextDir;
2884 		strcpy(path, leaf);
2885 		dirPutter.SetTo(dir);
2886 
2887 		// get file vnode, if we shall resolve links
2888 		bool fileExists = false;
2889 		struct vnode* fileVnode;
2890 		VNodePutter fileVnodePutter;
2891 		if (traverseLink) {
2892 			inc_vnode_ref_count(dir);
2893 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2894 					NULL) == B_OK) {
2895 				fileVnodePutter.SetTo(fileVnode);
2896 				fileExists = true;
2897 			}
2898 		}
2899 
2900 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2901 			// we're done -- construct the path
2902 			bool hasLeaf = true;
2903 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2904 				// special cases "." and ".." -- get the dir, forget the leaf
2905 				inc_vnode_ref_count(dir);
2906 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2907 					&nextDir, NULL);
2908 				if (error != B_OK)
2909 					return error;
2910 				dir = nextDir;
2911 				dirPutter.SetTo(dir);
2912 				hasLeaf = false;
2913 			}
2914 
2915 			// get the directory path
2916 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2917 			if (error != B_OK)
2918 				return error;
2919 
2920 			// append the leaf name
2921 			if (hasLeaf) {
2922 				// insert a directory separator if this is not the file system
2923 				// root
2924 				if ((strcmp(path, "/") != 0
2925 					&& strlcat(path, "/", pathSize) >= pathSize)
2926 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2927 					return B_NAME_TOO_LONG;
2928 				}
2929 			}
2930 
2931 			return B_OK;
2932 		}
2933 
2934 		// read link
2935 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2936 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2937 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2938 			if (error != B_OK)
2939 				return error;
2940 			path[bufferSize] = '\0';
2941 		} else
2942 			return B_BAD_VALUE;
2943 	}
2944 
2945 	return B_LINK_LIMIT;
2946 }
2947 
2948 
2949 static status_t
2950 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2951 	struct io_context* ioContext)
2952 {
2953 	// Make sure the IO context root is not bypassed.
2954 	if (parent == ioContext->root) {
2955 		*_device = parent->device;
2956 		*_node = parent->id;
2957 		return B_OK;
2958 	}
2959 
2960 	inc_vnode_ref_count(parent);
2961 		// vnode_path_to_vnode() puts the node
2962 
2963 	// ".." is guaranteed not to be clobbered by this call
2964 	struct vnode* vnode;
2965 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2966 		ioContext, &vnode, NULL);
2967 	if (status == B_OK) {
2968 		*_device = vnode->device;
2969 		*_node = vnode->id;
2970 		put_vnode(vnode);
2971 	}
2972 
2973 	return status;
2974 }
2975 
2976 
2977 #ifdef ADD_DEBUGGER_COMMANDS
2978 
2979 
2980 static void
2981 _dump_advisory_locking(advisory_locking* locking)
2982 {
2983 	if (locking == NULL)
2984 		return;
2985 
2986 	kprintf("   lock:        %" B_PRId32, locking->lock);
2987 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2988 
2989 	int32 index = 0;
2990 	LockList::Iterator iterator = locking->locks.GetIterator();
2991 	while (iterator.HasNext()) {
2992 		struct advisory_lock* lock = iterator.Next();
2993 
2994 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2995 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2996 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2997 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2998 	}
2999 }
3000 
3001 
3002 static void
3003 _dump_mount(struct fs_mount* mount)
3004 {
3005 	kprintf("MOUNT: %p\n", mount);
3006 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3007 	kprintf(" device_name:   %s\n", mount->device_name);
3008 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3009 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3010 	kprintf(" partition:     %p\n", mount->partition);
3011 	kprintf(" lock:          %p\n", &mount->rlock);
3012 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3013 		mount->owns_file_device ? " owns_file_device" : "");
3014 
3015 	fs_volume* volume = mount->volume;
3016 	while (volume != NULL) {
3017 		kprintf(" volume %p:\n", volume);
3018 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3019 		kprintf("  private_volume:   %p\n", volume->private_volume);
3020 		kprintf("  ops:              %p\n", volume->ops);
3021 		kprintf("  file_system:      %p\n", volume->file_system);
3022 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3023 		volume = volume->super_volume;
3024 	}
3025 
3026 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3027 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3028 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3029 	set_debug_variable("_partition", (addr_t)mount->partition);
3030 }
3031 
3032 
3033 static bool
3034 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3035 	const char* name)
3036 {
3037 	bool insertSlash = buffer[bufferSize] != '\0';
3038 	size_t nameLength = strlen(name);
3039 
3040 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3041 		return false;
3042 
3043 	if (insertSlash)
3044 		buffer[--bufferSize] = '/';
3045 
3046 	bufferSize -= nameLength;
3047 	memcpy(buffer + bufferSize, name, nameLength);
3048 
3049 	return true;
3050 }
3051 
3052 
3053 static bool
3054 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3055 	ino_t nodeID)
3056 {
3057 	if (bufferSize == 0)
3058 		return false;
3059 
3060 	bool insertSlash = buffer[bufferSize] != '\0';
3061 	if (insertSlash)
3062 		buffer[--bufferSize] = '/';
3063 
3064 	size_t size = snprintf(buffer, bufferSize,
3065 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3066 	if (size > bufferSize) {
3067 		if (insertSlash)
3068 			bufferSize++;
3069 		return false;
3070 	}
3071 
3072 	if (size < bufferSize)
3073 		memmove(buffer + bufferSize - size, buffer, size);
3074 
3075 	bufferSize -= size;
3076 	return true;
3077 }
3078 
3079 
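/*!	Resolves the path of \a vnode into \a buffer, building it right to left
	by means of the entry cache's reverse lookup. Components that cannot be
	resolved are rendered as "<dev,node>". \a _truncated is set when the
	buffer was too small. Only meant for the kernel debugger, since no
	locking is done.
*/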
3080 static char*
3081 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3082 	bool& _truncated)
3083 {
3084 	// null-terminate the path
3085 	buffer[--bufferSize] = '\0';
3086 
3087 	while (true) {
3088 		while (vnode->covers != NULL)
3089 			vnode = vnode->covers;
3090 
3091 		if (vnode == sRoot) {
3092 			_truncated = bufferSize == 0;
3093 			if (!_truncated)
3094 				buffer[--bufferSize] = '/';
3095 			return buffer + bufferSize;
3096 		}
3097 
3098 		// resolve the name
3099 		ino_t dirID;
3100 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3101 			vnode->id, dirID);
3102 		if (name == NULL) {
3103 			// Failed to resolve the name -- prepend "<dev,node>/".
3104 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3105 				vnode->mount->id, vnode->id);
3106 			return buffer + bufferSize;
3107 		}
3108 
3109 		// prepend the name
3110 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3111 			_truncated = true;
3112 			return buffer + bufferSize;
3113 		}
3114 
3115 		// resolve the directory node
3116 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3117 		if (nextVnode == NULL) {
3118 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3119 				vnode->mount->id, dirID);
3120 			return buffer + bufferSize;
3121 		}
3122 
3123 		vnode = nextVnode;
3124 	}
3125 }
3126 
3127 
3128 static void
3129 _dump_vnode(struct vnode* vnode, bool printPath)
3130 {
3131 	kprintf("VNODE: %p\n", vnode);
3132 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3133 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3134 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3135 	kprintf(" private_node:  %p\n", vnode->private_node);
3136 	kprintf(" mount:         %p\n", vnode->mount);
3137 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3138 	kprintf(" covers:        %p\n", vnode->covers);
3139 	kprintf(" cache:         %p\n", vnode->cache);
3140 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3141 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3142 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3143 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3144 
3145 	_dump_advisory_locking(vnode->advisory_locking);
3146 
3147 	if (printPath) {
3148 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3149 		if (buffer != NULL) {
3150 			bool truncated;
3151 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3152 				B_PATH_NAME_LENGTH, truncated);
3153 			if (path != NULL) {
3154 				kprintf(" path:          ");
3155 				if (truncated)
3156 					kputs("<truncated>/");
3157 				kputs(path);
3158 				kputs("\n");
3159 			} else
3160 				kprintf("Failed to resolve vnode path.\n");
3161 
3162 			debug_free(buffer);
3163 		} else
3164 			kprintf("Failed to allocate memory for constructing the path.\n");
3165 	}
3166 
3167 	set_debug_variable("_node", (addr_t)vnode->private_node);
3168 	set_debug_variable("_mount", (addr_t)vnode->mount);
3169 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3170 	set_debug_variable("_covers", (addr_t)vnode->covers);
3171 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3172 }
3173 
3174 
3175 static int
3176 dump_mount(int argc, char** argv)
3177 {
3178 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3179 		kprintf("usage: %s [id|address]\n", argv[0]);
3180 		return 0;
3181 	}
3182 
3183 	ulong val = parse_expression(argv[1]);
3184 	uint32 id = val;
3185 
3186 	struct fs_mount* mount = sMountsTable->Lookup(id);
3187 	if (mount == NULL) {
3188 		if (IS_USER_ADDRESS(id)) {
3189 			kprintf("fs_mount not found\n");
3190 			return 0;
3191 		}
3192 		mount = (fs_mount*)val;
3193 	}
3194 
3195 	_dump_mount(mount);
3196 	return 0;
3197 }
3198 
3199 
3200 static int
3201 dump_mounts(int argc, char** argv)
3202 {
3203 	if (argc != 1) {
3204 		kprintf("usage: %s\n", argv[0]);
3205 		return 0;
3206 	}
3207 
3208 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3209 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3210 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3211 
3212 	struct fs_mount* mount;
3213 
3214 	MountTable::Iterator iterator(sMountsTable);
3215 	while (iterator.HasNext()) {
3216 		mount = iterator.Next();
3217 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3218 			mount->root_vnode->covers, mount->volume->private_volume,
3219 			mount->volume->file_system_name);
3220 
3221 		fs_volume* volume = mount->volume;
3222 		while (volume->super_volume != NULL) {
3223 			volume = volume->super_volume;
3224 			kprintf("                                     %p %s\n",
3225 				volume->private_volume, volume->file_system_name);
3226 		}
3227 	}
3228 
3229 	return 0;
3230 }
3231 
3232 
3233 static int
3234 dump_vnode(int argc, char** argv)
3235 {
3236 	bool printPath = false;
3237 	int argi = 1;
3238 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3239 		printPath = true;
3240 		argi++;
3241 	}
3242 
3243 	if (argi >= argc || argi + 2 < argc) {
3244 		print_debugger_command_usage(argv[0]);
3245 		return 0;
3246 	}
3247 
3248 	struct vnode* vnode = NULL;
3249 
3250 	if (argi + 1 == argc) {
3251 		vnode = (struct vnode*)parse_expression(argv[argi]);
3252 		if (IS_USER_ADDRESS(vnode)) {
3253 			kprintf("invalid vnode address\n");
3254 			return 0;
3255 		}
3256 		_dump_vnode(vnode, printPath);
3257 		return 0;
3258 	}
3259 
3260 	dev_t device = parse_expression(argv[argi]);
3261 	ino_t id = parse_expression(argv[argi + 1]);
3262 
3263 	VnodeTable::Iterator iterator(sVnodeTable);
3264 	while (iterator.HasNext()) {
3265 		vnode = iterator.Next();
3266 		if (vnode->id != id || vnode->device != device)
3267 			continue;
3268 
3269 		_dump_vnode(vnode, printPath);
3270 	}
3271 
3272 	return 0;
3273 }
3274 
3275 
3276 static int
3277 dump_vnodes(int argc, char** argv)
3278 {
3279 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3280 		kprintf("usage: %s [device]\n", argv[0]);
3281 		return 0;
3282 	}
3283 
3284 	// restrict dumped nodes to a certain device if requested
3285 	dev_t device = parse_expression(argv[1]);
3286 
3287 	struct vnode* vnode;
3288 
3289 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3290 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3291 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3292 
3293 	VnodeTable::Iterator iterator(sVnodeTable);
3294 	while (iterator.HasNext()) {
3295 		vnode = iterator.Next();
3296 		if (vnode->device != device)
3297 			continue;
3298 
3299 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3300 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3301 			vnode->private_node, vnode->advisory_locking,
3302 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3303 			vnode->IsUnpublished() ? "u" : "-");
3304 	}
3305 
3306 	return 0;
3307 }
3308 
3309 
3310 static int
3311 dump_vnode_caches(int argc, char** argv)
3312 {
3313 	struct vnode* vnode;
3314 
3315 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3316 		kprintf("usage: %s [device]\n", argv[0]);
3317 		return 0;
3318 	}
3319 
3320 	// restrict dumped nodes to a certain device if requested
3321 	dev_t device = -1;
3322 	if (argc > 1)
3323 		device = parse_expression(argv[1]);
3324 
3325 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3326 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3327 
3328 	VnodeTable::Iterator iterator(sVnodeTable);
3329 	while (iterator.HasNext()) {
3330 		vnode = iterator.Next();
3331 		if (vnode->cache == NULL)
3332 			continue;
3333 		if (device != -1 && vnode->device != device)
3334 			continue;
3335 
3336 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3337 			vnode, vnode->device, vnode->id, vnode->cache,
3338 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3339 			vnode->cache->page_count);
3340 	}
3341 
3342 	return 0;
3343 }
3344 
3345 
3346 int
3347 dump_io_context(int argc, char** argv)
3348 {
3349 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3350 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3351 		return 0;
3352 	}
3353 
3354 	struct io_context* context = NULL;
3355 
3356 	if (argc > 1) {
3357 		ulong num = parse_expression(argv[1]);
3358 		if (IS_KERNEL_ADDRESS(num))
3359 			context = (struct io_context*)num;
3360 		else {
3361 			Team* team = team_get_team_struct_locked(num);
3362 			if (team == NULL) {
3363 				kprintf("could not find team with ID %lu\n", num);
3364 				return 0;
3365 			}
3366 			context = (struct io_context*)team->io_context;
3367 		}
3368 	} else
3369 		context = get_current_io_context(true);
3370 
3371 	kprintf("I/O CONTEXT: %p\n", context);
3372 	kprintf(" root vnode:\t%p\n", context->root);
3373 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3374 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3375 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3376 
3377 	if (context->num_used_fds) {
3378 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3379 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3380 	}
3381 
3382 	for (uint32 i = 0; i < context->table_size; i++) {
3383 		struct file_descriptor* fd = context->fds[i];
3384 		if (fd == NULL)
3385 			continue;
3386 
3387 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3388 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3389 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3390 			fd->pos, fd->cookie,
3391 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3392 				? "mount" : "vnode",
3393 			fd->u.vnode);
3394 	}
3395 
3396 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3397 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3398 
3399 	set_debug_variable("_cwd", (addr_t)context->cwd);
3400 
3401 	return 0;
3402 }
3403 
3404 
3405 int
3406 dump_vnode_usage(int argc, char** argv)
3407 {
3408 	if (argc != 1) {
3409 		kprintf("usage: %s\n", argv[0]);
3410 		return 0;
3411 	}
3412 
3413 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3414 		sUnusedVnodes, kMaxUnusedVnodes);
3415 
3416 	uint32 count = sVnodeTable->CountElements();
3417 
3418 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3419 		count - sUnusedVnodes);
3420 	return 0;
3421 }
3422 
3423 #endif	// ADD_DEBUGGER_COMMANDS
3424 
3425 
3426 /*!	Clears memory specified by an iovec array.
3427 */
3428 static void
3429 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3430 {
3431 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3432 		size_t length = std::min(vecs[i].iov_len, bytes);
3433 		memset(vecs[i].iov_base, 0, length);
3434 		bytes -= length;
3435 	}
3436 }
3437 
3438 
3439 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3440 	and calls the file system hooks to read/write the request to disk.
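
	A file_io_vec with a negative offset denotes a sparse extent: reads from
	such a range are zero-filled, while writing to it is considered a bug
	(and panics).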
3441 */
3442 static status_t
3443 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3444 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3445 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3446 	bool doWrite)
3447 {
3448 	if (fileVecCount == 0) {
3449 		// There are no file vecs at this offset, so we're obviously trying
3450 		// to access the file outside of its bounds
3451 		return B_BAD_VALUE;
3452 	}
3453 
3454 	size_t numBytes = *_numBytes;
3455 	uint32 fileVecIndex;
3456 	size_t vecOffset = *_vecOffset;
3457 	uint32 vecIndex = *_vecIndex;
3458 	status_t status;
3459 	size_t size;
3460 
3461 	if (!doWrite && vecOffset == 0) {
3462 		// now directly read the data from the device
3463 		// the first file_io_vec can be read directly
3464 
3465 		if (fileVecs[0].length < (off_t)numBytes)
3466 			size = fileVecs[0].length;
3467 		else
3468 			size = numBytes;
3469 
3470 		if (fileVecs[0].offset >= 0) {
3471 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3472 				&vecs[vecIndex], vecCount - vecIndex, &size);
3473 		} else {
3474 			// sparse read
3475 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3476 			status = B_OK;
3477 		}
3478 		if (status != B_OK)
3479 			return status;
3480 
3481 		// TODO: this is a work-around for buggy device drivers!
3482 		//	When our own drivers honour the length, we can:
3483 		//	a) also use this direct I/O for writes (otherwise, it would
3484 		//	   overwrite precious data)
3485 		//	b) panic if the term below is true (at least for writes)
3486 		if ((off_t)size > fileVecs[0].length) {
3487 			//dprintf("warning: device driver %p doesn't respect total length "
3488 			//	"in read_pages() call!\n", ref->device);
3489 			size = fileVecs[0].length;
3490 		}
3491 
3492 		ASSERT((off_t)size <= fileVecs[0].length);
3493 
3494 		// If the file portion was contiguous, we're already done now
3495 		if (size == numBytes)
3496 			return B_OK;
3497 
3498 		// if we reached the end of the file, we can return as well
3499 		if ((off_t)size != fileVecs[0].length) {
3500 			*_numBytes = size;
3501 			return B_OK;
3502 		}
3503 
3504 		fileVecIndex = 1;
3505 
3506 		// first, find out where we have to continue in our iovecs
3507 		for (; vecIndex < vecCount; vecIndex++) {
3508 			if (size < vecs[vecIndex].iov_len)
3509 				break;
3510 
3511 			size -= vecs[vecIndex].iov_len;
3512 		}
3513 
3514 		vecOffset = size;
3515 	} else {
3516 		fileVecIndex = 0;
3517 		size = 0;
3518 	}
3519 
3520 	// Too bad, let's process the rest of the file_io_vecs
3521 
3522 	size_t totalSize = size;
3523 	size_t bytesLeft = numBytes - size;
3524 
3525 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3526 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3527 		off_t fileOffset = fileVec.offset;
3528 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3529 
3530 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3531 			fileLeft));
3532 
3533 		// process the complete fileVec
3534 		while (fileLeft > 0) {
3535 			iovec tempVecs[MAX_TEMP_IO_VECS];
3536 			uint32 tempCount = 0;
3537 
3538 			// size tracks how much of what is left of the current fileVec
3539 			// (fileLeft) has been assigned to tempVecs
3540 			size = 0;
3541 
3542 			// assign what is left of the current fileVec to the tempVecs
3543 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3544 					&& tempCount < MAX_TEMP_IO_VECS;) {
3545 				// try to satisfy one iovec per iteration (or as much as
3546 				// possible)
3547 
3548 				// bytes left of the current iovec
3549 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3550 				if (vecLeft == 0) {
3551 					vecOffset = 0;
3552 					vecIndex++;
3553 					continue;
3554 				}
3555 
3556 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3557 					vecIndex, vecOffset, size));
3558 
3559 				// actually available bytes
3560 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3561 
3562 				tempVecs[tempCount].iov_base
3563 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3564 				tempVecs[tempCount].iov_len = tempVecSize;
3565 				tempCount++;
3566 
3567 				size += tempVecSize;
3568 				vecOffset += tempVecSize;
3569 			}
3570 
3571 			size_t bytes = size;
3572 
3573 			if (fileOffset == -1) {
3574 				if (doWrite) {
3575 					panic("sparse write attempt: vnode %p", vnode);
3576 					status = B_IO_ERROR;
3577 				} else {
3578 					// sparse read
3579 					zero_iovecs(tempVecs, tempCount, bytes);
3580 					status = B_OK;
3581 				}
3582 			} else if (doWrite) {
3583 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3584 					tempVecs, tempCount, &bytes);
3585 			} else {
3586 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3587 					tempVecs, tempCount, &bytes);
3588 			}
3589 			if (status != B_OK)
3590 				return status;
3591 
3592 			totalSize += bytes;
3593 			bytesLeft -= size;
3594 			if (fileOffset >= 0)
3595 				fileOffset += size;
3596 			fileLeft -= size;
3597 			//dprintf("-> file left = %Lu\n", fileLeft);
3598 
3599 			if (size != bytes || vecIndex >= vecCount) {
3600 				// there are no more bytes or iovecs, let's bail out
3601 				*_numBytes = totalSize;
3602 				return B_OK;
3603 			}
3604 		}
3605 	}
3606 
3607 	*_vecIndex = vecIndex;
3608 	*_vecOffset = vecOffset;
3609 	*_numBytes = totalSize;
3610 	return B_OK;
3611 }
3612 
3613 
3614 static bool
3615 is_user_in_group(gid_t gid)
3616 {
3617 	if (gid == getegid())
3618 		return true;
3619 
3620 	gid_t groups[NGROUPS_MAX];
3621 	int groupCount = getgroups(NGROUPS_MAX, groups);
3622 	for (int i = 0; i < groupCount; i++) {
3623 		if (gid == groups[i])
3624 			return true;
3625 	}
3626 
3627 	return false;
3628 }
3629 
3630 
3631 static status_t
3632 free_io_context(io_context* context)
3633 {
3634 	uint32 i;
3635 
3636 	TIOC(FreeIOContext(context));
3637 
3638 	if (context->root)
3639 		put_vnode(context->root);
3640 
3641 	if (context->cwd)
3642 		put_vnode(context->cwd);
3643 
3644 	mutex_lock(&context->io_mutex);
3645 
3646 	for (i = 0; i < context->table_size; i++) {
3647 		if (struct file_descriptor* descriptor = context->fds[i]) {
3648 			close_fd(descriptor);
3649 			put_fd(descriptor);
3650 		}
3651 	}
3652 
3653 	mutex_destroy(&context->io_mutex);
3654 
3655 	remove_node_monitors(context);
3656 	free(context->fds);
3657 	free(context);
3658 
3659 	return B_OK;
3660 }
3661 
3662 
3663 static status_t
3664 resize_monitor_table(struct io_context* context, const int newSize)
3665 {
3666 	int	status = B_OK;
3667 
3668 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3669 		return B_BAD_VALUE;
3670 
3671 	mutex_lock(&context->io_mutex);
3672 
3673 	if ((size_t)newSize < context->num_monitors) {
3674 		status = B_BUSY;
3675 		goto out;
3676 	}
3677 	context->max_monitors = newSize;
3678 
3679 out:
3680 	mutex_unlock(&context->io_mutex);
3681 	return status;
3682 }
3683 
3684 
3685 //	#pragma mark - public API for file systems
3686 
3687 
3688 extern "C" status_t
3689 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3690 	fs_vnode_ops* ops)
3691 {
3692 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3693 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3694 
3695 	if (privateNode == NULL)
3696 		return B_BAD_VALUE;
3697 
3698 	int32 tries = BUSY_VNODE_RETRIES;
3699 restart:
3700 	// create the node
3701 	bool nodeCreated;
3702 	struct vnode* vnode;
3703 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3704 		nodeCreated);
3705 	if (status != B_OK)
3706 		return status;
3707 
3708 	WriteLocker nodeLocker(sVnodeLock, true);
3709 		// create_new_vnode_and_lock() has locked for us
3710 
3711 	if (!nodeCreated && vnode->IsBusy()) {
3712 		nodeLocker.Unlock();
3713 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3714 			return B_BUSY;
3715 		goto restart;
3716 	}
3717 
3718 	// file system integrity check:
3719 	// test if the vnode already exists and bail out if this is the case!
3720 	if (!nodeCreated) {
3721 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3722 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3723 			vnode->private_node);
3724 		return B_ERROR;
3725 	}
3726 
3727 	vnode->private_node = privateNode;
3728 	vnode->ops = ops;
3729 	vnode->SetUnpublished(true);
3730 
3731 	TRACE(("returns: %s\n", strerror(status)));
3732 
3733 	return status;
3734 }
3735 
3736 
3737 extern "C" status_t
3738 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3739 	fs_vnode_ops* ops, int type, uint32 flags)
3740 {
3741 	FUNCTION(("publish_vnode()\n"));
3742 
3743 	int32 tries = BUSY_VNODE_RETRIES;
3744 restart:
3745 	WriteLocker locker(sVnodeLock);
3746 
3747 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3748 
3749 	bool nodeCreated = false;
3750 	if (vnode == NULL) {
3751 		if (privateNode == NULL)
3752 			return B_BAD_VALUE;
3753 
3754 		// create the node
3755 		locker.Unlock();
3756 			// create_new_vnode_and_lock() will re-lock for us on success
3757 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3758 			nodeCreated);
3759 		if (status != B_OK)
3760 			return status;
3761 
3762 		locker.SetTo(sVnodeLock, true);
3763 	}
3764 
3765 	if (nodeCreated) {
3766 		vnode->private_node = privateNode;
3767 		vnode->ops = ops;
3768 		vnode->SetUnpublished(true);
3769 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3770 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3771 		// already known, but not published
3772 	} else if (vnode->IsBusy()) {
3773 		locker.Unlock();
3774 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3775 			return B_BUSY;
3776 		goto restart;
3777 	} else
3778 		return B_BAD_VALUE;
3779 
3780 	bool publishSpecialSubNode = false;
3781 
3782 	vnode->SetType(type);
3783 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3784 	publishSpecialSubNode = is_special_node_type(type)
3785 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3786 
3787 	status_t status = B_OK;
3788 
3789 	// create sub vnodes, if necessary
3790 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3791 		locker.Unlock();
3792 
3793 		fs_volume* subVolume = volume;
3794 		if (volume->sub_volume != NULL) {
3795 			while (status == B_OK && subVolume->sub_volume != NULL) {
3796 				subVolume = subVolume->sub_volume;
3797 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3798 					vnode);
3799 			}
3800 		}
3801 
3802 		if (status == B_OK && publishSpecialSubNode)
3803 			status = create_special_sub_node(vnode, flags);
3804 
3805 		if (status != B_OK) {
3806 			// error -- clean up the created sub vnodes
3807 			while (subVolume->super_volume != volume) {
3808 				subVolume = subVolume->super_volume;
3809 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3810 			}
3811 		}
3812 
3813 		if (status == B_OK) {
3814 			ReadLocker vnodesReadLocker(sVnodeLock);
3815 			AutoLocker<Vnode> nodeLocker(vnode);
3816 			vnode->SetBusy(false);
3817 			vnode->SetUnpublished(false);
3818 		} else {
3819 			locker.Lock();
3820 			sVnodeTable->Remove(vnode);
3821 			remove_vnode_from_mount_list(vnode, vnode->mount);
3822 			free(vnode);
3823 		}
3824 	} else {
3825 		// we still hold the write lock -- mark the node unbusy and published
3826 		vnode->SetBusy(false);
3827 		vnode->SetUnpublished(false);
3828 	}
3829 
3830 	TRACE(("returns: %s\n", strerror(status)));
3831 
3832 	return status;
3833 }
3834 
3835 
3836 extern "C" status_t
3837 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3838 {
3839 	struct vnode* vnode;
3840 
3841 	if (volume == NULL)
3842 		return B_BAD_VALUE;
3843 
3844 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3845 	if (status != B_OK)
3846 		return status;
3847 
3848 	// If this is a layered FS, we need to get the node cookie for the requested
3849 	// layer.
3850 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3851 		fs_vnode resolvedNode;
3852 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3853 			&resolvedNode);
3854 		if (status != B_OK) {
3855 			panic("get_vnode(): Failed to get super node for vnode %p, "
3856 				"volume: %p", vnode, volume);
3857 			put_vnode(vnode);
3858 			return status;
3859 		}
3860 
3861 		if (_privateNode != NULL)
3862 			*_privateNode = resolvedNode.private_node;
3863 	} else if (_privateNode != NULL)
3864 		*_privateNode = vnode->private_node;
3865 
3866 	return B_OK;
3867 }
3868 
3869 
3870 extern "C" status_t
3871 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3872 {
3873 	struct vnode* vnode;
3874 
3875 	rw_lock_read_lock(&sVnodeLock);
3876 	vnode = lookup_vnode(volume->id, vnodeID);
3877 	rw_lock_read_unlock(&sVnodeLock);
3878 
3879 	if (vnode == NULL)
3880 		return B_BAD_VALUE;
3881 
3882 	inc_vnode_ref_count(vnode);
3883 	return B_OK;
3884 }
3885 
3886 
3887 extern "C" status_t
3888 put_vnode(fs_volume* volume, ino_t vnodeID)
3889 {
3890 	struct vnode* vnode;
3891 
3892 	rw_lock_read_lock(&sVnodeLock);
3893 	vnode = lookup_vnode(volume->id, vnodeID);
3894 	rw_lock_read_unlock(&sVnodeLock);
3895 
3896 	if (vnode == NULL)
3897 		return B_BAD_VALUE;
3898 
3899 	dec_vnode_ref_count(vnode, false, true);
3900 	return B_OK;
3901 }
3902 
3903 
3904 extern "C" status_t
3905 remove_vnode(fs_volume* volume, ino_t vnodeID)
3906 {
3907 	ReadLocker locker(sVnodeLock);
3908 
3909 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3910 	if (vnode == NULL)
3911 		return B_ENTRY_NOT_FOUND;
3912 
3913 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3914 		// this vnode is in use
3915 		return B_BUSY;
3916 	}
3917 
3918 	vnode->Lock();
3919 
3920 	vnode->SetRemoved(true);
3921 	bool removeUnpublished = false;
3922 
3923 	if (vnode->IsUnpublished()) {
3924 		// prepare the vnode for deletion
3925 		removeUnpublished = true;
3926 		vnode->SetBusy(true);
3927 	}
3928 
3929 	vnode->Unlock();
3930 	locker.Unlock();
3931 
3932 	if (removeUnpublished) {
3933 		// If the vnode hasn't been published yet, we delete it here
3934 		atomic_add(&vnode->ref_count, -1);
3935 		free_vnode(vnode, true);
3936 	}
3937 
3938 	return B_OK;
3939 }
3940 
3941 
3942 extern "C" status_t
3943 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3944 {
3945 	struct vnode* vnode;
3946 
3947 	rw_lock_read_lock(&sVnodeLock);
3948 
3949 	vnode = lookup_vnode(volume->id, vnodeID);
3950 	if (vnode) {
3951 		AutoLocker<Vnode> nodeLocker(vnode);
3952 		vnode->SetRemoved(false);
3953 	}
3954 
3955 	rw_lock_read_unlock(&sVnodeLock);
3956 	return B_OK;
3957 }
3958 
3959 
3960 extern "C" status_t
3961 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3962 {
3963 	ReadLocker _(sVnodeLock);
3964 
3965 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3966 		if (_removed != NULL)
3967 			*_removed = vnode->IsRemoved();
3968 		return B_OK;
3969 	}
3970 
3971 	return B_BAD_VALUE;
3972 }
3973 
3974 
3975 extern "C" fs_volume*
3976 volume_for_vnode(fs_vnode* _vnode)
3977 {
3978 	if (_vnode == NULL)
3979 		return NULL;
3980 
3981 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3982 	return vnode->mount->volume;
3983 }
3984 
3985 
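/*!	Checks whether the effective user may access a node with the given
	\a mode, \a nodeGroupID, and \a nodeUserID in the way specified by
	\a accessMode (a combination of \c R_OK, \c W_OK, and \c X_OK).

	Example (illustrative): for a regular file with mode 0644 owned by
	another user and group, requesting \c R_OK succeeds, while requesting
	\c W_OK yields \c B_PERMISSION_DENIED.
*/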
3986 extern "C" status_t
3987 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3988 	uid_t nodeUserID)
3989 {
3990 	// get node permissions
3991 	int userPermissions = (mode & S_IRWXU) >> 6;
3992 	int groupPermissions = (mode & S_IRWXG) >> 3;
3993 	int otherPermissions = mode & S_IRWXO;
3994 
3995 	// get the node permissions for this uid/gid
3996 	int permissions = 0;
3997 	uid_t uid = geteuid();
3998 
3999 	if (uid == 0) {
4000 		// user is root
4001 		// root always has read/write permission, but at least one of the
4002 		// execute bits must be set for execute permission
4003 		permissions = userPermissions | groupPermissions | otherPermissions
4004 			| S_IROTH | S_IWOTH;
4005 		if (S_ISDIR(mode))
4006 			permissions |= S_IXOTH;
4007 	} else if (uid == nodeUserID) {
4008 		// user is node owner
4009 		permissions = userPermissions;
4010 	} else if (is_user_in_group(nodeGroupID)) {
4011 		// user is in owning group
4012 		permissions = groupPermissions;
4013 	} else {
4014 		// user is one of the others
4015 		permissions = otherPermissions;
4016 	}
4017 
4018 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4019 }
4020 
4021 
4022 #if 0
4023 extern "C" status_t
4024 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4025 	size_t* _numBytes)
4026 {
4027 	struct file_descriptor* descriptor;
4028 	struct vnode* vnode;
4029 
4030 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4031 	if (descriptor == NULL)
4032 		return B_FILE_ERROR;
4033 
4034 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4035 		count, 0, _numBytes);
4036 
4037 	put_fd(descriptor);
4038 	return status;
4039 }
4040 
4041 
4042 extern "C" status_t
4043 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4044 	size_t* _numBytes)
4045 {
4046 	struct file_descriptor* descriptor;
4047 	struct vnode* vnode;
4048 
4049 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4050 	if (descriptor == NULL)
4051 		return B_FILE_ERROR;
4052 
4053 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4054 		count, 0, _numBytes);
4055 
4056 	put_fd(descriptor);
4057 	return status;
4058 }
4059 #endif
4060 
4061 
4062 extern "C" status_t
4063 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4064 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4065 	size_t* _bytes)
4066 {
4067 	struct file_descriptor* descriptor;
4068 	struct vnode* vnode;
4069 
4070 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4071 	if (descriptor == NULL)
4072 		return B_FILE_ERROR;
4073 
4074 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4075 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4076 		false);
4077 
4078 	put_fd(descriptor);
4079 	return status;
4080 }
4081 
4082 
4083 extern "C" status_t
4084 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4085 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4086 	size_t* _bytes)
4087 {
4088 	struct file_descriptor* descriptor;
4089 	struct vnode* vnode;
4090 
4091 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4092 	if (descriptor == NULL)
4093 		return B_FILE_ERROR;
4094 
4095 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4096 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4097 		true);
4098 
4099 	put_fd(descriptor);
4100 	return status;
4101 }
4102 
4103 
4104 extern "C" status_t
4105 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4106 {
4107 	// lookup mount -- the caller is required to make sure that the mount
4108 	// won't go away
4109 	MutexLocker locker(sMountMutex);
4110 	struct fs_mount* mount = find_mount(mountID);
4111 	if (mount == NULL)
4112 		return B_BAD_VALUE;
4113 	locker.Unlock();
4114 
4115 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4116 }
4117 
4118 
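/*!	Adds a negative entry to the mount's entry cache, i.e. records that
	\a name does not exist in the given directory, so that subsequent
	lookups can fail fast without consulting the file system.
*/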
4119 extern "C" status_t
4120 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4121 {
4122 	// lookup mount -- the caller is required to make sure that the mount
4123 	// won't go away
4124 	MutexLocker locker(sMountMutex);
4125 	struct fs_mount* mount = find_mount(mountID);
4126 	if (mount == NULL)
4127 		return B_BAD_VALUE;
4128 	locker.Unlock();
4129 
4130 	return mount->entry_cache.Add(dirID, name, -1, true);
4131 }
4132 
4133 
4134 extern "C" status_t
4135 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4136 {
4137 	// lookup mount -- the caller is required to make sure that the mount
4138 	// won't go away
4139 	MutexLocker locker(sMountMutex);
4140 	struct fs_mount* mount = find_mount(mountID);
4141 	if (mount == NULL)
4142 		return B_BAD_VALUE;
4143 	locker.Unlock();
4144 
4145 	return mount->entry_cache.Remove(dirID, name);
4146 }
4147 
4148 
4149 //	#pragma mark - private VFS API
4150 //	Functions the VFS exports for other parts of the kernel
4151 
4152 
4153 /*! Acquires another reference to the vnode that has to be released
4154 	by calling vfs_put_vnode().
4155 */
4156 void
4157 vfs_acquire_vnode(struct vnode* vnode)
4158 {
4159 	inc_vnode_ref_count(vnode);
4160 }
4161 
4162 
4163 /*! This is currently called from file_cache_create() only.
4164 	It's probably a temporary solution as long as devfs requires that
4165 	fs_read_pages()/fs_write_pages() are called with the standard
4166 	open cookie and not with a device cookie.
4167 	If that's done differently, remove this call; it has no other
4168 	purpose.
4169 */
4170 extern "C" status_t
4171 vfs_get_cookie_from_fd(int fd, void** _cookie)
4172 {
4173 	struct file_descriptor* descriptor;
4174 
4175 	descriptor = get_fd(get_current_io_context(true), fd);
4176 	if (descriptor == NULL)
4177 		return B_FILE_ERROR;
4178 
4179 	*_cookie = descriptor->cookie;
4180 	return B_OK;
4181 }
4182 
4183 
4184 extern "C" status_t
4185 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4186 {
4187 	*vnode = get_vnode_from_fd(fd, kernel);
4188 
4189 	if (*vnode == NULL)
4190 		return B_FILE_ERROR;
4191 
4192 	return B_NO_ERROR;
4193 }
4194 
4195 
4196 extern "C" status_t
4197 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4198 {
4199 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4200 		path, kernel));
4201 
4202 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4203 	if (pathBuffer.InitCheck() != B_OK)
4204 		return B_NO_MEMORY;
4205 
4206 	char* buffer = pathBuffer.LockBuffer();
4207 	strlcpy(buffer, path, pathBuffer.BufferSize());
4208 
4209 	struct vnode* vnode;
4210 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4211 	if (status != B_OK)
4212 		return status;
4213 
4214 	*_vnode = vnode;
4215 	return B_OK;
4216 }
4217 
4218 
4219 extern "C" status_t
4220 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4221 {
4222 	struct vnode* vnode = NULL;
4223 
4224 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4225 	if (status != B_OK)
4226 		return status;
4227 
4228 	*_vnode = vnode;
4229 	return B_OK;
4230 }
4231 
4232 
4233 extern "C" status_t
4234 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4235 	const char* name, struct vnode** _vnode)
4236 {
4237 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4238 }
4239 
4240 
4241 extern "C" void
4242 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4243 {
4244 	*_mountID = vnode->device;
4245 	*_vnodeID = vnode->id;
4246 }
4247 
4248 
4249 /*!
4250 	Helper function abstracting the process of "converting" a given
4251 	vnode-pointer to a fs_vnode-pointer.
4252 	Currently only used in bindfs.
4253 */
4254 extern "C" fs_vnode*
4255 vfs_fsnode_for_vnode(struct vnode* vnode)
4256 {
4257 	return vnode;
4258 }
4259 
4260 
4261 /*!
4262 	Calls fs_open() on the given vnode and returns a new
4263 	file descriptor for it
4264 */
4265 int
4266 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4267 {
4268 	return open_vnode(vnode, openMode, kernel);
4269 }
4270 
4271 
4272 /*!	Looks up a vnode with the given mount and vnode ID.
4273 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4274 	to the node.
4275 	It's currently only used by file_cache_create().
4276 */
4277 extern "C" status_t
4278 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4279 {
4280 	rw_lock_read_lock(&sVnodeLock);
4281 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4282 	rw_lock_read_unlock(&sVnodeLock);
4283 
4284 	if (vnode == NULL)
4285 		return B_ERROR;
4286 
4287 	*_vnode = vnode;
4288 	return B_OK;
4289 }
4290 
4291 
4292 extern "C" status_t
4293 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4294 	bool traverseLeafLink, bool kernel, void** _node)
4295 {
4296 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4297 		volume, path, kernel));
4298 
4299 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4300 	if (pathBuffer.InitCheck() != B_OK)
4301 		return B_NO_MEMORY;
4302 
4303 	fs_mount* mount;
4304 	status_t status = get_mount(volume->id, &mount);
4305 	if (status != B_OK)
4306 		return status;
4307 
4308 	char* buffer = pathBuffer.LockBuffer();
4309 	strlcpy(buffer, path, pathBuffer.BufferSize());
4310 
4311 	struct vnode* vnode = mount->root_vnode;
4312 
4313 	if (buffer[0] == '/')
4314 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4315 	else {
4316 		inc_vnode_ref_count(vnode);
4317 			// vnode_path_to_vnode() releases a reference to the starting vnode
4318 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4319 			kernel, &vnode, NULL);
4320 	}
4321 
4322 	put_mount(mount);
4323 
4324 	if (status != B_OK)
4325 		return status;
4326 
4327 	if (vnode->device != volume->id) {
4328 		// wrong mount ID -- must not gain access to foreign file system nodes
4329 		put_vnode(vnode);
4330 		return B_BAD_VALUE;
4331 	}
4332 
4333 	// Use get_vnode() to resolve the cookie for the right layer.
4334 	status = get_vnode(volume, vnode->id, _node);
4335 	put_vnode(vnode);
4336 
4337 	return status;
4338 }
4339 
4340 
4341 status_t
4342 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4343 	struct stat* stat, bool kernel)
4344 {
4345 	status_t status;
4346 
4347 	if (path != NULL) {
4348 		// path given: get the stat of the node referred to by (fd, path)
4349 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4350 		if (pathBuffer.InitCheck() != B_OK)
4351 			return B_NO_MEMORY;
4352 
4353 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4354 			traverseLeafLink, stat, kernel);
4355 	} else {
4356 		// no path given: get the FD and use the FD operation
4357 		struct file_descriptor* descriptor
4358 			= get_fd(get_current_io_context(kernel), fd);
4359 		if (descriptor == NULL)
4360 			return B_FILE_ERROR;
4361 
4362 		if (descriptor->ops->fd_read_stat)
4363 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4364 		else
4365 			status = B_UNSUPPORTED;
4366 
4367 		put_fd(descriptor);
4368 	}
4369 
4370 	return status;
4371 }
4372 
4373 
4374 /*!	Finds the full path to the file that contains the module \a moduleName,
4375 	puts it into \a pathBuffer, and returns B_OK for success.
4376 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4377 	and \c B_ENTRY_NOT_FOUND if no file could be found.
4378 	\a pathBuffer is clobbered in any case and must not be relied on if this
4379 	function returns unsuccessfully.
4380 	\a basePath and \a pathBuffer must not point to the same space.
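
	For example (paths purely illustrative), with \a basePath
	"/boot/system/add-ons/kernel" and \a moduleName "bus_managers/foo/v1",
	the function descends the existing directories along the module name
	components and returns the path of the first regular file it
	encounters, e.g. "/boot/system/add-ons/kernel/bus_managers/foo".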
4381 */
4382 status_t
4383 vfs_get_module_path(const char* basePath, const char* moduleName,
4384 	char* pathBuffer, size_t bufferSize)
4385 {
4386 	struct vnode* dir;
4387 	struct vnode* file;
4388 	status_t status;
4389 	size_t length;
4390 	char* path;
4391 
4392 	if (bufferSize == 0
4393 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4394 		return B_BUFFER_OVERFLOW;
4395 
4396 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4397 	if (status != B_OK)
4398 		return status;
4399 
4400 	// the path buffer had been clobbered by the above call
4401 	length = strlcpy(pathBuffer, basePath, bufferSize);
4402 	if (pathBuffer[length - 1] != '/')
4403 		pathBuffer[length++] = '/';
4404 
4405 	path = pathBuffer + length;
4406 	bufferSize -= length;
4407 
4408 	while (moduleName) {
4409 		char* nextPath = strchr(moduleName, '/');
4410 		if (nextPath == NULL)
4411 			length = strlen(moduleName);
4412 		else {
4413 			length = nextPath - moduleName;
4414 			nextPath++;
4415 		}
4416 
4417 		if (length + 1 >= bufferSize) {
4418 			status = B_BUFFER_OVERFLOW;
4419 			goto err;
4420 		}
4421 
4422 		memcpy(path, moduleName, length);
4423 		path[length] = '\0';
4424 		moduleName = nextPath;
4425 
4426 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4427 		if (status != B_OK) {
4428 			// vnode_path_to_vnode() has already released the reference to dir
4429 			return status;
4430 		}
4431 
4432 		if (S_ISDIR(file->Type())) {
4433 			// go to the next directory
4434 			path[length] = '/';
4435 			path[length + 1] = '\0';
4436 			path += length + 1;
4437 			bufferSize -= length + 1;
4438 
4439 			dir = file;
4440 		} else if (S_ISREG(file->Type())) {
4441 			// it's a file so it should be what we've searched for
4442 			put_vnode(file);
4443 
4444 			return B_OK;
4445 		} else {
4446 			TRACE(("vfs_get_module_path(): something is strange here: "
4447 				"0x%08" B_PRIx32 "...\n", file->Type()));
4448 			status = B_ERROR;
4449 			dir = file;
4450 			goto err;
4451 		}
4452 	}
4453 
4454 	// if we got here, the moduleName just pointed to a directory, not to
4455 	// a real module - what should we do in this case?
4456 	status = B_ENTRY_NOT_FOUND;
4457 
4458 err:
4459 	put_vnode(dir);
4460 	return status;
4461 }
4462 
4463 
4464 /*!	\brief Normalizes a given path.
4465 
4466 	The path must refer to an existing or non-existing entry in an existing
4467 	directory; that is, after chopping off the leaf component, the remaining
4468 	path must refer to an existing directory.
4469 
4470 	The returned path will be canonical in that it will be absolute, will not
4471 	contain any "." or ".." components or duplicate occurrences of '/'s,
4472 	and none of the directory components will be symbolic links.
4473 
4474 	Any two paths referring to the same entry will result in the same
4475 	normalized path (well, that is pretty much the definition of `normalized',
4476 	isn't it :-).
4477 
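
	A minimal usage sketch (buffer size and path purely illustrative):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	status_t error = vfs_normalize_path("/boot/home//config/../apps",
		buffer, sizeof(buffer), true, true);
	// on success, buffer contains "/boot/home/apps" (assuming no symlinks
	// along the way)
	\endcode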
4478 	\param path The path to be normalized.
4479 	\param buffer The buffer into which the normalized path will be written.
4480 		   May be the same one as \a path.
4481 	\param bufferSize The size of \a buffer.
4482 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4483 	\param kernel \c true, if the IO context of the kernel shall be used,
4484 		   otherwise that of the team this thread belongs to. Only relevant,
4485 		   otherwise that of the team this thread belongs to. Only relevant
4486 		   if the path is relative (to get the CWD).
4487 */
4488 status_t
4489 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4490 	bool traverseLink, bool kernel)
4491 {
4492 	if (!path || !buffer || bufferSize < 1)
4493 		return B_BAD_VALUE;
4494 
4495 	if (path != buffer) {
4496 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4497 			return B_BUFFER_OVERFLOW;
4498 	}
4499 
4500 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4501 }
4502 
4503 
4504 /*!	\brief Gets the parent of the passed in node.
4505 
4506 	Gets the parent of the passed in node, and correctly resolves covered
4507 	nodes.
4508 */
4509 extern "C" status_t
4510 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4511 {
4512 	return resolve_covered_parent(parent, device, node,
4513 		get_current_io_context(true));
4514 }
4515 
4516 
4517 /*!	\brief Creates a special node in the file system.
4518 
4519 	The caller gets a reference to the newly created node (which is passed
4520 	back through \a _createdVnode) and is responsible for releasing it.
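
	A minimal usage sketch (mode and flags purely illustrative), creating
	an unnamed FIFO-like node without an entry in the root FS:
	\code
	struct vnode* createdVnode;
	status_t error = vfs_create_special_node(NULL, NULL, S_IFIFO | 0666, 0,
		true, NULL, &createdVnode);
	if (error == B_OK)
		put_vnode(createdVnode);
			// release the reference once the node is no longer needed
	\endcode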
4521 
4522 	\param path The path where to create the entry for the node. Can be \c NULL,
4523 		in which case the node is created without an entry in the root FS -- it
4524 		will automatically be deleted when the last reference has been released.
4525 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4526 		the target file system will just create the node with its standard
4527 		operations. Depending on the type of the node a subnode might be created
4528 		automatically, though.
4529 	\param mode The type and permissions for the node to be created.
4530 	\param flags Flags to be passed to the creating FS.
4531 	\param kernel \c true, if called in the kernel context (relevant only if
4532 		\a path is not \c NULL and not absolute).
4533 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4534 		file system creating the node, with the private data pointer and
4535 		operations for the super node. Can be \c NULL.
4536 	\param _createdVnode Pointer to pre-allocated storage where to store the
4537 		pointer to the newly created node.
4538 	\return \c B_OK, if everything went fine, another error code otherwise.
4539 */
4540 status_t
4541 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4542 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4543 	struct vnode** _createdVnode)
4544 {
4545 	struct vnode* dirNode;
4546 	char _leaf[B_FILE_NAME_LENGTH];
4547 	char* leaf = NULL;
4548 
4549 	if (path) {
4550 		// We've got a path. Get the dir vnode and the leaf name.
4551 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4552 		if (tmpPathBuffer.InitCheck() != B_OK)
4553 			return B_NO_MEMORY;
4554 
4555 		char* tmpPath = tmpPathBuffer.LockBuffer();
4556 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4557 			return B_NAME_TOO_LONG;
4558 
4559 		// get the dir vnode and the leaf name
4560 		leaf = _leaf;
4561 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4562 		if (error != B_OK)
4563 			return error;
4564 	} else {
4565 		// No path. Create the node in the root FS.
4566 		dirNode = sRoot;
4567 		inc_vnode_ref_count(dirNode);
4568 	}
4569 
4570 	VNodePutter _(dirNode);
4571 
4572 	// check support for creating special nodes
4573 	if (!HAS_FS_CALL(dirNode, create_special_node))
4574 		return B_UNSUPPORTED;
4575 
4576 	// create the node
4577 	fs_vnode superVnode;
4578 	ino_t nodeID;
4579 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4580 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4581 	if (status != B_OK)
4582 		return status;
4583 
4584 	// lookup the node
4585 	rw_lock_read_lock(&sVnodeLock);
4586 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4587 	rw_lock_read_unlock(&sVnodeLock);
4588 
4589 	if (*_createdVnode == NULL) {
4590 		panic("vfs_create_special_node(): lookup of node failed");
4591 		return B_ERROR;
4592 	}
4593 
4594 	return B_OK;
4595 }
4596 
4597 
4598 extern "C" void
4599 vfs_put_vnode(struct vnode* vnode)
4600 {
4601 	put_vnode(vnode);
4602 }
4603 
4604 
4605 extern "C" status_t
4606 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4607 {
4608 	// Get current working directory from io context
4609 	struct io_context* context = get_current_io_context(false);
4610 	status_t status = B_OK;
4611 
4612 	mutex_lock(&context->io_mutex);
4613 
4614 	if (context->cwd != NULL) {
4615 		*_mountID = context->cwd->device;
4616 		*_vnodeID = context->cwd->id;
4617 	} else
4618 		status = B_ERROR;
4619 
4620 	mutex_unlock(&context->io_mutex);
4621 	return status;
4622 }
4623 
4624 
4625 status_t
4626 vfs_unmount(dev_t mountID, uint32 flags)
4627 {
4628 	return fs_unmount(NULL, mountID, flags, true);
4629 }
4630 
4631 
4632 extern "C" status_t
4633 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4634 {
4635 	struct vnode* vnode;
4636 
4637 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4638 	if (status != B_OK)
4639 		return status;
4640 
4641 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4642 	put_vnode(vnode);
4643 	return B_OK;
4644 }
4645 
4646 
4647 extern "C" void
4648 vfs_free_unused_vnodes(int32 level)
4649 {
4650 	vnode_low_resource_handler(NULL,
4651 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4652 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4653 		level);
4654 }
4655 
4656 
4657 extern "C" bool
4658 vfs_can_page(struct vnode* vnode, void* cookie)
4659 {
4660 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4661 
4662 	if (HAS_FS_CALL(vnode, can_page))
4663 		return FS_CALL(vnode, can_page, cookie);
4664 	return false;
4665 }
4666 
4667 
4668 extern "C" status_t
4669 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4670 	const generic_io_vec* vecs, size_t count, uint32 flags,
4671 	generic_size_t* _numBytes)
4672 {
4673 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4674 		vecs, pos));
4675 
4676 #if VFS_PAGES_IO_TRACING
4677 	generic_size_t bytesRequested = *_numBytes;
4678 #endif
4679 
4680 	IORequest request;
4681 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4682 	if (status == B_OK) {
4683 		status = vfs_vnode_io(vnode, cookie, &request);
4684 		if (status == B_OK)
4685 			status = request.Wait();
4686 		*_numBytes = request.TransferredBytes();
4687 	}
4688 
4689 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4690 		status, *_numBytes));
4691 
4692 	return status;
4693 }
4694 
4695 
4696 extern "C" status_t
4697 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4698 	const generic_io_vec* vecs, size_t count, uint32 flags,
4699 	generic_size_t* _numBytes)
4700 {
4701 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4702 		vecs, pos));
4703 
4704 #if VFS_PAGES_IO_TRACING
4705 	generic_size_t bytesRequested = *_numBytes;
4706 #endif
4707 
4708 	IORequest request;
4709 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4710 	if (status == B_OK) {
4711 		status = vfs_vnode_io(vnode, cookie, &request);
4712 		if (status == B_OK)
4713 			status = request.Wait();
4714 		*_numBytes = request.TransferredBytes();
4715 	}
4716 
4717 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4718 		status, *_numBytes));
4719 
4720 	return status;
4721 }
4722 
4723 
4724 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4725 	created if \a allocate is \c true.
4726 	In case it's successful, it will also grab a reference to the cache
4727 	it returns.
4728 */
4729 extern "C" status_t
4730 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4731 {
4732 	if (vnode->cache != NULL) {
4733 		vnode->cache->AcquireRef();
4734 		*_cache = vnode->cache;
4735 		return B_OK;
4736 	}
4737 
4738 	rw_lock_read_lock(&sVnodeLock);
4739 	vnode->Lock();
4740 
4741 	status_t status = B_OK;
4742 
4743 	// The cache could have been created in the meantime
4744 	if (vnode->cache == NULL) {
4745 		if (allocate) {
4746 			// TODO: actually the vnode needs to be busy already here, or
4747 			//	else this won't work...
4748 			bool wasBusy = vnode->IsBusy();
4749 			vnode->SetBusy(true);
4750 
4751 			vnode->Unlock();
4752 			rw_lock_read_unlock(&sVnodeLock);
4753 
4754 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4755 
4756 			rw_lock_read_lock(&sVnodeLock);
4757 			vnode->Lock();
4758 			vnode->SetBusy(wasBusy);
4759 		} else
4760 			status = B_BAD_VALUE;
4761 	}
4762 
4763 	vnode->Unlock();
4764 	rw_lock_read_unlock(&sVnodeLock);
4765 
4766 	if (status == B_OK) {
4767 		vnode->cache->AcquireRef();
4768 		*_cache = vnode->cache;
4769 	}
4770 
4771 	return status;
4772 }
4773 
4774 
4775 status_t
4776 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4777 	file_io_vec* vecs, size_t* _count)
4778 {
4779 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4780 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4781 
4782 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4783 }
4784 
4785 
4786 status_t
4787 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4788 {
4789 	status_t status = FS_CALL(vnode, read_stat, stat);
4790 
4791 	// fill in the st_dev and st_ino fields
4792 	if (status == B_OK) {
4793 		stat->st_dev = vnode->device;
4794 		stat->st_ino = vnode->id;
4795 		// the rdev field must stay unset for non-special files
4796 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4797 			stat->st_rdev = -1;
4798 	}
4799 
4800 	return status;
4801 }
4802 
4803 
4804 status_t
4805 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4806 {
4807 	struct vnode* vnode;
4808 	status_t status = get_vnode(device, inode, &vnode, true, false);
4809 	if (status != B_OK)
4810 		return status;
4811 
4812 	status = vfs_stat_vnode(vnode, stat);
4813 
4814 	put_vnode(vnode);
4815 	return status;
4816 }
4817 
4818 
4819 status_t
4820 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4821 {
4822 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4823 }
4824 
4825 
4826 status_t
4827 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4828 	bool kernel, char* path, size_t pathLength)
4829 {
4830 	struct vnode* vnode;
4831 	status_t status;
4832 
4833 	// filter invalid leaf names
4834 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4835 		return B_BAD_VALUE;
4836 
4837 	// get the vnode matching the dir's node_ref
4838 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4839 		// special cases "." and "..": we can directly get the vnode of the
4840 		// referenced directory
4841 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4842 		leaf = NULL;
4843 	} else
4844 		status = get_vnode(device, inode, &vnode, true, false);
4845 	if (status != B_OK)
4846 		return status;
4847 
4848 	// get the directory path
4849 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4850 	put_vnode(vnode);
4851 		// we don't need the vnode anymore
4852 	if (status != B_OK)
4853 		return status;
4854 
4855 	// append the leaf name
4856 	if (leaf) {
4857 		// insert a directory separator if this is not the file system root
4858 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4859 				>= pathLength)
4860 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4861 			return B_NAME_TOO_LONG;
4862 		}
4863 	}
4864 
4865 	return B_OK;
4866 }
4867 
4868 
4869 /*!	If the given descriptor locked its vnode, that lock will be released. */
4870 void
4871 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4872 {
4873 	struct vnode* vnode = fd_vnode(descriptor);
4874 
4875 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4876 		vnode->mandatory_locked_by = NULL;
4877 }
4878 
4879 
4880 /*!	Closes all file descriptors of the specified I/O context that
4881 	have the O_CLOEXEC flag set.
4882 */
4883 void
4884 vfs_exec_io_context(io_context* context)
4885 {
4886 	uint32 i;
4887 
4888 	for (i = 0; i < context->table_size; i++) {
4889 		mutex_lock(&context->io_mutex);
4890 
4891 		struct file_descriptor* descriptor = context->fds[i];
4892 		bool remove = false;
4893 
4894 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4895 			context->fds[i] = NULL;
4896 			context->num_used_fds--;
4897 
4898 			remove = true;
4899 		}
4900 
4901 		mutex_unlock(&context->io_mutex);
4902 
4903 		if (remove) {
4904 			close_fd(descriptor);
4905 			put_fd(descriptor);
4906 		}
4907 	}
4908 }
4909 
4910 
4911 /*! Sets up a new io_context structure, and inherits the properties
4912 	of the parent io_context if one is given.
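	If \a purgeCloseOnExec is \c true (as on exec()), descriptors with the
	close-on-exec flag set are not inherited from the parent.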
4913 */
4914 io_context*
4915 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4916 {
4917 	io_context* context = (io_context*)malloc(sizeof(io_context));
4918 	if (context == NULL)
4919 		return NULL;
4920 
4921 	TIOC(NewIOContext(context, parentContext));
4922 
4923 	memset(context, 0, sizeof(io_context));
4924 	context->ref_count = 1;
4925 
4926 	MutexLocker parentLocker;
4927 
4928 	size_t tableSize;
4929 	if (parentContext != NULL) {
4930 		parentLocker.SetTo(parentContext->io_mutex, false);
4931 		tableSize = parentContext->table_size;
4932 	} else
4933 		tableSize = DEFAULT_FD_TABLE_SIZE;
4934 
4935 	// allocate space for FDs and their close-on-exec flag
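	// All three tables live in a single allocation (illustrative layout):
	//   [ fds: tableSize pointers ][ select_infos: tableSize pointers ]
	//   [ close-on-exec bitmap: (tableSize + 7) / 8 bytes ]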
4936 	context->fds = (file_descriptor**)malloc(
4937 		sizeof(struct file_descriptor*) * tableSize
4938 		+ sizeof(struct select_sync*) * tableSize
4939 		+ (tableSize + 7) / 8);
4940 	if (context->fds == NULL) {
4941 		free(context);
4942 		return NULL;
4943 	}
4944 
4945 	context->select_infos = (select_info**)(context->fds + tableSize);
4946 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4947 
4948 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4949 		+ sizeof(struct select_sync*) * tableSize
4950 		+ (tableSize + 7) / 8);
4951 
4952 	mutex_init(&context->io_mutex, "I/O context");
4953 
4954 	// Copy all parent file descriptors
4955 
4956 	if (parentContext != NULL) {
4957 		size_t i;
4958 
4959 		mutex_lock(&sIOContextRootLock);
4960 		context->root = parentContext->root;
4961 		if (context->root)
4962 			inc_vnode_ref_count(context->root);
4963 		mutex_unlock(&sIOContextRootLock);
4964 
4965 		context->cwd = parentContext->cwd;
4966 		if (context->cwd)
4967 			inc_vnode_ref_count(context->cwd);
4968 
4969 		if (parentContext->inherit_fds) {
4970 			for (i = 0; i < tableSize; i++) {
4971 				struct file_descriptor* descriptor = parentContext->fds[i];
4972 
4973 				if (descriptor != NULL
4974 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4975 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4976 					if (closeOnExec && purgeCloseOnExec)
4977 						continue;
4978 
4979 					TFD(InheritFD(context, i, descriptor, parentContext));
4980 
4981 					context->fds[i] = descriptor;
4982 					context->num_used_fds++;
4983 					atomic_add(&descriptor->ref_count, 1);
4984 					atomic_add(&descriptor->open_count, 1);
4985 
4986 					if (closeOnExec)
4987 						fd_set_close_on_exec(context, i, true);
4988 				}
4989 			}
4990 		}
4991 
4992 		parentLocker.Unlock();
4993 	} else {
4994 		context->root = sRoot;
4995 		context->cwd = sRoot;
4996 
4997 		if (context->root)
4998 			inc_vnode_ref_count(context->root);
4999 
5000 		if (context->cwd)
5001 			inc_vnode_ref_count(context->cwd);
5002 	}
5003 
5004 	context->table_size = tableSize;
5005 	context->inherit_fds = parentContext != NULL;
5006 
5007 	list_init(&context->node_monitors);
5008 	context->max_monitors = DEFAULT_NODE_MONITORS;
5009 
5010 	return context;
5011 }
5012 
5013 
5014 void
5015 vfs_get_io_context(io_context* context)
5016 {
5017 	atomic_add(&context->ref_count, 1);
5018 }
5019 
5020 
5021 void
5022 vfs_put_io_context(io_context* context)
5023 {
5024 	if (atomic_add(&context->ref_count, -1) == 1)
5025 		free_io_context(context);
5026 }
5027 
5028 
5029 status_t
5030 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5031 {
5032 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5033 		return B_BAD_VALUE;
5034 
5035 	TIOC(ResizeIOContext(context, newSize));
5036 
5037 	MutexLocker _(context->io_mutex);
5038 
5039 	uint32 oldSize = context->table_size;
5040 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5041 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5042 
5043 	// If the tables shrink, make sure none of the fds being dropped are in use.
5044 	if (newSize < oldSize) {
5045 		for (uint32 i = oldSize; i-- > newSize;) {
5046 			if (context->fds[i])
5047 				return B_BUSY;
5048 		}
5049 	}
5050 
5051 	// store pointers to the old tables
5052 	file_descriptor** oldFDs = context->fds;
5053 	select_info** oldSelectInfos = context->select_infos;
5054 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5055 
5056 	// allocate new tables
5057 	file_descriptor** newFDs = (file_descriptor**)malloc(
5058 		sizeof(struct file_descriptor*) * newSize
5059 		+ sizeof(struct select_sync*) * newSize
5060 		+ newCloseOnExitBitmapSize);
5061 	if (newFDs == NULL)
5062 		return B_NO_MEMORY;
5063 
5064 	context->fds = newFDs;
5065 	context->select_infos = (select_info**)(context->fds + newSize);
5066 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5067 	context->table_size = newSize;
5068 
5069 	// copy entries from old tables
5070 	uint32 toCopy = min_c(oldSize, newSize);
5071 
5072 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5073 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5074 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5075 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5076 
5077 	// clear additional entries, if the tables grow
5078 	if (newSize > oldSize) {
5079 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5080 		memset(context->select_infos + oldSize, 0,
5081 			sizeof(void*) * (newSize - oldSize));
5082 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5083 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5084 	}
5085 
5086 	free(oldFDs);
5087 
5088 	return B_OK;
5089 }
5090 
5091 
5092 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5093 
5094 	Given an arbitrary vnode (identified by mount and node ID), the function
5095 	checks whether the vnode is covered by another vnode. If it is, the
5096 	function returns the mount and node ID of the covering vnode. Otherwise
5097 	it simply returns the supplied mount and node ID.
5098 
5099 	In case of error (e.g. the supplied node could not be found) the variables
5100 	for storing the resolved mount and node ID remain untouched and an error
5101 	code is returned.
5102 
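
	For example, if a volume is mounted at "/mnt", passing the mount and
	node ID of the "/mnt" directory on the parent volume yields the mount
	and node ID of the mounted volume's root directory instead.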
5103 	\param mountID The mount ID of the vnode in question.
5104 	\param nodeID The node ID of the vnode in question.
5105 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5106 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5107 	\return
5108 	- \c B_OK, if everything went fine,
5109 	- another error code, if something went wrong.
5110 */
5111 status_t
5112 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5113 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5114 {
5115 	// get the node
5116 	struct vnode* node;
5117 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5118 	if (error != B_OK)
5119 		return error;
5120 
5121 	// resolve the node
5122 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5123 		put_vnode(node);
5124 		node = coveringNode;
5125 	}
5126 
5127 	// set the return values
5128 	*resolvedMountID = node->device;
5129 	*resolvedNodeID = node->id;
5130 
5131 	put_vnode(node);
5132 
5133 	return B_OK;
5134 }
5135 
5136 
5137 status_t
5138 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5139 	ino_t* _mountPointNodeID)
5140 {
5141 	ReadLocker nodeLocker(sVnodeLock);
5142 	MutexLocker mountLocker(sMountMutex);
5143 
5144 	struct fs_mount* mount = find_mount(mountID);
5145 	if (mount == NULL)
5146 		return B_BAD_VALUE;
5147 
5148 	Vnode* mountPoint = mount->covers_vnode;
5149 
5150 	*_mountPointMountID = mountPoint->device;
5151 	*_mountPointNodeID = mountPoint->id;
5152 
5153 	return B_OK;
5154 }
5155 
5156 
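/*!	Establishes a covered/covering relationship between the vnode
	(\a mountID, \a nodeID) and the vnode (\a coveredMountID,
	\a coveredNodeID), as used for bind mounts. Fails with \c B_BUSY if
	either vnode already takes part in such a relationship or if one of
	the involved mounts is unmounting.
*/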
5157 status_t
5158 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5159 	ino_t coveredNodeID)
5160 {
5161 	// get the vnodes
5162 	Vnode* vnode;
5163 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5164 	if (error != B_OK)
5165 		return B_BAD_VALUE;
5166 	VNodePutter vnodePutter(vnode);
5167 
5168 	Vnode* coveredVnode;
5169 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5170 		false);
5171 	if (error != B_OK)
5172 		return B_BAD_VALUE;
5173 	VNodePutter coveredVnodePutter(coveredVnode);
5174 
5175 	// establish the covered/covering links
5176 	WriteLocker locker(sVnodeLock);
5177 
5178 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5179 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5180 		return B_BUSY;
5181 	}
5182 
5183 	vnode->covers = coveredVnode;
5184 	vnode->SetCovering(true);
5185 
5186 	coveredVnode->covered_by = vnode;
5187 	coveredVnode->SetCovered(true);
5188 
5189 	// the vnodes do now reference each other
5190 	inc_vnode_ref_count(vnode);
5191 	inc_vnode_ref_count(coveredVnode);
5192 
5193 	return B_OK;
5194 }
5195 
5196 
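/*!	Implements the back end of getrlimit() for the VFS-managed resources:
	\c RLIMIT_NOFILE (the size of the FD table) and \c RLIMIT_NOVMON (the
	node monitor limit).
*/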
5197 int
5198 vfs_getrlimit(int resource, struct rlimit* rlp)
5199 {
5200 	if (!rlp)
5201 		return B_BAD_ADDRESS;
5202 
5203 	switch (resource) {
5204 		case RLIMIT_NOFILE:
5205 		{
5206 			struct io_context* context = get_current_io_context(false);
5207 			MutexLocker _(context->io_mutex);
5208 
5209 			rlp->rlim_cur = context->table_size;
5210 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5211 			return 0;
5212 		}
5213 
5214 		case RLIMIT_NOVMON:
5215 		{
5216 			struct io_context* context = get_current_io_context(false);
5217 			MutexLocker _(context->io_mutex);
5218 
5219 			rlp->rlim_cur = context->max_monitors;
5220 			rlp->rlim_max = MAX_NODE_MONITORS;
5221 			return 0;
5222 		}
5223 
5224 		default:
5225 			return B_BAD_VALUE;
5226 	}
5227 }
5228 
5229 
5230 int
5231 vfs_setrlimit(int resource, const struct rlimit* rlp)
5232 {
5233 	if (!rlp)
5234 		return B_BAD_ADDRESS;
5235 
5236 	switch (resource) {
5237 		case RLIMIT_NOFILE:
5238 			/* TODO: check getuid() */
5239 			if (rlp->rlim_max != RLIM_SAVED_MAX
5240 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5241 				return B_NOT_ALLOWED;
5242 
5243 			return vfs_resize_fd_table(get_current_io_context(false),
5244 				rlp->rlim_cur);
5245 
5246 		case RLIMIT_NOVMON:
5247 			/* TODO: check getuid() */
5248 			if (rlp->rlim_max != RLIM_SAVED_MAX
5249 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5250 				return B_NOT_ALLOWED;
5251 
5252 			return resize_monitor_table(get_current_io_context(false),
5253 				rlp->rlim_cur);
5254 
5255 		default:
5256 			return B_BAD_VALUE;
5257 	}
5258 }
5259 
5260 
5261 status_t
5262 vfs_init(kernel_args* args)
5263 {
5264 	vnode::StaticInit();
5265 
5266 	sVnodeTable = new(std::nothrow) VnodeTable();
5267 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5268 		panic("vfs_init: error creating vnode hash table\n");
5269 
5270 	struct vnode dummy_vnode;
5271 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5272 
5273 	struct fs_mount dummyMount;
5274 	sMountsTable = new(std::nothrow) MountTable();
5275 	if (sMountsTable == NULL
5276 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5277 		panic("vfs_init: error creating mounts hash table\n");
5278 
5279 	node_monitor_init();
5280 
5281 	sRoot = NULL;
5282 
5283 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5284 
5285 	if (block_cache_init() != B_OK)
5286 		return B_ERROR;
5287 
5288 #ifdef ADD_DEBUGGER_COMMANDS
5289 	// add some debugger commands
5290 	add_debugger_command_etc("vnode", &dump_vnode,
5291 		"Print info about the specified vnode",
5292 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5293 		"Prints information about the vnode specified by address <vnode> or\n"
5294 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5295 		"constructed and printed. It might not be possible to construct a\n"
5296 		"complete path, though.\n",
5297 		0);
5298 	add_debugger_command("vnodes", &dump_vnodes,
5299 		"list all vnodes (from the specified device)");
5300 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5301 		"list all vnode caches");
5302 	add_debugger_command("mount", &dump_mount,
5303 		"info about the specified fs_mount");
5304 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5305 	add_debugger_command("io_context", &dump_io_context,
5306 		"info about the I/O context");
5307 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5308 		"info about vnode usage");
5309 #endif
5310 
5311 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5312 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5313 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5314 		0);
5315 
5316 	fifo_init();
5317 	file_map_init();
5318 
5319 	return file_cache_init();
5320 }
5321 
5322 
5323 //	#pragma mark - fd_ops implementations
5324 
5325 
5326 /*!
5327 	Calls fs_open() on the given vnode and returns a new
5328 	file descriptor for it
5329 */
5330 static int
5331 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5332 {
5333 	void* cookie;
5334 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5335 	if (status != B_OK)
5336 		return status;
5337 
5338 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5339 	if (fd < 0) {
5340 		FS_CALL(vnode, close, cookie);
5341 		FS_CALL(vnode, free_cookie, cookie);
5342 	}
5343 	return fd;
5344 }
5345 
5346 
5347 /*!
5348 	Creates a new entry in the given directory (or, unless O_EXCL is set,
5349 	opens the node an existing entry refers to) and returns a new file
5350 	descriptor for it
5351 */
5351 static int
5352 create_vnode(struct vnode* directory, const char* name, int openMode,
5353 	int perms, bool kernel)
5354 {
5355 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5356 	status_t status = B_ERROR;
5357 	struct vnode* vnode;
5358 	void* cookie;
5359 	ino_t newID;
5360 
5361 	// This is somewhat tricky: If the entry already exists, the FS responsible
5362 	// for the directory might not necessarily also be the one responsible for
5363 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5364 	// we can actually never call the create() hook without O_EXCL. Instead we
5365 	// try to look the entry up first. If it already exists, we just open the
5366 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5367 	// introduces a race condition, since someone else might have created the
5368 	// entry in the meantime. We hope the respective FS returns the correct
5369 	// error code, in which case we retry (up to 3 times).
5370 
5371 	for (int i = 0; i < 3 && status != B_OK; i++) {
5372 		// look the node up
5373 		status = lookup_dir_entry(directory, name, &vnode);
5374 		if (status == B_OK) {
5375 			VNodePutter putter(vnode);
5376 
5377 			if ((openMode & O_EXCL) != 0)
5378 				return B_FILE_EXISTS;
5379 
5380 			// If the node is a symlink, we have to follow it, unless
5381 			// O_NOTRAVERSE is set.
5382 			if (S_ISLNK(vnode->Type()) && traverse) {
5383 				putter.Put();
5384 				char clonedName[B_FILE_NAME_LENGTH + 1];
5385 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5386 						>= B_FILE_NAME_LENGTH) {
5387 					return B_NAME_TOO_LONG;
5388 				}
5389 
5390 				inc_vnode_ref_count(directory);
5391 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5392 					kernel, &vnode, NULL);
5393 				if (status != B_OK)
5394 					return status;
5395 
5396 				putter.SetTo(vnode);
5397 			}
5398 
5399 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5400 				return B_LINK_LIMIT;
5401 
5402 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5403 			// on success keep the vnode reference for the FD
5404 			if (fd >= 0)
5405 				putter.Detach();
5406 
5407 			return fd;
5408 		}
5409 
5410 		// it doesn't exist yet -- try to create it
5411 
5412 		if (!HAS_FS_CALL(directory, create))
5413 			return B_READ_ONLY_DEVICE;
5414 
5415 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5416 			&cookie, &newID);
5417 		if (status != B_OK
5418 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5419 			return status;
5420 		}
5421 	}
5422 
5423 	if (status != B_OK)
5424 		return status;
5425 
5426 	// the node has been created successfully
5427 
5428 	rw_lock_read_lock(&sVnodeLock);
5429 	vnode = lookup_vnode(directory->device, newID);
5430 	rw_lock_read_unlock(&sVnodeLock);
5431 
5432 	if (vnode == NULL) {
5433 		panic("vfs: fs_create() returned success but there is no vnode, "
5434 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5435 		return B_BAD_VALUE;
5436 	}
5437 
5438 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5439 	if (fd >= 0)
5440 		return fd;
5441 
5442 	status = fd;
5443 
5444 	// something went wrong, clean up
5445 
5446 	FS_CALL(vnode, close, cookie);
5447 	FS_CALL(vnode, free_cookie, cookie);
5448 	put_vnode(vnode);
5449 
5450 	FS_CALL(directory, unlink, name);
5451 
5452 	return status;
5453 }
5454 
5455 
5456 /*! Calls fs open_dir() on the given vnode and returns a new
5457 	file descriptor for it
5458 */
5459 static int
5460 open_dir_vnode(struct vnode* vnode, bool kernel)
5461 {
5462 	void* cookie;
5463 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5464 	if (status != B_OK)
5465 		return status;
5466 
5467 	// directory is opened, create a fd
5468 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5469 	if (status >= 0)
5470 		return status;
5471 
5472 	FS_CALL(vnode, close_dir, cookie);
5473 	FS_CALL(vnode, free_dir_cookie, cookie);
5474 
5475 	return status;
5476 }
5477 
5478 
5479 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5480 	file descriptor for it.
5481 	Used by attr_dir_open(), and attr_dir_open_fd().
5482 */
5483 static int
5484 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5485 {
5486 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5487 		return B_UNSUPPORTED;
5488 
5489 	void* cookie;
5490 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5491 	if (status != B_OK)
5492 		return status;
5493 
5494 	// directory is opened, create a fd
5495 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5496 		kernel);
5497 	if (status >= 0)
5498 		return status;
5499 
5500 	FS_CALL(vnode, close_attr_dir, cookie);
5501 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5502 
5503 	return status;
5504 }
5505 
5506 
5507 static int
5508 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5509 	int openMode, int perms, bool kernel)
5510 {
5511 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5512 		"kernel %d\n", name, openMode, perms, kernel));
5513 
5514 	// get directory to put the new file in
5515 	struct vnode* directory;
5516 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5517 	if (status != B_OK)
5518 		return status;
5519 
5520 	status = create_vnode(directory, name, openMode, perms, kernel);
5521 	put_vnode(directory);
5522 
5523 	return status;
5524 }
5525 
5526 
5527 static int
5528 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5529 {
5530 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5531 		openMode, perms, kernel));
5532 
5533 	// get directory to put the new file in
5534 	char name[B_FILE_NAME_LENGTH];
5535 	struct vnode* directory;
5536 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5537 		kernel);
5538 	if (status < 0)
5539 		return status;
5540 
5541 	status = create_vnode(directory, name, openMode, perms, kernel);
5542 
5543 	put_vnode(directory);
5544 	return status;
5545 }
5546 
5547 
5548 static int
5549 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5550 	int openMode, bool kernel)
5551 {
5552 	if (name == NULL || *name == '\0')
5553 		return B_BAD_VALUE;
5554 
5555 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5556 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5557 
5558 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5559 
5560 	// get the vnode matching the entry_ref
5561 	struct vnode* vnode;
5562 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5563 		kernel, &vnode);
5564 	if (status != B_OK)
5565 		return status;
5566 
5567 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5568 		put_vnode(vnode);
5569 		return B_LINK_LIMIT;
5570 	}
5571 
5572 	int newFD = open_vnode(vnode, openMode, kernel);
5573 	if (newFD >= 0) {
5574 		// The vnode reference has been transferred to the FD
5575 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5576 			directoryID, vnode->id, name);
5577 	} else
5578 		put_vnode(vnode);
5579 
5580 	return newFD;
5581 }
5582 
5583 
5584 static int
5585 file_open(int fd, char* path, int openMode, bool kernel)
5586 {
5587 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5588 
5589 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5590 		fd, path, openMode, kernel));
5591 
5592 	// get the vnode matching the vnode + path combination
5593 	struct vnode* vnode;
5594 	ino_t parentID;
5595 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5596 		&parentID, kernel);
5597 	if (status != B_OK)
5598 		return status;
5599 
5600 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5601 		put_vnode(vnode);
5602 		return B_LINK_LIMIT;
5603 	}
5604 
5605 	// open the vnode
5606 	int newFD = open_vnode(vnode, openMode, kernel);
5607 	if (newFD >= 0) {
5608 		// The vnode reference has been transferred to the FD
5609 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5610 			vnode->device, parentID, vnode->id, NULL);
5611 	} else
5612 		put_vnode(vnode);
5613 
5614 	return newFD;
5615 }
5616 
5617 
5618 static status_t
5619 file_close(struct file_descriptor* descriptor)
5620 {
5621 	struct vnode* vnode = descriptor->u.vnode;
5622 	status_t status = B_OK;
5623 
5624 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5625 
5626 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5627 		vnode->id);
5628 	if (HAS_FS_CALL(vnode, close)) {
5629 		status = FS_CALL(vnode, close, descriptor->cookie);
5630 	}
5631 
5632 	if (status == B_OK) {
5633 		// remove all outstanding locks for this team
5634 		if (HAS_FS_CALL(vnode, release_lock))
5635 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5636 		else
5637 			status = release_advisory_lock(vnode, NULL);
5638 	}
5639 	return status;
5640 }
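
/*!	Note (a sketch of the semantics, not normative): releasing with a NULL
	flock above drops every advisory lock the calling team holds on the
	node, matching the POSIX rule that closing any descriptor for a file
	releases all of the process' fcntl() locks on it.
*/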
5641 
5642 
5643 static void
5644 file_free_fd(struct file_descriptor* descriptor)
5645 {
5646 	struct vnode* vnode = descriptor->u.vnode;
5647 
5648 	if (vnode != NULL) {
5649 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5650 		put_vnode(vnode);
5651 	}
5652 }
5653 
5654 
5655 static status_t
5656 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5657 	size_t* length)
5658 {
5659 	struct vnode* vnode = descriptor->u.vnode;
5660 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5661 		pos, length, *length));
5662 
5663 	if (S_ISDIR(vnode->Type()))
5664 		return B_IS_A_DIRECTORY;
5665 
5666 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5667 }
5668 
5669 
5670 static status_t
5671 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5672 	size_t* length)
5673 {
5674 	struct vnode* vnode = descriptor->u.vnode;
5675 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5676 		length));
5677 
5678 	if (S_ISDIR(vnode->Type()))
5679 		return B_IS_A_DIRECTORY;
5680 	if (!HAS_FS_CALL(vnode, write))
5681 		return B_READ_ONLY_DEVICE;
5682 
5683 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5684 }
5685 
5686 
5687 static off_t
5688 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5689 {
5690 	struct vnode* vnode = descriptor->u.vnode;
5691 	off_t offset;
5692 	bool isDevice = false;
5693 
5694 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5695 		seekType));
5696 
5697 	// some kinds of files are not seekable
5698 	switch (vnode->Type() & S_IFMT) {
5699 		case S_IFIFO:
5700 		case S_IFSOCK:
5701 			return ESPIPE;
5702 
5703 		// drivers publish block devices as character devices, so check both
5704 		case S_IFBLK:
5705 		case S_IFCHR:
5706 			isDevice = true;
5707 			break;
5708 		// The Open Group Base Specs don't treat any file types besides pipes,
5709 		// FIFOs, and sockets specially, so the remaining types are seekable.
5710 		case S_IFREG:
5711 		case S_IFDIR:
5712 		case S_IFLNK:
5713 			break;
5714 	}
5715 
5716 	switch (seekType) {
5717 		case SEEK_SET:
5718 			offset = 0;
5719 			break;
5720 		case SEEK_CUR:
5721 			offset = descriptor->pos;
5722 			break;
5723 		case SEEK_END:
5724 		{
5725 			// stat() the node
5726 			if (!HAS_FS_CALL(vnode, read_stat))
5727 				return B_UNSUPPORTED;
5728 
5729 			struct stat stat;
5730 			status_t status = FS_CALL(vnode, read_stat, &stat);
5731 			if (status != B_OK)
5732 				return status;
5733 
5734 			offset = stat.st_size;
5735 
5736 			if (offset == 0 && isDevice) {
5737 				// stat() on regular drivers doesn't report size
5738 				device_geometry geometry;
5739 
5740 				if (HAS_FS_CALL(vnode, ioctl)) {
5741 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5742 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5743 					if (status == B_OK)
5744 						offset = (off_t)geometry.bytes_per_sector
5745 							* geometry.sectors_per_track
5746 							* geometry.cylinder_count
5747 							* geometry.head_count;
5748 				}
5749 			}
5750 
5751 			break;
5752 		}
5753 		default:
5754 			return B_BAD_VALUE;
5755 	}
5756 
5757 	// assumes off_t is 64 bits wide
5758 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5759 		return B_BUFFER_OVERFLOW;
5760 
5761 	pos += offset;
5762 	if (pos < 0)
5763 		return B_BAD_VALUE;
5764 
5765 	return descriptor->pos = pos;
5766 }
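
/*!	Example (a hedged user-land sketch, not part of the kernel): the
	SEEK_END branch above means the capacity of a raw device, whose stat()
	size is usually 0, can be learned with a plain lseek():

		// hypothetical snippet; the device path is made up
		int fd = open("/dev/disk/scsi/0/0/0/raw", O_RDONLY);
		off_t size = lseek(fd, 0, SEEK_END);
			// resolved via B_GET_GEOMETRY as bytes_per_sector
			// * sectors_per_track * cylinder_count * head_count
*/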
5767 
5768 
5769 static status_t
5770 file_select(struct file_descriptor* descriptor, uint8 event,
5771 	struct selectsync* sync)
5772 {
5773 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5774 
5775 	struct vnode* vnode = descriptor->u.vnode;
5776 
5777 	// If the FS has no select() hook, notify select() now.
5778 	if (!HAS_FS_CALL(vnode, select)) {
5779 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5780 			return notify_select_event(sync, event);
5781 		else
5782 			return B_OK;
5783 	}
5784 
5785 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5786 }
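
/*!	Note (sketch of the semantics): for a file system without a select()
	hook the code above makes select() degenerate to POSIX regular-file
	behavior - input-style events are signalled immediately (a regular
	file never blocks), while output-only event types are silently
	accepted. A hypothetical select(fd + 1, &readSet, NULL, NULL, NULL)
	on such a file thus returns at once with fd marked readable.
*/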
5787 
5788 
5789 static status_t
5790 file_deselect(struct file_descriptor* descriptor, uint8 event,
5791 	struct selectsync* sync)
5792 {
5793 	struct vnode* vnode = descriptor->u.vnode;
5794 
5795 	if (!HAS_FS_CALL(vnode, deselect))
5796 		return B_OK;
5797 
5798 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5799 }
5800 
5801 
5802 static status_t
5803 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5804 	bool kernel)
5805 {
5806 	struct vnode* vnode;
5807 	status_t status;
5808 
5809 	if (name == NULL || *name == '\0')
5810 		return B_BAD_VALUE;
5811 
5812 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5813 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5814 
5815 	status = get_vnode(mountID, parentID, &vnode, true, false);
5816 	if (status != B_OK)
5817 		return status;
5818 
5819 	if (HAS_FS_CALL(vnode, create_dir))
5820 		status = FS_CALL(vnode, create_dir, name, perms);
5821 	else
5822 		status = B_READ_ONLY_DEVICE;
5823 
5824 	put_vnode(vnode);
5825 	return status;
5826 }
5827 
5828 
5829 static status_t
5830 dir_create(int fd, char* path, int perms, bool kernel)
5831 {
5832 	char filename[B_FILE_NAME_LENGTH];
5833 	struct vnode* vnode;
5834 	status_t status;
5835 
5836 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5837 		kernel));
5838 
5839 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5840 	if (status < 0)
5841 		return status;
5842 
5843 	if (HAS_FS_CALL(vnode, create_dir)) {
5844 		status = FS_CALL(vnode, create_dir, filename, perms);
5845 	} else
5846 		status = B_READ_ONLY_DEVICE;
5847 
5848 	put_vnode(vnode);
5849 	return status;
5850 }
5851 
5852 
5853 static int
5854 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5855 {
5856 	FUNCTION(("dir_open_entry_ref()\n"));
5857 
5858 	if (name && name[0] == '\0')
5859 		return B_BAD_VALUE;
5860 
5861 	// get the vnode matching the entry_ref/node_ref
5862 	struct vnode* vnode;
5863 	status_t status;
5864 	if (name) {
5865 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5866 			&vnode);
5867 	} else
5868 		status = get_vnode(mountID, parentID, &vnode, true, false);
5869 	if (status != B_OK)
5870 		return status;
5871 
5872 	int newFD = open_dir_vnode(vnode, kernel);
5873 	if (newFD >= 0) {
5874 		// The vnode reference has been transferred to the FD
5875 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5876 			vnode->id, name);
5877 	} else
5878 		put_vnode(vnode);
5879 
5880 	return newFD;
5881 }
5882 
5883 
5884 static int
5885 dir_open(int fd, char* path, bool kernel)
5886 {
5887 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5888 		kernel));
5889 
5890 	// get the vnode matching the vnode + path combination
5891 	struct vnode* vnode = NULL;
5892 	ino_t parentID;
5893 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5894 		kernel);
5895 	if (status != B_OK)
5896 		return status;
5897 
5898 	// open the dir
5899 	int newFD = open_dir_vnode(vnode, kernel);
5900 	if (newFD >= 0) {
5901 		// The vnode reference has been transferred to the FD
5902 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5903 			parentID, vnode->id, NULL);
5904 	} else
5905 		put_vnode(vnode);
5906 
5907 	return newFD;
5908 }
5909 
5910 
5911 static status_t
5912 dir_close(struct file_descriptor* descriptor)
5913 {
5914 	struct vnode* vnode = descriptor->u.vnode;
5915 
5916 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5917 
5918 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5919 		vnode->id);
5920 	if (HAS_FS_CALL(vnode, close_dir))
5921 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5922 
5923 	return B_OK;
5924 }
5925 
5926 
5927 static void
5928 dir_free_fd(struct file_descriptor* descriptor)
5929 {
5930 	struct vnode* vnode = descriptor->u.vnode;
5931 
5932 	if (vnode != NULL) {
5933 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5934 		put_vnode(vnode);
5935 	}
5936 }
5937 
5938 
5939 static status_t
5940 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5941 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5942 {
5943 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5944 		bufferSize, _count);
5945 }
5946 
5947 
5948 static status_t
5949 fix_dirent(struct vnode* parent, struct dirent* entry,
5950 	struct io_context* ioContext)
5951 {
5952 	// set d_pdev and d_pino
5953 	entry->d_pdev = parent->device;
5954 	entry->d_pino = parent->id;
5955 
5956 	// If this is the ".." entry and the directory is covering another vnode,
5957 	// we need to replace d_dev and d_ino with the actual values.
5958 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5959 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
5960 			ioContext);
5961 	}
5962 
5963 	// resolve covered vnodes
5964 	ReadLocker _(&sVnodeLock);
5965 
5966 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5967 	if (vnode != NULL && vnode->covered_by != NULL) {
5968 		do {
5969 			vnode = vnode->covered_by;
5970 		} while (vnode->covered_by != NULL);
5971 
5972 		entry->d_dev = vnode->device;
5973 		entry->d_ino = vnode->id;
5974 	}
5975 
5976 	return B_OK;
5977 }
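
/*!	Example (sketch): assume a volume is mounted at /boot, covering a
	directory on the root FS. The raw dirent for "boot" read from "/"
	still carries the covered node's (d_dev, d_ino); fix_dirent() walks
	the covered_by chain so the caller sees the device and inode of the
	mounted volume's root instead. The ".." entry of the volume's root
	gets the inverse treatment via resolve_covered_parent().
*/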
5978 
5979 
5980 static status_t
5981 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5982 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5983 {
5984 	if (!HAS_FS_CALL(vnode, read_dir))
5985 		return B_UNSUPPORTED;
5986 
5987 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5988 		_count);
5989 	if (error != B_OK)
5990 		return error;
5991 
5992 	// we need to adjust the read dirents
5993 	uint32 count = *_count;
5994 	for (uint32 i = 0; i < count; i++) {
5995 		error = fix_dirent(vnode, buffer, ioContext);
5996 		if (error != B_OK)
5997 			return error;
5998 
5999 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6000 	}
6001 
6002 	return error;
6003 }
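
/*!	Example (hypothetical user-land sketch): the dirents returned above
	are packed back to back, so a reader has to advance by d_reclen rather
	than by sizeof(struct dirent):

		char buffer[4096];
		uint32 count = 0;
		// ...a _kern_read_dir()-style call fills "buffer" and "count"...
		struct dirent* entry = (struct dirent*)buffer;
		for (uint32 i = 0; i < count; i++) {
			printf("%s\n", entry->d_name);
			entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
		}
*/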
6004 
6005 
6006 static status_t
6007 dir_rewind(struct file_descriptor* descriptor)
6008 {
6009 	struct vnode* vnode = descriptor->u.vnode;
6010 
6011 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6012 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6013 	}
6014 
6015 	return B_UNSUPPORTED;
6016 }
6017 
6018 
6019 static status_t
6020 dir_remove(int fd, char* path, bool kernel)
6021 {
6022 	char name[B_FILE_NAME_LENGTH];
6023 	struct vnode* directory;
6024 	status_t status;
6025 
6026 	if (path != NULL) {
6027 		// we need to make sure our path name doesn't end with "/", ".",
6028 		// or ".."
6029 		char* lastSlash;
6030 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6031 			char* leaf = lastSlash + 1;
6032 			if (!strcmp(leaf, ".."))
6033 				return B_NOT_ALLOWED;
6034 
6035 			// omit multiple slashes
6036 			while (lastSlash > path && lastSlash[-1] == '/')
6037 				lastSlash--;
6038 
6039 			if (leaf[0] != '\0'
6040 				&& strcmp(leaf, ".") != 0) {
6041 				break;
6042 			}
6043 			// "name/" -> "name", or "name/." -> "name"
6044 			lastSlash[0] = '\0';
6045 		}
6046 
6047 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6048 			return B_NOT_ALLOWED;
6049 	}
6050 
6051 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6052 	if (status != B_OK)
6053 		return status;
6054 
6055 	if (HAS_FS_CALL(directory, remove_dir))
6056 		status = FS_CALL(directory, remove_dir, name);
6057 	else
6058 		status = B_READ_ONLY_DEVICE;
6059 
6060 	put_vnode(directory);
6061 	return status;
6062 }
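
/*!	Example: the path normalization loop above rewrites trailing
	components before the leaf is resolved (a sketch of the effective
	transformations):

		"a/b/"    -> "a/b"    (trailing slash stripped)
		"a/b/."   -> "a/b"    (trailing "." stripped)
		"a/b//."  -> "a/b"    (multiple slashes skipped)
		"a/b/.."  -> B_NOT_ALLOWED
		".."      -> B_NOT_ALLOWED
*/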
6063 
6064 
6065 static status_t
6066 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6067 	size_t length)
6068 {
6069 	struct vnode* vnode = descriptor->u.vnode;
6070 
6071 	if (HAS_FS_CALL(vnode, ioctl))
6072 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6073 
6074 	return B_DEV_INVALID_IOCTL;
6075 }
6076 
6077 
6078 static status_t
6079 common_fcntl(int fd, int op, size_t argument, bool kernel)
6080 {
6081 	struct flock flock;
6082 
6083 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6084 		fd, op, argument, kernel ? "kernel" : "user"));
6085 
6086 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
6087 		fd);
6088 	if (descriptor == NULL)
6089 		return B_FILE_ERROR;
6090 
6091 	struct vnode* vnode = fd_vnode(descriptor);
6092 
6093 	status_t status = B_OK;
6094 
6095 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6096 		if (descriptor->type != FDTYPE_FILE)
6097 			status = B_BAD_VALUE;
6098 		else if (user_memcpy(&flock, (struct flock*)argument,
6099 				sizeof(struct flock)) != B_OK)
6100 			status = B_BAD_ADDRESS;
6101 
6102 		if (status != B_OK) {
6103 			put_fd(descriptor);
6104 			return status;
6105 		}
6106 	}
6107 
6108 	switch (op) {
6109 		case F_SETFD:
6110 		{
6111 			struct io_context* context = get_current_io_context(kernel);
6112 			// Set file descriptor flags
6113 
6114 			// O_CLOEXEC is the only flag available at this time
6115 			mutex_lock(&context->io_mutex);
6116 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6117 			mutex_unlock(&context->io_mutex);
6118 
6119 			status = B_OK;
6120 			break;
6121 		}
6122 
6123 		case F_GETFD:
6124 		{
6125 			struct io_context* context = get_current_io_context(kernel);
6126 
6127 			// Get file descriptor flags
6128 			mutex_lock(&context->io_mutex);
6129 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6130 			mutex_unlock(&context->io_mutex);
6131 			break;
6132 		}
6133 
6134 		case F_SETFL:
6135 			// Set file descriptor open mode
6136 
6137 			// we only accept changes to O_APPEND and O_NONBLOCK
6138 			argument &= O_APPEND | O_NONBLOCK;
6139 			if (descriptor->ops->fd_set_flags != NULL) {
6140 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6141 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6142 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6143 					(int)argument);
6144 			} else
6145 				status = B_UNSUPPORTED;
6146 
6147 			if (status == B_OK) {
6148 				// update this descriptor's open_mode field
6149 				descriptor->open_mode = (descriptor->open_mode
6150 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6151 			}
6152 
6153 			break;
6154 
6155 		case F_GETFL:
6156 			// Get file descriptor open mode
6157 			status = descriptor->open_mode;
6158 			break;
6159 
6160 		case F_DUPFD:
6161 		case F_DUPFD_CLOEXEC:
6162 		{
6163 			struct io_context* context = get_current_io_context(kernel);
6164 
6165 			status = new_fd_etc(context, descriptor, (int)argument);
6166 			if (status >= 0) {
6167 				mutex_lock(&context->io_mutex);
6168 				fd_set_close_on_exec(context, fd, op == F_DUPFD_CLOEXEC);
6169 				mutex_unlock(&context->io_mutex);
6170 
6171 				atomic_add(&descriptor->ref_count, 1);
6172 			}
6173 			break;
6174 		}
6175 
6176 		case F_GETLK:
6177 			if (vnode != NULL) {
6178 				struct flock normalizedLock;
6179 
6180 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6181 				status = normalize_flock(descriptor, &normalizedLock);
6182 				if (status != B_OK)
6183 					break;
6184 
6185 				if (HAS_FS_CALL(vnode, test_lock)) {
6186 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6187 						&normalizedLock);
6188 				} else
6189 					status = test_advisory_lock(vnode, &normalizedLock);
6190 				if (status == B_OK) {
6191 					if (normalizedLock.l_type == F_UNLCK) {
6192 						// no conflicting lock found, copy back the same struct
6193 						// we were given except change type to F_UNLCK
6194 						flock.l_type = F_UNLCK;
6195 						status = user_memcpy((struct flock*)argument, &flock,
6196 							sizeof(struct flock));
6197 					} else {
6198 						// a conflicting lock was found, copy back its range and
6199 						// type
6200 						if (normalizedLock.l_len == OFF_MAX)
6201 							normalizedLock.l_len = 0;
6202 
6203 						status = user_memcpy((struct flock*)argument,
6204 							&normalizedLock, sizeof(struct flock));
6205 					}
6206 				}
6207 			} else
6208 				status = B_BAD_VALUE;
6209 			break;
6210 
6211 		case F_SETLK:
6212 		case F_SETLKW:
6213 			status = normalize_flock(descriptor, &flock);
6214 			if (status != B_OK)
6215 				break;
6216 
6217 			if (vnode == NULL) {
6218 				status = B_BAD_VALUE;
6219 			} else if (flock.l_type == F_UNLCK) {
6220 				if (HAS_FS_CALL(vnode, release_lock)) {
6221 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6222 						&flock);
6223 				} else
6224 					status = release_advisory_lock(vnode, &flock);
6225 			} else {
6226 				// the open mode must match the lock type
6227 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6228 						&& flock.l_type == F_WRLCK)
6229 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6230 						&& flock.l_type == F_RDLCK))
6231 					status = B_FILE_ERROR;
6232 				else {
6233 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6234 						status = FS_CALL(vnode, acquire_lock,
6235 							descriptor->cookie, &flock, op == F_SETLKW);
6236 					} else {
6237 						status = acquire_advisory_lock(vnode, -1,
6238 							&flock, op == F_SETLKW);
6239 					}
6240 				}
6241 			}
6242 			break;
6243 
6244 		// ToDo: add support for more ops?
6245 
6246 		default:
6247 			status = B_BAD_VALUE;
6248 	}
6249 
6250 	put_fd(descriptor);
6251 	return status;
6252 }
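
/*!	Example (hypothetical user-land sketch) of the F_DUPFD_CLOEXEC branch
	above: duplicate a descriptor and set close-on-exec in one call,
	avoiding the race of a separate F_SETFD:

		int dup = fcntl(fd, F_DUPFD_CLOEXEC, 10);
			// duplicate of "fd" at the lowest free slot >= 10,
			// with FD_CLOEXEC already set on the new descriptor
*/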
6253 
6254 
6255 static status_t
6256 common_sync(int fd, bool kernel)
6257 {
6258 	struct file_descriptor* descriptor;
6259 	struct vnode* vnode;
6260 	status_t status;
6261 
6262 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6263 
6264 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6265 	if (descriptor == NULL)
6266 		return B_FILE_ERROR;
6267 
6268 	if (HAS_FS_CALL(vnode, fsync))
6269 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6270 	else
6271 		status = B_UNSUPPORTED;
6272 
6273 	put_fd(descriptor);
6274 	return status;
6275 }
6276 
6277 
6278 static status_t
6279 common_lock_node(int fd, bool kernel)
6280 {
6281 	struct file_descriptor* descriptor;
6282 	struct vnode* vnode;
6283 
6284 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6285 	if (descriptor == NULL)
6286 		return B_FILE_ERROR;
6287 
6288 	status_t status = B_OK;
6289 
6290 	// We need to set the locking atomically - someone
6291 	// else might set one at the same time
6292 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6293 			(file_descriptor*)NULL) != NULL)
6294 		status = B_BUSY;
6295 
6296 	put_fd(descriptor);
6297 	return status;
6298 }
6299 
6300 
6301 static status_t
6302 common_unlock_node(int fd, bool kernel)
6303 {
6304 	struct file_descriptor* descriptor;
6305 	struct vnode* vnode;
6306 
6307 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6308 	if (descriptor == NULL)
6309 		return B_FILE_ERROR;
6310 
6311 	status_t status = B_OK;
6312 
6313 	// We need to set the locking atomically - someone
6314 	// else might set one at the same time
6315 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6316 			(file_descriptor*)NULL, descriptor) != descriptor)
6317 		status = B_BAD_VALUE;
6318 
6319 	put_fd(descriptor);
6320 	return status;
6321 }
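
/*!	The two functions above implement the node locking protocol with a
	single compare-and-swap each: locking succeeds only if
	mandatory_locked_by was NULL (it is then set to the locking
	descriptor), unlocking only if it still holds that same descriptor
	(it is then reset to NULL). Concurrent lockers therefore race safely:
	exactly one CAS observes NULL and wins; all others get B_BUSY.
*/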
6322 
6323 
6324 static status_t
6325 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6326 	bool kernel)
6327 {
6328 	struct vnode* vnode;
6329 	status_t status;
6330 
6331 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6332 	if (status != B_OK)
6333 		return status;
6334 
6335 	if (HAS_FS_CALL(vnode, read_symlink)) {
6336 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6337 	} else
6338 		status = B_BAD_VALUE;
6339 
6340 	put_vnode(vnode);
6341 	return status;
6342 }
6343 
6344 
6345 static status_t
6346 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6347 	bool kernel)
6348 {
6349 	// path validity checks have to be in the calling function!
6350 	char name[B_FILE_NAME_LENGTH];
6351 	struct vnode* vnode;
6352 	status_t status;
6353 
6354 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6355 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6356 
6357 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6358 	if (status != B_OK)
6359 		return status;
6360 
6361 	if (HAS_FS_CALL(vnode, create_symlink))
6362 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6363 	else {
6364 		status = HAS_FS_CALL(vnode, write)
6365 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6366 	}
6367 
6368 	put_vnode(vnode);
6369 
6370 	return status;
6371 }
6372 
6373 
6374 static status_t
6375 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6376 	bool traverseLeafLink, bool kernel)
6377 {
6378 	// path validity checks have to be in the calling function!
6379 
6380 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6381 		toPath, kernel));
6382 
6383 	char name[B_FILE_NAME_LENGTH];
6384 	struct vnode* directory;
6385 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6386 		kernel);
6387 	if (status != B_OK)
6388 		return status;
6389 
6390 	struct vnode* vnode;
6391 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6392 		kernel);
6393 	if (status != B_OK)
6394 		goto err;
6395 
6396 	if (directory->mount != vnode->mount) {
6397 		status = B_CROSS_DEVICE_LINK;
6398 		goto err1;
6399 	}
6400 
6401 	if (HAS_FS_CALL(directory, link))
6402 		status = FS_CALL(directory, link, name, vnode);
6403 	else
6404 		status = B_READ_ONLY_DEVICE;
6405 
6406 err1:
6407 	put_vnode(vnode);
6408 err:
6409 	put_vnode(directory);
6410 
6411 	return status;
6412 }
6413 
6414 
6415 static status_t
6416 common_unlink(int fd, char* path, bool kernel)
6417 {
6418 	char filename[B_FILE_NAME_LENGTH];
6419 	struct vnode* vnode;
6420 	status_t status;
6421 
6422 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6423 		kernel));
6424 
6425 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6426 	if (status < 0)
6427 		return status;
6428 
6429 	if (HAS_FS_CALL(vnode, unlink))
6430 		status = FS_CALL(vnode, unlink, filename);
6431 	else
6432 		status = B_READ_ONLY_DEVICE;
6433 
6434 	put_vnode(vnode);
6435 
6436 	return status;
6437 }
6438 
6439 
6440 static status_t
6441 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6442 {
6443 	struct vnode* vnode;
6444 	status_t status;
6445 
6446 	// TODO: honor effectiveUserGroup argument
6447 
6448 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6449 	if (status != B_OK)
6450 		return status;
6451 
6452 	if (HAS_FS_CALL(vnode, access))
6453 		status = FS_CALL(vnode, access, mode);
6454 	else
6455 		status = B_OK;
6456 
6457 	put_vnode(vnode);
6458 
6459 	return status;
6460 }
6461 
6462 
6463 static status_t
6464 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6465 {
6466 	struct vnode* fromVnode;
6467 	struct vnode* toVnode;
6468 	char fromName[B_FILE_NAME_LENGTH];
6469 	char toName[B_FILE_NAME_LENGTH];
6470 	status_t status;
6471 
6472 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6473 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6474 
6475 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6476 	if (status != B_OK)
6477 		return status;
6478 
6479 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6480 	if (status != B_OK)
6481 		goto err1;
6482 
6483 	if (fromVnode->device != toVnode->device) {
6484 		status = B_CROSS_DEVICE_LINK;
6485 		goto err2;
6486 	}
6487 
6488 	if (fromName[0] == '\0' || toName[0] == '\0'
6489 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6490 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6491 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6492 		status = B_BAD_VALUE;
6493 		goto err2;
6494 	}
6495 
6496 	if (HAS_FS_CALL(fromVnode, rename))
6497 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6498 	else
6499 		status = B_READ_ONLY_DEVICE;
6500 
6501 err2:
6502 	put_vnode(toVnode);
6503 err1:
6504 	put_vnode(fromVnode);
6505 
6506 	return status;
6507 }
6508 
6509 
6510 static status_t
6511 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6512 {
6513 	struct vnode* vnode = descriptor->u.vnode;
6514 
6515 	FUNCTION(("common_read_stat: stat %p\n", stat));
6516 
6517 	// TODO: remove this once all file systems properly set them!
6518 	stat->st_crtim.tv_nsec = 0;
6519 	stat->st_ctim.tv_nsec = 0;
6520 	stat->st_mtim.tv_nsec = 0;
6521 	stat->st_atim.tv_nsec = 0;
6522 
6523 	return vfs_stat_vnode(vnode, stat);
6524 }
6525 
6526 
6527 static status_t
6528 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6529 	int statMask)
6530 {
6531 	struct vnode* vnode = descriptor->u.vnode;
6532 
6533 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6534 		vnode, stat, statMask));
6535 
6536 	if (!HAS_FS_CALL(vnode, write_stat))
6537 		return B_READ_ONLY_DEVICE;
6538 
6539 	return FS_CALL(vnode, write_stat, stat, statMask);
6540 }
6541 
6542 
6543 static status_t
6544 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6545 	struct stat* stat, bool kernel)
6546 {
6547 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6548 		stat));
6549 
6550 	struct vnode* vnode;
6551 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6552 		NULL, kernel);
6553 	if (status != B_OK)
6554 		return status;
6555 
6556 	status = vfs_stat_vnode(vnode, stat);
6557 
6558 	put_vnode(vnode);
6559 	return status;
6560 }
6561 
6562 
6563 static status_t
6564 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6565 	const struct stat* stat, int statMask, bool kernel)
6566 {
6567 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6568 		"kernel %d\n", fd, path, stat, statMask, kernel));
6569 
6570 	struct vnode* vnode;
6571 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6572 		NULL, kernel);
6573 	if (status != B_OK)
6574 		return status;
6575 
6576 	if (HAS_FS_CALL(vnode, write_stat))
6577 		status = FS_CALL(vnode, write_stat, stat, statMask);
6578 	else
6579 		status = B_READ_ONLY_DEVICE;
6580 
6581 	put_vnode(vnode);
6582 
6583 	return status;
6584 }
6585 
6586 
6587 static int
6588 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6589 {
6590 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6591 		kernel));
6592 
6593 	struct vnode* vnode;
6594 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6595 		NULL, kernel);
6596 	if (status != B_OK)
6597 		return status;
6598 
6599 	status = open_attr_dir_vnode(vnode, kernel);
6600 	if (status < 0)
6601 		put_vnode(vnode);
6602 
6603 	return status;
6604 }
6605 
6606 
6607 static status_t
6608 attr_dir_close(struct file_descriptor* descriptor)
6609 {
6610 	struct vnode* vnode = descriptor->u.vnode;
6611 
6612 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6613 
6614 	if (HAS_FS_CALL(vnode, close_attr_dir))
6615 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6616 
6617 	return B_OK;
6618 }
6619 
6620 
6621 static void
6622 attr_dir_free_fd(struct file_descriptor* descriptor)
6623 {
6624 	struct vnode* vnode = descriptor->u.vnode;
6625 
6626 	if (vnode != NULL) {
6627 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6628 		put_vnode(vnode);
6629 	}
6630 }
6631 
6632 
6633 static status_t
6634 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6635 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6636 {
6637 	struct vnode* vnode = descriptor->u.vnode;
6638 
6639 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6640 
6641 	if (HAS_FS_CALL(vnode, read_attr_dir))
6642 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6643 			bufferSize, _count);
6644 
6645 	return B_UNSUPPORTED;
6646 }
6647 
6648 
6649 static status_t
6650 attr_dir_rewind(struct file_descriptor* descriptor)
6651 {
6652 	struct vnode* vnode = descriptor->u.vnode;
6653 
6654 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6655 
6656 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6657 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6658 
6659 	return B_UNSUPPORTED;
6660 }
6661 
6662 
6663 static int
6664 attr_create(int fd, char* path, const char* name, uint32 type,
6665 	int openMode, bool kernel)
6666 {
6667 	if (name == NULL || *name == '\0')
6668 		return B_BAD_VALUE;
6669 
6670 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6671 	struct vnode* vnode;
6672 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6673 		kernel);
6674 	if (status != B_OK)
6675 		return status;
6676 
6677 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6678 		status = B_LINK_LIMIT;
6679 		goto err;
6680 	}
6681 
6682 	if (!HAS_FS_CALL(vnode, create_attr)) {
6683 		status = B_READ_ONLY_DEVICE;
6684 		goto err;
6685 	}
6686 
6687 	void* cookie;
6688 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6689 	if (status != B_OK)
6690 		goto err;
6691 
6692 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6693 	if (fd >= 0)
6694 		return fd;
6695 
6696 	status = fd;
6697 
6698 	FS_CALL(vnode, close_attr, cookie);
6699 	FS_CALL(vnode, free_attr_cookie, cookie);
6700 
6701 	FS_CALL(vnode, remove_attr, name);
6702 
6703 err:
6704 	put_vnode(vnode);
6705 
6706 	return status;
6707 }
6708 
6709 
6710 static int
6711 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6712 {
6713 	if (name == NULL || *name == '\0')
6714 		return B_BAD_VALUE;
6715 
6716 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6717 	struct vnode* vnode;
6718 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6719 		kernel);
6720 	if (status != B_OK)
6721 		return status;
6722 
6723 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6724 		status = B_LINK_LIMIT;
6725 		goto err;
6726 	}
6727 
6728 	if (!HAS_FS_CALL(vnode, open_attr)) {
6729 		status = B_UNSUPPORTED;
6730 		goto err;
6731 	}
6732 
6733 	void* cookie;
6734 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6735 	if (status != B_OK)
6736 		goto err;
6737 
6738 	// now we only need a file descriptor for this attribute and we're done
6739 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6740 	if (fd >= 0)
6741 		return fd;
6742 
6743 	status = fd;
6744 
6745 	FS_CALL(vnode, close_attr, cookie);
6746 	FS_CALL(vnode, free_attr_cookie, cookie);
6747 
6748 err:
6749 	put_vnode(vnode);
6750 
6751 	return status;
6752 }
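
/*!	Example (hedged user-land sketch): attributes opened through this path
	back the one-shot convenience calls in <fs_attr.h>, e.g.:

		char type[256];
		ssize_t bytes = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE,
			0, type, sizeof(type));

	(fs_read_attr() and friends open the attribute, transfer the data, and
	close it again.)
*/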
6753 
6754 
6755 static status_t
6756 attr_close(struct file_descriptor* descriptor)
6757 {
6758 	struct vnode* vnode = descriptor->u.vnode;
6759 
6760 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6761 
6762 	if (HAS_FS_CALL(vnode, close_attr))
6763 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6764 
6765 	return B_OK;
6766 }
6767 
6768 
6769 static void
6770 attr_free_fd(struct file_descriptor* descriptor)
6771 {
6772 	struct vnode* vnode = descriptor->u.vnode;
6773 
6774 	if (vnode != NULL) {
6775 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6776 		put_vnode(vnode);
6777 	}
6778 }
6779 
6780 
6781 static status_t
6782 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6783 	size_t* length)
6784 {
6785 	struct vnode* vnode = descriptor->u.vnode;
6786 
6787 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6788 		pos, length, *length));
6789 
6790 	if (!HAS_FS_CALL(vnode, read_attr))
6791 		return B_UNSUPPORTED;
6792 
6793 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6794 }
6795 
6796 
6797 static status_t
6798 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6799 	size_t* length)
6800 {
6801 	struct vnode* vnode = descriptor->u.vnode;
6802 
6803 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6804 		length));
6805 
6806 	if (!HAS_FS_CALL(vnode, write_attr))
6807 		return B_UNSUPPORTED;
6808 
6809 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6810 }
6811 
6812 
6813 static off_t
6814 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6815 {
6816 	off_t offset;
6817 
6818 	switch (seekType) {
6819 		case SEEK_SET:
6820 			offset = 0;
6821 			break;
6822 		case SEEK_CUR:
6823 			offset = descriptor->pos;
6824 			break;
6825 		case SEEK_END:
6826 		{
6827 			struct vnode* vnode = descriptor->u.vnode;
6828 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6829 				return B_UNSUPPORTED;
6830 
6831 			struct stat stat;
6832 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6833 				&stat);
6834 			if (status != B_OK)
6835 				return status;
6836 
6837 			offset = stat.st_size;
6838 			break;
6839 		}
6840 		default:
6841 			return B_BAD_VALUE;
6842 	}
6843 
6844 	// assumes off_t is 64 bits wide
6845 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6846 		return B_BUFFER_OVERFLOW;
6847 
6848 	pos += offset;
6849 	if (pos < 0)
6850 		return B_BAD_VALUE;
6851 
6852 	return descriptor->pos = pos;
6853 }
6854 
6855 
6856 static status_t
6857 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6858 {
6859 	struct vnode* vnode = descriptor->u.vnode;
6860 
6861 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6862 
6863 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6864 		return B_UNSUPPORTED;
6865 
6866 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6867 }
6868 
6869 
6870 static status_t
6871 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6872 	int statMask)
6873 {
6874 	struct vnode* vnode = descriptor->u.vnode;
6875 
6876 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6877 
6878 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6879 		return B_READ_ONLY_DEVICE;
6880 
6881 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6882 }
6883 
6884 
6885 static status_t
6886 attr_remove(int fd, const char* name, bool kernel)
6887 {
6888 	struct file_descriptor* descriptor;
6889 	struct vnode* vnode;
6890 	status_t status;
6891 
6892 	if (name == NULL || *name == '\0')
6893 		return B_BAD_VALUE;
6894 
6895 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6896 		kernel));
6897 
6898 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6899 	if (descriptor == NULL)
6900 		return B_FILE_ERROR;
6901 
6902 	if (HAS_FS_CALL(vnode, remove_attr))
6903 		status = FS_CALL(vnode, remove_attr, name);
6904 	else
6905 		status = B_READ_ONLY_DEVICE;
6906 
6907 	put_fd(descriptor);
6908 
6909 	return status;
6910 }
6911 
6912 
6913 static status_t
6914 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6915 	bool kernel)
6916 {
6917 	struct file_descriptor* fromDescriptor;
6918 	struct file_descriptor* toDescriptor;
6919 	struct vnode* fromVnode;
6920 	struct vnode* toVnode;
6921 	status_t status;
6922 
6923 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6924 		|| *toName == '\0')
6925 		return B_BAD_VALUE;
6926 
6927 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6928 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6929 
6930 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6931 	if (fromDescriptor == NULL)
6932 		return B_FILE_ERROR;
6933 
6934 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6935 	if (toDescriptor == NULL) {
6936 		status = B_FILE_ERROR;
6937 		goto err;
6938 	}
6939 
6940 	// are the files on the same volume?
6941 	if (fromVnode->device != toVnode->device) {
6942 		status = B_CROSS_DEVICE_LINK;
6943 		goto err1;
6944 	}
6945 
6946 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6947 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6948 	} else
6949 		status = B_READ_ONLY_DEVICE;
6950 
6951 err1:
6952 	put_fd(toDescriptor);
6953 err:
6954 	put_fd(fromDescriptor);
6955 
6956 	return status;
6957 }
6958 
6959 
6960 static int
6961 index_dir_open(dev_t mountID, bool kernel)
6962 {
6963 	struct fs_mount* mount;
6964 	void* cookie;
6965 
6966 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6967 		kernel));
6968 
6969 	status_t status = get_mount(mountID, &mount);
6970 	if (status != B_OK)
6971 		return status;
6972 
6973 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6974 		status = B_UNSUPPORTED;
6975 		goto error;
6976 	}
6977 
6978 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6979 	if (status != B_OK)
6980 		goto error;
6981 
6982 	// get fd for the index directory
6983 	int fd;
6984 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6985 	if (fd >= 0)
6986 		return fd;
6987 
6988 	// something went wrong
6989 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6990 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6991 
6992 	status = fd;
6993 
6994 error:
6995 	put_mount(mount);
6996 	return status;
6997 }
6998 
6999 
7000 static status_t
7001 index_dir_close(struct file_descriptor* descriptor)
7002 {
7003 	struct fs_mount* mount = descriptor->u.mount;
7004 
7005 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7006 
7007 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7008 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7009 
7010 	return B_OK;
7011 }
7012 
7013 
7014 static void
7015 index_dir_free_fd(struct file_descriptor* descriptor)
7016 {
7017 	struct fs_mount* mount = descriptor->u.mount;
7018 
7019 	if (mount != NULL) {
7020 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7021 		put_mount(mount);
7022 	}
7023 }
7024 
7025 
7026 static status_t
7027 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7028 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7029 {
7030 	struct fs_mount* mount = descriptor->u.mount;
7031 
7032 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7033 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7034 			bufferSize, _count);
7035 	}
7036 
7037 	return B_UNSUPPORTED;
7038 }
7039 
7040 
7041 static status_t
7042 index_dir_rewind(struct file_descriptor* descriptor)
7043 {
7044 	struct fs_mount* mount = descriptor->u.mount;
7045 
7046 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7047 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7048 
7049 	return B_UNSUPPORTED;
7050 }
7051 
7052 
7053 static status_t
7054 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7055 	bool kernel)
7056 {
7057 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7058 		mountID, name, kernel));
7059 
7060 	struct fs_mount* mount;
7061 	status_t status = get_mount(mountID, &mount);
7062 	if (status != B_OK)
7063 		return status;
7064 
7065 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7066 		status = B_READ_ONLY_DEVICE;
7067 		goto out;
7068 	}
7069 
7070 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7071 
7072 out:
7073 	put_mount(mount);
7074 	return status;
7075 }
7076 
7077 
7078 #if 0
7079 static status_t
7080 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7081 {
7082 	struct vnode* vnode = descriptor->u.vnode;
7083 
7084 	// ToDo: currently unused!
7085 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7086 	if (!HAS_FS_CALL(vnode, read_index_stat))
7087 		return B_UNSUPPORTED;
7088 
7089 	return B_UNSUPPORTED;
7090 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7091 }
7092 
7093 
7094 static void
7095 index_free_fd(struct file_descriptor* descriptor)
7096 {
7097 	struct vnode* vnode = descriptor->u.vnode;
7098 
7099 	if (vnode != NULL) {
7100 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7101 		put_vnode(vnode);
7102 	}
7103 }
7104 #endif
7105 
7106 
7107 static status_t
7108 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7109 	bool kernel)
7110 {
7111 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7112 		"kernel = %d)\n", mountID, name, kernel));
7113 
7114 	struct fs_mount* mount;
7115 	status_t status = get_mount(mountID, &mount);
7116 	if (status != B_OK)
7117 		return status;
7118 
7119 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7120 		status = B_UNSUPPORTED;
7121 		goto out;
7122 	}
7123 
7124 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7125 
7126 out:
7127 	put_mount(mount);
7128 	return status;
7129 }
7130 
7131 
7132 static status_t
7133 index_remove(dev_t mountID, const char* name, bool kernel)
7134 {
7135 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7136 		mountID, name, kernel));
7137 
7138 	struct fs_mount* mount;
7139 	status_t status = get_mount(mountID, &mount);
7140 	if (status != B_OK)
7141 		return status;
7142 
7143 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7144 		status = B_READ_ONLY_DEVICE;
7145 		goto out;
7146 	}
7147 
7148 	status = FS_MOUNT_CALL(mount, remove_index, name);
7149 
7150 out:
7151 	put_mount(mount);
7152 	return status;
7153 }
7154 
7155 
7156 /*!	TODO: the query FS API is still pretty much the same as in R5.
7157 		It would be nice if the file systems got some more kernel support
7158 		for queries.
7159 		For example, query parsing should be moved into the kernel.
7160 */
7161 static int
7162 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7163 	int32 token, bool kernel)
7164 {
7165 	struct fs_mount* mount;
7166 	void* cookie;
7167 
7168 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7169 		device, query, kernel));
7170 
7171 	status_t status = get_mount(device, &mount);
7172 	if (status != B_OK)
7173 		return status;
7174 
7175 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7176 		status = B_UNSUPPORTED;
7177 		goto error;
7178 	}
7179 
7180 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7181 		&cookie);
7182 	if (status != B_OK)
7183 		goto error;
7184 
7185 	// get fd for the query
7186 	int fd;
7187 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7188 	if (fd >= 0)
7189 		return fd;
7190 
7191 	status = fd;
7192 
7193 	// something went wrong
7194 	FS_MOUNT_CALL(mount, close_query, cookie);
7195 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7196 
7197 error:
7198 	put_mount(mount);
7199 	return status;
7200 }
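
/*!	Example (hedged user-land sketch): this is the kernel end of the query
	API exposed through <fs_query.h>, roughly:

		dev_t device = dev_for_path("/boot");
		DIR* query = fs_open_query(device, "name==*.cpp", 0);
		while (struct dirent* entry = fs_read_query(query))
			printf("%s\n", entry->d_name);
		fs_close_query(query);

	The port/token pair is only used for live queries, which send update
	messages when entries start or stop matching.
*/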
7201 
7202 
7203 static status_t
7204 query_close(struct file_descriptor* descriptor)
7205 {
7206 	struct fs_mount* mount = descriptor->u.mount;
7207 
7208 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7209 
7210 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7211 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7212 
7213 	return B_OK;
7214 }
7215 
7216 
7217 static void
7218 query_free_fd(struct file_descriptor* descriptor)
7219 {
7220 	struct fs_mount* mount = descriptor->u.mount;
7221 
7222 	if (mount != NULL) {
7223 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7224 		put_mount(mount);
7225 	}
7226 }
7227 
7228 
7229 static status_t
7230 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7231 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7232 {
7233 	struct fs_mount* mount = descriptor->u.mount;
7234 
7235 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7236 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7237 			bufferSize, _count);
7238 	}
7239 
7240 	return B_UNSUPPORTED;
7241 }
7242 
7243 
7244 static status_t
7245 query_rewind(struct file_descriptor* descriptor)
7246 {
7247 	struct fs_mount* mount = descriptor->u.mount;
7248 
7249 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7250 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7251 
7252 	return B_UNSUPPORTED;
7253 }
7254 
7255 
7256 //	#pragma mark - General File System functions
7257 
7258 
7259 static dev_t
7260 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7261 	const char* args, bool kernel)
7262 {
7263 	struct ::fs_mount* mount;
7264 	status_t status = B_OK;
7265 	fs_volume* volume = NULL;
7266 	int32 layer = 0;
7267 	Vnode* coveredNode = NULL;
7268 
7269 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7270 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7271 
7272 	// The path is always safe, we just have to make sure that fsName is
7273 	// at least minimally valid - we can't make any assumptions about args,
7274 	// though. A NULL fsName is OK if a device was given and the FS is not
7275 	// virtual; we'll get the name from the DDM later.
7276 	if (fsName == NULL) {
7277 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7278 			return B_BAD_VALUE;
7279 	} else if (fsName[0] == '\0')
7280 		return B_BAD_VALUE;
7281 
7282 	RecursiveLocker mountOpLocker(sMountOpLock);
7283 
7284 	// Helper to delete a newly created file device on failure.
7285 	// Not exactly beautiful, but helps to keep the code below cleaner.
7286 	struct FileDeviceDeleter {
7287 		FileDeviceDeleter() : id(-1) {}
7288 		~FileDeviceDeleter()
7289 		{
7290 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7291 		}
7292 
7293 		partition_id id;
7294 	} fileDeviceDeleter;
7295 
7296 	// If the file system is not a "virtual" one, the device argument should
7297 	// point to a real file/device (if given at all).
7298 	// get the partition
7299 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7300 	KPartition* partition = NULL;
7301 	KPath normalizedDevice;
7302 	bool newlyCreatedFileDevice = false;
7303 
7304 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7305 		// normalize the device path
7306 		status = normalizedDevice.SetTo(device, true);
7307 		if (status != B_OK)
7308 			return status;
7309 
7310 		// get a corresponding partition from the DDM
7311 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7312 		if (partition == NULL) {
7313 			// Partition not found: This either means the user supplied an
7314 			// invalid path, or the path refers to an image file. We try to
7315 			// let the DDM create a file device for the path.
7316 			partition_id deviceID = ddm->CreateFileDevice(
7317 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7318 			if (deviceID >= 0) {
7319 				partition = ddm->RegisterPartition(deviceID);
7320 				if (newlyCreatedFileDevice)
7321 					fileDeviceDeleter.id = deviceID;
7322 			}
7323 		}
7324 
7325 		if (!partition) {
7326 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7327 				normalizedDevice.Path()));
7328 			return B_ENTRY_NOT_FOUND;
7329 		}
7330 
7331 		device = normalizedDevice.Path();
7332 			// correct path to file device
7333 	}
7334 	PartitionRegistrar partitionRegistrar(partition, true);
7335 
7336 	// Write lock the partition's device. For the time being, we keep the lock
7337 	// until we're done mounting -- not nice, but it ensures that no one
7338 	// is interfering.
7339 	// TODO: Just mark the partition busy while mounting!
7340 	KDiskDevice* diskDevice = NULL;
7341 	if (partition) {
7342 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7343 		if (!diskDevice) {
7344 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7345 			return B_ERROR;
7346 		}
7347 	}
7348 
7349 	DeviceWriteLocker writeLocker(diskDevice, true);
7350 		// this takes over the write lock acquired before
7351 
7352 	if (partition != NULL) {
7353 		// make sure that the partition is not busy
7354 		if (partition->IsBusy()) {
7355 			TRACE(("fs_mount(): Partition is busy.\n"));
7356 			return B_BUSY;
7357 		}
7358 
7359 		// if no FS name had been supplied, we get it from the partition
7360 		if (fsName == NULL) {
7361 			KDiskSystem* diskSystem = partition->DiskSystem();
7362 			if (!diskSystem) {
7363 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7364 					"recognize it.\n"));
7365 				return B_BAD_VALUE;
7366 			}
7367 
7368 			if (!diskSystem->IsFileSystem()) {
7369 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7370 					"partitioning system.\n"));
7371 				return B_BAD_VALUE;
7372 			}
7373 
7374 			// The disk system name will not change, and the KDiskSystem
7375 			// object will not go away while the disk device is locked (and
7376 			// the partition has a reference to it), so this is safe.
7377 			fsName = diskSystem->Name();
7378 		}
7379 	}
7380 
7381 	mount = new(std::nothrow) (struct ::fs_mount);
7382 	if (mount == NULL)
7383 		return B_NO_MEMORY;
7384 
7385 	mount->device_name = strdup(device);
7386 		// "device" can be NULL
7387 
7388 	status = mount->entry_cache.Init();
7389 	if (status != B_OK)
7390 		goto err1;
7391 
7392 	// initialize structure
7393 	mount->id = sNextMountID++;
7394 	mount->partition = NULL;
7395 	mount->root_vnode = NULL;
7396 	mount->covers_vnode = NULL;
7397 	mount->unmounting = false;
7398 	mount->owns_file_device = false;
7399 	mount->volume = NULL;
7400 
7401 	// build up the volume(s)
7402 	while (true) {
7403 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7404 		if (layerFSName == NULL) {
7405 			if (layer == 0) {
7406 				status = B_NO_MEMORY;
7407 				goto err1;
7408 			}
7409 
7410 			break;
7411 		}
7412 		MemoryDeleter layerFSNameDeleter(layerFSName);
7413 
7414 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7415 		if (volume == NULL) {
7416 			status = B_NO_MEMORY;
7417 			goto err1;
7418 		}
7419 
7420 		volume->id = mount->id;
7421 		volume->partition = partition != NULL ? partition->ID() : -1;
7422 		volume->layer = layer++;
7423 		volume->private_volume = NULL;
7424 		volume->ops = NULL;
7425 		volume->sub_volume = NULL;
7426 		volume->super_volume = NULL;
7427 		volume->file_system = NULL;
7428 		volume->file_system_name = NULL;
7429 
7430 		volume->file_system_name = get_file_system_name(layerFSName);
7431 		if (volume->file_system_name == NULL) {
7432 			status = B_NO_MEMORY;
7433 			free(volume);
7434 			goto err1;
7435 		}
7436 
7437 		volume->file_system = get_file_system(layerFSName);
7438 		if (volume->file_system == NULL) {
7439 			status = B_DEVICE_NOT_FOUND;
7440 			free(volume->file_system_name);
7441 			free(volume);
7442 			goto err1;
7443 		}
7444 
7445 		if (mount->volume == NULL)
7446 			mount->volume = volume;
7447 		else {
7448 			volume->super_volume = mount->volume;
7449 			mount->volume->sub_volume = volume;
7450 			mount->volume = volume;
7451 		}
7452 	}
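
	// At this point the layer volumes form a doubly linked chain:
	// mount->volume refers to the last layer created, its super_volume
	// links lead to layer 0, and the sub_volume links point the opposite
	// way. The mount() calls below run from mount->volume towards layer 0.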
7453 
7454 	// insert mount struct into list before we call FS's mount() function
7455 	// so that vnodes can be created for this mount
7456 	mutex_lock(&sMountMutex);
7457 	sMountsTable->Insert(mount);
7458 	mutex_unlock(&sMountMutex);
7459 
7460 	ino_t rootID;
7461 
7462 	if (!sRoot) {
7463 		// we haven't mounted anything yet
7464 		if (strcmp(path, "/") != 0) {
7465 			status = B_ERROR;
7466 			goto err2;
7467 		}
7468 
7469 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7470 			args, &rootID);
7471 		if (status != 0)
7472 		if (status != B_OK)
7473 	} else {
7474 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7475 		if (status != B_OK)
7476 			goto err2;
7477 
7478 		mount->covers_vnode = coveredNode;
7479 
7480 		// make sure coveredNode is a directory
7481 		if (!S_ISDIR(coveredNode->Type())) {
7482 			status = B_NOT_A_DIRECTORY;
7483 			goto err3;
7484 		}
7485 
7486 		if (coveredNode->IsCovered()) {
7487 			// this is already a covered vnode
7488 			status = B_BUSY;
7489 			goto err3;
7490 		}
7491 
7492 		// mount it/them
7493 		fs_volume* volume = mount->volume;
7494 		while (volume) {
7495 			status = volume->file_system->mount(volume, device, flags, args,
7496 				&rootID);
7497 			if (status != B_OK) {
7498 				if (volume->sub_volume)
7499 					goto err4;
7500 				goto err3;
7501 			}
7502 
7503 			volume = volume->super_volume;
7504 		}
7505 
7506 		volume = mount->volume;
7507 		while (volume) {
7508 			if (volume->ops->all_layers_mounted != NULL)
7509 				volume->ops->all_layers_mounted(volume);
7510 			volume = volume->super_volume;
7511 		}
7512 	}
7513 
7514 	// the root node is supposed to be owned by the file system - it must
7515 	// exist at this point
7516 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7517 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7518 		panic("fs_mount: file system does not own its root node!\n");
7519 		status = B_ERROR;
7520 		goto err4;
7521 	}
7522 
7523 	// set up the links between the root vnode and the vnode it covers
7524 	rw_lock_write_lock(&sVnodeLock);
7525 	if (coveredNode != NULL) {
7526 		if (coveredNode->IsCovered()) {
7527 			// the vnode is covered now
7528 			status = B_BUSY;
7529 			rw_lock_write_unlock(&sVnodeLock);
7530 			goto err4;
7531 		}
7532 
7533 		mount->root_vnode->covers = coveredNode;
7534 		mount->root_vnode->SetCovering(true);
7535 
7536 		coveredNode->covered_by = mount->root_vnode;
7537 		coveredNode->SetCovered(true);
7538 	}
7539 	rw_lock_write_unlock(&sVnodeLock);
7540 
7541 	if (!sRoot) {
7542 		sRoot = mount->root_vnode;
7543 		mutex_lock(&sIOContextRootLock);
7544 		get_current_io_context(true)->root = sRoot;
7545 		mutex_unlock(&sIOContextRootLock);
7546 		inc_vnode_ref_count(sRoot);
7547 	}
7548 
7549 	// supply the partition (if any) with the mount cookie and mark it mounted
7550 	if (partition) {
7551 		partition->SetMountCookie(mount->volume->private_volume);
7552 		partition->SetVolumeID(mount->id);
7553 
7554 		// keep a partition reference as long as the partition is mounted
7555 		partitionRegistrar.Detach();
7556 		mount->partition = partition;
7557 		mount->owns_file_device = newlyCreatedFileDevice;
7558 		fileDeviceDeleter.id = -1;
7559 	}
7560 
7561 	notify_mount(mount->id,
7562 		coveredNode != NULL ? coveredNode->device : -1,
7563 		coveredNode ? coveredNode->id : -1);
7564 
7565 	return mount->id;
7566 
7567 err4:
7568 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7569 err3:
7570 	if (coveredNode != NULL)
7571 		put_vnode(coveredNode);
7572 err2:
7573 	mutex_lock(&sMountMutex);
7574 	sMountsTable->Remove(mount);
7575 	mutex_unlock(&sMountMutex);
7576 err1:
7577 	delete mount;
7578 
7579 	return status;
7580 }
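
/*!	Example (hedged user-land sketch): this function backs the
	fs_mount_volume() call from <fs_volume.h>, e.g.:

		dev_t volume = fs_mount_volume("/mnt", "/dev/disk/usb/0/0/raw",
			NULL, 0, NULL);

	Passing a NULL file system name, as above, is allowed for real
	devices: the disk device manager then identifies the file system
	itself.
*/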
7581 
7582 
7583 static status_t
7584 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7585 {
7586 	struct fs_mount* mount;
7587 	status_t err;
7588 
7589 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7590 		mountID, kernel));
7591 
7592 	struct vnode* pathVnode = NULL;
7593 	if (path != NULL) {
7594 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7595 		if (err != B_OK)
7596 			return B_ENTRY_NOT_FOUND;
7597 	}
7598 
7599 	RecursiveLocker mountOpLocker(sMountOpLock);
7600 
7601 	// This lock is not strictly necessary, but is taken in the KDEBUG case
7602 	// to keep the ASSERT in find_mount() working.
7603 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7604 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7605 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7606 	if (mount == NULL) {
7607 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7608 			pathVnode);
7609 	}
7610 
7611 	if (path != NULL) {
7612 		put_vnode(pathVnode);
7613 
7614 		if (mount->root_vnode != pathVnode) {
7615 			// not a mount point
7616 			return B_BAD_VALUE;
7617 		}
7618 	}
7619 
7620 	// if the volume is associated with a partition, lock the device of the
7621 	// partition as long as we are unmounting
7622 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7623 	KPartition* partition = mount->partition;
7624 	KDiskDevice* diskDevice = NULL;
7625 	if (partition != NULL) {
7626 		if (partition->Device() == NULL) {
7627 			dprintf("fs_unmount(): There is no device!\n");
7628 			return B_ERROR;
7629 		}
7630 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7631 		if (!diskDevice) {
7632 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7633 			return B_ERROR;
7634 		}
7635 	}
7636 	DeviceWriteLocker writeLocker(diskDevice, true);
7637 
7638 	// make sure that the partition is not busy
7639 	if (partition != NULL) {
7640 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7641 			TRACE(("fs_unmount(): Partition is busy.\n"));
7642 			return B_BUSY;
7643 		}
7644 	}
7645 
7646 	// grab the vnode master mutex to keep someone from creating
7647 	// a vnode while we're figuring out if we can continue
7648 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7649 
7650 	bool disconnectedDescriptors = false;
7651 
7652 	while (true) {
7653 		bool busy = false;
7654 
7655 		// cycle through the list of vnodes associated with this mount and
7656 		// make sure none of them is busy or still referenced
7657 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7658 		while (struct vnode* vnode = iterator.Next()) {
7659 			if (vnode->IsBusy()) {
7660 				busy = true;
7661 				break;
7662 			}
7663 
7664 			// check the vnode's ref count -- subtract additional references for
7665 			// covering
7666 			int32 refCount = vnode->ref_count;
7667 			if (vnode->covers != NULL)
7668 				refCount--;
7669 			if (vnode->covered_by != NULL)
7670 				refCount--;
7671 
7672 			if (refCount != 0) {
7673 				// there are still vnodes in use on this mount, so we cannot
7674 				// unmount yet
7675 				busy = true;
7676 				break;
7677 			}
7678 		}
7679 
7680 		if (!busy)
7681 			break;
7682 
7683 		if ((flags & B_FORCE_UNMOUNT) == 0)
7684 			return B_BUSY;
7685 
7686 		if (disconnectedDescriptors) {
7687 			// wait a bit until the last access is finished, and then try again
7688 			vnodesWriteLocker.Unlock();
7689 			snooze(100000);
7690 			// TODO: if there is some kind of bug that prevents the ref counts
7691 			// from getting back to zero, this will fall into an endless loop...
7692 			vnodesWriteLocker.Lock();
7693 			continue;
7694 		}
7695 
7696 		// the file system is still busy - but we're forced to unmount it,
7697 		// so let's disconnect all open file descriptors
7698 
7699 		mount->unmounting = true;
7700 			// prevent new vnodes from being created
7701 
7702 		vnodesWriteLocker.Unlock();
7703 
7704 		disconnect_mount_or_vnode_fds(mount, NULL);
7705 		disconnectedDescriptors = true;
7706 
7707 		vnodesWriteLocker.Lock();
7708 	}
7709 
7710 	// We can safely continue. Mark all of the vnodes busy and put this
7711 	// mount structure into unmounting state. Also undo the vnode
7712 	// covers/covered_by links.
7713 	mount->unmounting = true;
7714 
7715 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7716 	while (struct vnode* vnode = iterator.Next()) {
7717 		// Remove all covers/covered_by links from other mounts' nodes to this
7718 		// vnode and adjust the node ref count accordingly. We will release the
7719 		// references to the external vnodes below.
7720 		if (Vnode* coveredNode = vnode->covers) {
7721 			if (Vnode* coveringNode = vnode->covered_by) {
7722 				// We have both covered and covering vnodes, so just remove us
7723 				// from the chain.
7724 				coveredNode->covered_by = coveringNode;
7725 				coveringNode->covers = coveredNode;
7726 				vnode->ref_count -= 2;
7727 
7728 				vnode->covered_by = NULL;
7729 				vnode->covers = NULL;
7730 				vnode->SetCovering(false);
7731 				vnode->SetCovered(false);
7732 			} else {
7733 				// We only have a covered vnode. Remove its link to us.
7734 				coveredNode->covered_by = NULL;
7735 				coveredNode->SetCovered(false);
7736 				vnode->ref_count--;
7737 
7738 				// If the other node is an external vnode, we keep its link
7739 				// around so we can put the reference later on. Otherwise
7740 				// we get rid of it right now.
7741 				if (coveredNode->mount == mount) {
7742 					vnode->covers = NULL;
7743 					coveredNode->ref_count--;
7744 				}
7745 			}
7746 		} else if (Vnode* coveringNode = vnode->covered_by) {
7747 			// We only have a covering vnode. Remove its link to us.
7748 			coveringNode->covers = NULL;
7749 			coveringNode->SetCovering(false);
7750 			vnode->ref_count--;
7751 
7752 			// If the other node is an external vnode, we keep its link
7753 			// around so we can put the reference later on. Otherwise
7754 			// we get rid of it right now.
7755 			if (coveringNode->mount == mount) {
7756 				vnode->covered_by = NULL;
7757 				coveringNode->ref_count--;
7758 			}
7759 		}
7760 
7761 		vnode->SetBusy(true);
7762 		vnode_to_be_freed(vnode);
7763 	}
7764 
7765 	vnodesWriteLocker.Unlock();
7766 
7767 	// Free all vnodes associated with this mount.
7768 	// They will be removed from the mount list by free_vnode(), so
7769 	// we don't have to do this.
7770 	while (struct vnode* vnode = mount->vnodes.Head()) {
7771 		// Put the references to external covered/covering vnodes we kept above.
7772 		if (Vnode* coveredNode = vnode->covers)
7773 			put_vnode(coveredNode);
7774 		if (Vnode* coveringNode = vnode->covered_by)
7775 			put_vnode(coveringNode);
7776 
7777 		free_vnode(vnode, false);
7778 	}
7779 
7780 	// remove the mount structure from the hash table
7781 	mutex_lock(&sMountMutex);
7782 	sMountsTable->Remove(mount);
7783 	mutex_unlock(&sMountMutex);
7784 
7785 	mountOpLocker.Unlock();
7786 
7787 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7788 	notify_unmount(mount->id);
7789 
7790 	// dereference the partition and mark it unmounted
7791 	if (partition) {
7792 		partition->SetVolumeID(-1);
7793 		partition->SetMountCookie(NULL);
7794 
7795 		if (mount->owns_file_device)
7796 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7797 		partition->Unregister();
7798 	}
7799 
7800 	delete mount;
7801 	return B_OK;
7802 }
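
/*!	Usage sketch (hypothetical mount point, error handling elided): a plain
	unmount fails with \c B_BUSY while vnodes on the volume are still
	referenced; retrying with \c B_FORCE_UNMOUNT makes fs_unmount()
	disconnect all open file descriptors, as implemented above.
	\code
	status_t status = _kern_unmount("/mnt/volume", 0);
	if (status == B_BUSY)
		status = _kern_unmount("/mnt/volume", B_FORCE_UNMOUNT);
	\endcode
*/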
7803 
7804 
7805 static status_t
7806 fs_sync(dev_t device)
7807 {
7808 	struct fs_mount* mount;
7809 	status_t status = get_mount(device, &mount);
7810 	if (status != B_OK)
7811 		return status;
7812 
7813 	struct vnode marker;
7814 	memset(&marker, 0, sizeof(marker));
7815 	marker.SetBusy(true);
7816 	marker.SetRemoved(true);
7817 
7818 	// First, synchronize all file caches
7819 
7820 	while (true) {
7821 		WriteLocker locker(sVnodeLock);
7822 			// Note: That's the easy way, which is probably OK for sync(),
7823 			// since it's a relatively rare call and doesn't need to allow for
7824 			// a lot of concurrency. Using a read lock would be possible, but
7825 			// also more involved, since we would have to lock the individual
7826 			// nodes and take care of the locking order, which we might not
7827 			// want to do while holding fs_mount::rlock.
7828 
7829 		// synchronize access to vnode list
7830 		recursive_lock_lock(&mount->rlock);
7831 
7832 		struct vnode* vnode;
7833 		if (!marker.IsRemoved()) {
7834 			vnode = mount->vnodes.GetNext(&marker);
7835 			mount->vnodes.Remove(&marker);
7836 			marker.SetRemoved(true);
7837 		} else
7838 			vnode = mount->vnodes.First();
7839 
7840 		while (vnode != NULL && (vnode->cache == NULL
7841 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7842 			// TODO: we could track writes (and writable mapped vnodes)
7843 			//	and have a simple flag that we could test for here
7844 			vnode = mount->vnodes.GetNext(vnode);
7845 		}
7846 
7847 		if (vnode != NULL) {
7848 			// insert marker vnode again
7849 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7850 			marker.SetRemoved(false);
7851 		}
7852 
7853 		recursive_lock_unlock(&mount->rlock);
7854 
7855 		if (vnode == NULL)
7856 			break;
7857 
7858 		vnode = lookup_vnode(mount->id, vnode->id);
7859 		if (vnode == NULL || vnode->IsBusy())
7860 			continue;
7861 
7862 		if (vnode->ref_count == 0) {
7863 			// this vnode has been unused before
7864 			vnode_used(vnode);
7865 		}
7866 		inc_vnode_ref_count(vnode);
7867 
7868 		locker.Unlock();
7869 
7870 		if (vnode->cache != NULL && !vnode->IsRemoved())
7871 			vnode->cache->WriteModified();
7872 
7873 		put_vnode(vnode);
7874 	}
7875 
7876 	// And then, let the file systems do their synchronizing work
7877 
7878 	if (HAS_FS_MOUNT_CALL(mount, sync))
7879 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7880 
7881 	put_mount(mount);
7882 	return status;
7883 }
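
/*!	Sketch of the marker technique fs_sync() uses above: a dummy (busy,
	removed) vnode is inserted after the current position, so that iteration
	can resume safely after the list lock has been dropped for the blocking
	WriteModified() call. Simplified excerpt:
	\code
	mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
	// ...unlock, write back the vnode's cache, relock...
	vnode = mount->vnodes.GetNext(&marker);
	mount->vnodes.Remove(&marker);
	\endcode
*/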
7884 
7885 
7886 static status_t
7887 fs_read_info(dev_t device, struct fs_info* info)
7888 {
7889 	struct fs_mount* mount;
7890 	status_t status = get_mount(device, &mount);
7891 	if (status != B_OK)
7892 		return status;
7893 
7894 	memset(info, 0, sizeof(struct fs_info));
7895 
7896 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7897 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7898 
7899 	// fill in info the file system doesn't (have to) know about
7900 	if (status == B_OK) {
7901 		info->dev = mount->id;
7902 		info->root = mount->root_vnode->id;
7903 
7904 		fs_volume* volume = mount->volume;
7905 		while (volume->super_volume != NULL)
7906 			volume = volume->super_volume;
7907 
7908 		strlcpy(info->fsh_name, volume->file_system_name,
7909 			sizeof(info->fsh_name));
7910 		if (mount->device_name != NULL) {
7911 			strlcpy(info->device_name, mount->device_name,
7912 				sizeof(info->device_name));
7913 		}
7914 	}
7915 
7916 	// even if the call is not supported by the file system, the caller
7917 	// still gets the parts that we filled out ourselves
7918 
7919 	put_mount(mount);
7920 	return status;
7921 }
7922 
7923 
7924 static status_t
7925 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7926 {
7927 	struct fs_mount* mount;
7928 	status_t status = get_mount(device, &mount);
7929 	if (status != B_OK)
7930 		return status;
7931 
7932 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7933 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7934 	else
7935 		status = B_READ_ONLY_DEVICE;
7936 
7937 	put_mount(mount);
7938 	return status;
7939 }
7940 
7941 
7942 static dev_t
7943 fs_next_device(int32* _cookie)
7944 {
7945 	struct fs_mount* mount = NULL;
7946 	dev_t device = *_cookie;
7947 
7948 	mutex_lock(&sMountMutex);
7949 
7950 	// Since device IDs are assigned sequentially, this algorithm
7951 	// works well enough. It makes sure that the device list
7952 	// returned is sorted, and that no device is skipped when an
7953 	// already visited device has been unmounted.
7954 
7955 	while (device < sNextMountID) {
7956 		mount = find_mount(device++);
7957 		if (mount != NULL && mount->volume->private_volume != NULL)
7958 			break;
7959 	}
7960 
7961 	*_cookie = device;
7962 
7963 	if (mount != NULL)
7964 		device = mount->id;
7965 	else
7966 		device = B_BAD_VALUE;
7967 
7968 	mutex_unlock(&sMountMutex);
7969 
7970 	return device;
7971 }
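
/*!	Usage sketch: enumerating all mounted volumes with the cookie-based
	iteration; this mirrors what _kern_sync() below does via next_dev().
	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0) {
		// device is the ID of the next mounted volume, in ascending order
	}
	\endcode
*/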
7972 
7973 
7974 ssize_t
7975 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7976 	void *buffer, size_t readBytes)
7977 {
7978 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7979 	if (attrFD < 0)
7980 		return attrFD;
7981 
7982 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7983 
7984 	_kern_close(attrFD);
7985 
7986 	return bytesRead;
7987 }
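
/*!	Usage sketch (hypothetical attribute name): reads an attribute in a
	single call; the attribute FD is opened and closed internally. Note
	that \a type is not evaluated by this implementation.
	\code
	char buffer[256];
	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_STRING_TYPE, 0,
		buffer, sizeof(buffer));
	\endcode
*/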
7988 
7989 
7990 static status_t
7991 get_cwd(char* buffer, size_t size, bool kernel)
7992 {
7993 	// Get current working directory from io context
7994 	struct io_context* context = get_current_io_context(kernel);
7995 	status_t status;
7996 
7997 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7998 
7999 	mutex_lock(&context->io_mutex);
8000 
8001 	struct vnode* vnode = context->cwd;
8002 	if (vnode)
8003 		inc_vnode_ref_count(vnode);
8004 
8005 	mutex_unlock(&context->io_mutex);
8006 
8007 	if (vnode) {
8008 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8009 		put_vnode(vnode);
8010 	} else
8011 		status = B_ERROR;
8012 
8013 	return status;
8014 }
8015 
8016 
8017 static status_t
8018 set_cwd(int fd, char* path, bool kernel)
8019 {
8020 	struct io_context* context;
8021 	struct vnode* vnode = NULL;
8022 	struct vnode* oldDirectory;
8023 	status_t status;
8024 
8025 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8026 
8027 	// Get vnode for passed path, and bail if it failed
8028 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8029 	if (status < 0)
8030 		return status;
8031 
8032 	if (!S_ISDIR(vnode->Type())) {
8033 		// nope, can't cwd to here
8034 		status = B_NOT_A_DIRECTORY;
8035 		goto err;
8036 	}
8037 
8038 	// We need to have the permission to enter the directory, too
8039 	if (HAS_FS_CALL(vnode, access)) {
8040 		status = FS_CALL(vnode, access, X_OK);
8041 		if (status != B_OK)
8042 			goto err;
8043 	}
8044 
8045 	// Get current io context and lock
8046 	context = get_current_io_context(kernel);
8047 	mutex_lock(&context->io_mutex);
8048 
8049 	// save the old current working directory first
8050 	oldDirectory = context->cwd;
8051 	context->cwd = vnode;
8052 
8053 	mutex_unlock(&context->io_mutex);
8054 
8055 	if (oldDirectory)
8056 		put_vnode(oldDirectory);
8057 
8058 	return B_NO_ERROR;
8059 
8060 err:
8061 	put_vnode(vnode);
8062 	return status;
8063 }
8064 
8065 
8066 //	#pragma mark - kernel mirrored syscalls
8067 
8068 
8069 dev_t
8070 _kern_mount(const char* path, const char* device, const char* fsName,
8071 	uint32 flags, const char* args, size_t argsLength)
8072 {
8073 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8074 	if (pathBuffer.InitCheck() != B_OK)
8075 		return B_NO_MEMORY;
8076 
8077 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8078 }
8079 
8080 
8081 status_t
8082 _kern_unmount(const char* path, uint32 flags)
8083 {
8084 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8085 	if (pathBuffer.InitCheck() != B_OK)
8086 		return B_NO_MEMORY;
8087 
8088 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8089 }
8090 
8091 
8092 status_t
8093 _kern_read_fs_info(dev_t device, struct fs_info* info)
8094 {
8095 	if (info == NULL)
8096 		return B_BAD_VALUE;
8097 
8098 	return fs_read_info(device, info);
8099 }
8100 
8101 
8102 status_t
8103 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8104 {
8105 	if (info == NULL)
8106 		return B_BAD_VALUE;
8107 
8108 	return fs_write_info(device, info, mask);
8109 }
8110 
8111 
8112 status_t
8113 _kern_sync(void)
8114 {
8115 	// Note: _kern_sync() is also called from _user_sync()
8116 	int32 cookie = 0;
8117 	dev_t device;
8118 	while ((device = next_dev(&cookie)) >= 0) {
8119 		status_t status = fs_sync(device);
8120 		if (status != B_OK && status != B_BAD_VALUE) {
8121 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8122 				strerror(status));
8123 		}
8124 	}
8125 
8126 	return B_OK;
8127 }
8128 
8129 
8130 dev_t
8131 _kern_next_device(int32* _cookie)
8132 {
8133 	return fs_next_device(_cookie);
8134 }
8135 
8136 
8137 status_t
8138 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8139 	size_t infoSize)
8140 {
8141 	if (infoSize != sizeof(fd_info))
8142 		return B_BAD_VALUE;
8143 
8144 	// get the team
8145 	Team* team = Team::Get(teamID);
8146 	if (team == NULL)
8147 		return B_BAD_TEAM_ID;
8148 	BReference<Team> teamReference(team, true);
8149 
8150 	// now that we have a team reference, its I/O context won't go away
8151 	io_context* context = team->io_context;
8152 	MutexLocker contextLocker(context->io_mutex);
8153 
8154 	uint32 slot = *_cookie;
8155 
8156 	struct file_descriptor* descriptor;
8157 	while (slot < context->table_size
8158 		&& (descriptor = context->fds[slot]) == NULL) {
8159 		slot++;
8160 	}
8161 
8162 	if (slot >= context->table_size)
8163 		return B_ENTRY_NOT_FOUND;
8164 
8165 	info->number = slot;
8166 	info->open_mode = descriptor->open_mode;
8167 
8168 	struct vnode* vnode = fd_vnode(descriptor);
8169 	if (vnode != NULL) {
8170 		info->device = vnode->device;
8171 		info->node = vnode->id;
8172 	} else if (descriptor->u.mount != NULL) {
8173 		info->device = descriptor->u.mount->id;
8174 		info->node = -1;
8175 	}
8176 
8177 	*_cookie = slot + 1;
8178 	return B_OK;
8179 }
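
/*!	Usage sketch (hypothetical caller): enumerating all open descriptors
	of a team by advancing the slot cookie until \c B_ENTRY_NOT_FOUND.
	\code
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		// info.number, info.open_mode, info.device, info.node
	}
	\endcode
*/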
8180 
8181 
8182 int
8183 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8184 	int perms)
8185 {
8186 	if ((openMode & O_CREAT) != 0) {
8187 		return file_create_entry_ref(device, inode, name, openMode, perms,
8188 			true);
8189 	}
8190 
8191 	return file_open_entry_ref(device, inode, name, openMode, true);
8192 }
8193 
8194 
8195 /*!	\brief Opens a node specified by a FD + path pair.
8196 
8197 	At least one of \a fd and \a path must be specified.
8198 	If only \a fd is given, the function opens the node identified by this
8199 	FD. If only a path is given, this path is opened. If both are given and
8200 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8201 	of the directory (!) identified by \a fd.
8202 
8203 	\param fd The FD. May be < 0.
8204 	\param path The absolute or relative path. May be \c NULL.
8205 	\param openMode The open mode.
8206 	\return A FD referring to the newly opened node, or an error code,
8207 			if an error occurs.
8208 */
8209 int
8210 _kern_open(int fd, const char* path, int openMode, int perms)
8211 {
8212 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8213 	if (pathBuffer.InitCheck() != B_OK)
8214 		return B_NO_MEMORY;
8215 
8216 	if ((openMode & O_CREAT) != 0)
8217 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8218 
8219 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8220 }
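
/*!	Usage sketch of the FD + path pair semantics (hypothetical paths): a
	relative path is resolved against the directory the FD refers to, while
	an absolute path makes the FD irrelevant.
	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	int fd = _kern_open(dirFD, "config/settings", O_RDONLY, 0);
		// opens /boot/home/config/settings
	int fd2 = _kern_open(dirFD, "/boot/system/notes", O_RDONLY, 0);
		// absolute path: dirFD is ignored
	\endcode
*/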
8221 
8222 
8223 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8224 
8225 	The supplied name may be \c NULL, in which case the directory identified
8226 	by \a device and \a inode will be opened. Otherwise \a device and
8227 	\a inode identify the parent directory of the directory to be opened
8228 	and \a name its entry name.
8229 
8230 	\param device If \a name is specified the ID of the device the parent
8231 		   directory of the directory to be opened resides on, otherwise
8232 		   the device of the directory itself.
8233 	\param inode If \a name is specified the node ID of the parent
8234 		   directory of the directory to be opened, otherwise node ID of the
8235 		   directory itself.
8236 	\param name The entry name of the directory to be opened. If \c NULL,
8237 		   the \a device + \a inode pair identify the node to be opened.
8238 	\return The FD of the newly opened directory or an error code, if
8239 			something went wrong.
8240 */
8241 int
8242 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8243 {
8244 	return dir_open_entry_ref(device, inode, name, true);
8245 }
8246 
8247 
8248 /*!	\brief Opens a directory specified by a FD + path pair.
8249 
8250 	At least one of \a fd and \a path must be specified.
8251 	If only \a fd is given, the function opens the directory identified by this
8252 	FD. If only a path is given, this path is opened. If both are given and
8253 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8254 	of the directory (!) identified by \a fd.
8255 
8256 	\param fd The FD. May be < 0.
8257 	\param path The absolute or relative path. May be \c NULL.
8258 	\return A FD referring to the newly opened directory, or an error code,
8259 			if an error occurs.
8260 */
8261 int
8262 _kern_open_dir(int fd, const char* path)
8263 {
8264 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8265 	if (pathBuffer.InitCheck() != B_OK)
8266 		return B_NO_MEMORY;
8267 
8268 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8269 }
8270 
8271 
8272 status_t
8273 _kern_fcntl(int fd, int op, size_t argument)
8274 {
8275 	return common_fcntl(fd, op, argument, true);
8276 }
8277 
8278 
8279 status_t
8280 _kern_fsync(int fd)
8281 {
8282 	return common_sync(fd, true);
8283 }
8284 
8285 
8286 status_t
8287 _kern_lock_node(int fd)
8288 {
8289 	return common_lock_node(fd, true);
8290 }
8291 
8292 
8293 status_t
8294 _kern_unlock_node(int fd)
8295 {
8296 	return common_unlock_node(fd, true);
8297 }
8298 
8299 
8300 status_t
8301 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8302 	int perms)
8303 {
8304 	return dir_create_entry_ref(device, inode, name, perms, true);
8305 }
8306 
8307 
8308 /*!	\brief Creates a directory specified by a FD + path pair.
8309 
8310 	\a path must always be specified (it contains the name of the new directory
8311 	at least). If only a path is given, this path identifies the location at
8312 	which the directory shall be created. If both \a fd and \a path are given
8313 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8314 	of the directory (!) identified by \a fd.
8315 
8316 	\param fd The FD. May be < 0.
8317 	\param path The absolute or relative path. Must not be \c NULL.
8318 	\param perms The access permissions the new directory shall have.
8319 	\return \c B_OK, if the directory has been created successfully, another
8320 			error code otherwise.
8321 */
8322 status_t
8323 _kern_create_dir(int fd, const char* path, int perms)
8324 {
8325 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8326 	if (pathBuffer.InitCheck() != B_OK)
8327 		return B_NO_MEMORY;
8328 
8329 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8330 }
8331 
8332 
8333 status_t
8334 _kern_remove_dir(int fd, const char* path)
8335 {
8336 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8337 	if (pathBuffer.InitCheck() != B_OK)
8338 		return B_NO_MEMORY;
8339 
8340 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8341 }
8342 
8343 
8344 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8345 
8346 	At least one of \a fd and \a path must be specified.
8347 	If only \a fd is given, the symlink to be read is the node
8348 	identified by this FD. If only a path is given, this path identifies the
8349 	symlink to be read. If both are given and the path is absolute, \a fd is
8350 	ignored; a relative path is reckoned off of the directory (!) identified
8351 	by \a fd.
8352 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8353 	will still be updated to reflect the required buffer size.
8354 
8355 	\param fd The FD. May be < 0.
8356 	\param path The absolute or relative path. May be \c NULL.
8357 	\param buffer The buffer into which the contents of the symlink shall be
8358 		   written.
8359 	\param _bufferSize A pointer to the size of the supplied buffer.
8360 	\return The length of the link on success or an appropriate error code
8361 */
8362 status_t
8363 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8364 {
8365 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8366 	if (pathBuffer.InitCheck() != B_OK)
8367 		return B_NO_MEMORY;
8368 
8369 	return common_read_link(fd, pathBuffer.LockBuffer(),
8370 		buffer, _bufferSize, true);
8371 }
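
/*!	Usage sketch: since \a _bufferSize is updated even on failure, a caller
	can learn the required size from a \c B_BUFFER_OVERFLOW result and
	retry with a larger buffer (hypothetical caller).
	\code
	char buffer[64];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(fd, NULL, buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// size now holds the size needed for the link contents
	}
	\endcode
*/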
8372 
8373 
8374 /*!	\brief Creates a symlink specified by a FD + path pair.
8375 
8376 	\a path must always be specified (it contains the name of the new symlink
8377 	at least). If only a path is given, this path identifies the location at
8378 	which the symlink shall be created. If both \a fd and \a path are given and
8379 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8380 	of the directory (!) identified by \a fd.
8381 
8382 	\param fd The FD. May be < 0.
8383 	\param path The absolute or relative path. Must not be \c NULL.
8384 	\param mode The access permissions the new symlink shall have.
8385 	\return \c B_OK, if the symlink has been created successfully, another
8386 			error code otherwise.
8387 */
8388 status_t
8389 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8390 {
8391 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8392 	if (pathBuffer.InitCheck() != B_OK)
8393 		return B_NO_MEMORY;
8394 
8395 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8396 		toPath, mode, true);
8397 }
8398 
8399 
8400 status_t
8401 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8402 	bool traverseLeafLink)
8403 {
8404 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8405 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8406 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8407 		return B_NO_MEMORY;
8408 
8409 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8410 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8411 }
8412 
8413 
8414 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8415 
8416 	\a path must always be specified (it contains at least the name of the entry
8417 	to be deleted). If only a path is given, this path identifies the entry
8418 	directly. If both \a fd and \a path are given and the path is absolute,
8419 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8420 	identified by \a fd.
8421 
8422 	\param fd The FD. May be < 0.
8423 	\param path The absolute or relative path. Must not be \c NULL.
8424 	\return \c B_OK, if the entry has been removed successfully, another
8425 			error code otherwise.
8426 */
8427 status_t
8428 _kern_unlink(int fd, const char* path)
8429 {
8430 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8431 	if (pathBuffer.InitCheck() != B_OK)
8432 		return B_NO_MEMORY;
8433 
8434 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8435 }
8436 
8437 
8438 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8439 		   by another FD + path pair.
8440 
8441 	\a oldPath and \a newPath must always be specified (they contain at least
8442 	the name of the entry). If only a path is given, this path identifies the
8443 	entry directly. If both a FD and a path are given and the path is absolute,
8444 	the FD is ignored; a relative path is reckoned off of the directory (!)
8445 	identified by the respective FD.
8446 
8447 	\param oldFD The FD of the old location. May be < 0.
8448 	\param oldPath The absolute or relative path of the old location. Must not
8449 		   be \c NULL.
8450 	\param newFD The FD of the new location. May be < 0.
8451 	\param newPath The absolute or relative path of the new location. Must not
8452 		   be \c NULL.
8453 	\return \c B_OK, if the entry has been moved successfully, another
8454 			error code otherwise.
8455 */
8456 status_t
8457 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8458 {
8459 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8460 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8461 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8462 		return B_NO_MEMORY;
8463 
8464 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8465 		newFD, newPathBuffer.LockBuffer(), true);
8466 }
8467 
8468 
8469 status_t
8470 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8471 {
8472 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8473 	if (pathBuffer.InitCheck() != B_OK)
8474 		return B_NO_MEMORY;
8475 
8476 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8477 		true);
8478 }
8479 
8480 
8481 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8482 
8483 	If only \a fd is given, the stat operation associated with the type
8484 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8485 	given, this path identifies the entry for whose node to retrieve the
8486 	stat data. If both \a fd and \a path are given and the path is absolute,
8487 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8488 	identified by \a fd and specifies the entry whose stat data shall be
8489 	retrieved.
8490 
8491 	\param fd The FD. May be < 0.
8492 	\param path The absolute or relative path. May be \c NULL.
8493 	\param traverseLeafLink If \a path is given, \c true specifies that the
8494 		   function shall not stick to symlinks, but traverse them.
8495 	\param stat The buffer the stat data shall be written into.
8496 	\param statSize The size of the supplied stat buffer.
8497 	\return \c B_OK, if the stat data have been read successfully, another
8498 			error code otherwise.
8499 */
8500 status_t
8501 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8502 	struct stat* stat, size_t statSize)
8503 {
8504 	struct stat completeStat;
8505 	struct stat* originalStat = NULL;
8506 	status_t status;
8507 
8508 	if (statSize > sizeof(struct stat))
8509 		return B_BAD_VALUE;
8510 
8511 	// this supports different stat extensions
8512 	if (statSize < sizeof(struct stat)) {
8513 		originalStat = stat;
8514 		stat = &completeStat;
8515 	}
8516 
8517 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8518 
8519 	if (status == B_OK && originalStat != NULL)
8520 		memcpy(originalStat, stat, statSize);
8521 
8522 	return status;
8523 }
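
/*!	Sketch of the stat size mechanism above: a caller compiled against an
	older, smaller stat structure passes its own size, and only that many
	bytes are copied back. (\c struct old_stat is a hypothetical prefix of
	the current \c struct stat.)
	\code
	struct old_stat legacyStat;
	status_t status = _kern_read_stat(fd, NULL, false,
		(struct stat*)&legacyStat, sizeof(legacyStat));
	\endcode
*/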
8524 
8525 
8526 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8527 
8528 	If only \a fd is given, the stat operation associated with the type
8529 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8530 	given, this path identifies the entry for whose node to write the
8531 	stat data. If both \a fd and \a path are given and the path is absolute,
8532 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8533 	identified by \a fd and specifies the entry whose stat data shall be
8534 	written.
8535 
8536 	\param fd The FD. May be < 0.
8537 	\param path The absolute or relative path. May be \c NULL.
8538 	\param traverseLeafLink If \a path is given, \c true specifies that the
8539 		   function shall not stick to symlinks, but traverse them.
8540 	\param stat The buffer containing the stat data to be written.
8541 	\param statSize The size of the supplied stat buffer.
8542 	\param statMask A mask specifying which parts of the stat data shall be
8543 		   written.
8544 	\return \c B_OK, if the stat data have been written successfully,
8545 			another error code otherwise.
8546 */
8547 status_t
8548 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8549 	const struct stat* stat, size_t statSize, int statMask)
8550 {
8551 	struct stat completeStat;
8552 
8553 	if (statSize > sizeof(struct stat))
8554 		return B_BAD_VALUE;
8555 
8556 	// this supports different stat extensions
8557 	if (statSize < sizeof(struct stat)) {
8558 		memset((uint8*)&completeStat + statSize, 0,
8559 			sizeof(struct stat) - statSize);
8560 		memcpy(&completeStat, stat, statSize);
8561 		stat = &completeStat;
8562 	}
8563 
8564 	status_t status;
8565 
8566 	if (path != NULL) {
8567 		// path given: write the stat of the node referred to by (fd, path)
8568 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8569 		if (pathBuffer.InitCheck() != B_OK)
8570 			return B_NO_MEMORY;
8571 
8572 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8573 			traverseLeafLink, stat, statMask, true);
8574 	} else {
8575 		// no path given: get the FD and use the FD operation
8576 		struct file_descriptor* descriptor
8577 			= get_fd(get_current_io_context(true), fd);
8578 		if (descriptor == NULL)
8579 			return B_FILE_ERROR;
8580 
8581 		if (descriptor->ops->fd_write_stat)
8582 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8583 		else
8584 			status = B_UNSUPPORTED;
8585 
8586 		put_fd(descriptor);
8587 	}
8588 
8589 	return status;
8590 }
8591 
8592 
8593 int
8594 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8595 {
8596 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8597 	if (pathBuffer.InitCheck() != B_OK)
8598 		return B_NO_MEMORY;
8599 
8600 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8601 }
8602 
8603 
8604 int
8605 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8606 	int openMode)
8607 {
8608 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8609 	if (pathBuffer.InitCheck() != B_OK)
8610 		return B_NO_MEMORY;
8611 
8612 	if ((openMode & O_CREAT) != 0) {
8613 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8614 			true);
8615 	}
8616 
8617 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8618 }
8619 
8620 
8621 status_t
8622 _kern_remove_attr(int fd, const char* name)
8623 {
8624 	return attr_remove(fd, name, true);
8625 }
8626 
8627 
8628 status_t
8629 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8630 	const char* toName)
8631 {
8632 	return attr_rename(fromFile, fromName, toFile, toName, true);
8633 }
8634 
8635 
8636 int
8637 _kern_open_index_dir(dev_t device)
8638 {
8639 	return index_dir_open(device, true);
8640 }
8641 
8642 
8643 status_t
8644 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8645 {
8646 	return index_create(device, name, type, flags, true);
8647 }
8648 
8649 
8650 status_t
8651 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8652 {
8653 	return index_name_read_stat(device, name, stat, true);
8654 }
8655 
8656 
8657 status_t
8658 _kern_remove_index(dev_t device, const char* name)
8659 {
8660 	return index_remove(device, name, true);
8661 }
8662 
8663 
8664 status_t
8665 _kern_getcwd(char* buffer, size_t size)
8666 {
8667 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8668 
8669 	// Call vfs to get current working directory
8670 	return get_cwd(buffer, size, true);
8671 }
8672 
8673 
8674 status_t
8675 _kern_setcwd(int fd, const char* path)
8676 {
8677 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8678 	if (pathBuffer.InitCheck() != B_OK)
8679 		return B_NO_MEMORY;
8680 
8681 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8682 }
8683 
8684 
8685 //	#pragma mark - userland syscalls
8686 
8687 
8688 dev_t
8689 _user_mount(const char* userPath, const char* userDevice,
8690 	const char* userFileSystem, uint32 flags, const char* userArgs,
8691 	size_t argsLength)
8692 {
8693 	char fileSystem[B_FILE_NAME_LENGTH];
8694 	KPath path, device;
8695 	char* args = NULL;
8696 	status_t status;
8697 
8698 	if (!IS_USER_ADDRESS(userPath)
8699 		|| !IS_USER_ADDRESS(userFileSystem)
8700 		|| !IS_USER_ADDRESS(userDevice))
8701 		return B_BAD_ADDRESS;
8702 
8703 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8704 		return B_NO_MEMORY;
8705 
8706 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8707 		return B_BAD_ADDRESS;
8708 
8709 	if (userFileSystem != NULL
8710 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8711 		return B_BAD_ADDRESS;
8712 
8713 	if (userDevice != NULL
8714 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8715 			< B_OK)
8716 		return B_BAD_ADDRESS;
8717 
8718 	if (userArgs != NULL && argsLength > 0) {
8719 		if (!IS_USER_ADDRESS(userArgs))
8720 			return B_BAD_ADDRESS;
8721 
8722 		// this is a safety restriction
8723 		if (argsLength >= 65536)
8724 			return B_NAME_TOO_LONG;
8725 
8726 		args = (char*)malloc(argsLength + 1);
8727 		if (args == NULL)
8728 			return B_NO_MEMORY;
8729 
8730 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8731 			free(args);
8732 			return B_BAD_ADDRESS;
8733 		}
8734 	}
8735 	path.UnlockBuffer();
8736 	device.UnlockBuffer();
8737 
8738 	status = fs_mount(path.LockBuffer(),
8739 		userDevice != NULL ? device.Path() : NULL,
8740 		userFileSystem ? fileSystem : NULL, flags, args, false);
8741 
8742 	free(args);
8743 	return status;
8744 }
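
/*!	Sketch of the copy-in pattern used by _user_mount() and the other
	userland syscalls below: verify that the pointer is a userland address,
	then copy with user_strlcpy(), which returns a negative error if the
	source is unmapped.
	\code
	if (!IS_USER_ADDRESS(userPath)
		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;
	\endcode
*/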
8745 
8746 
8747 status_t
8748 _user_unmount(const char* userPath, uint32 flags)
8749 {
8750 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8751 
8752 	if (!IS_USER_ADDRESS(userPath))
8753 		return B_BAD_ADDRESS;
8754 
8755 	if (pathBuffer.InitCheck() != B_OK)
8756 		return B_NO_MEMORY;
8757 
8758 	char* path = pathBuffer.LockBuffer();
8759 
8760 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8761 		return B_BAD_ADDRESS;
8762 
8763 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8764 }
8765 
8766 
8767 status_t
8768 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8769 {
8770 	struct fs_info info;
8771 	status_t status;
8772 
8773 	if (userInfo == NULL)
8774 		return B_BAD_VALUE;
8775 
8776 	if (!IS_USER_ADDRESS(userInfo))
8777 		return B_BAD_ADDRESS;
8778 
8779 	status = fs_read_info(device, &info);
8780 	if (status != B_OK)
8781 		return status;
8782 
8783 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8784 		return B_BAD_ADDRESS;
8785 
8786 	return B_OK;
8787 }
8788 
8789 
8790 status_t
8791 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8792 {
8793 	struct fs_info info;
8794 
8795 	if (userInfo == NULL)
8796 		return B_BAD_VALUE;
8797 
8798 	if (!IS_USER_ADDRESS(userInfo)
8799 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8800 		return B_BAD_ADDRESS;
8801 
8802 	return fs_write_info(device, &info, mask);
8803 }
8804 
8805 
8806 dev_t
8807 _user_next_device(int32* _userCookie)
8808 {
8809 	int32 cookie;
8810 	dev_t device;
8811 
8812 	if (!IS_USER_ADDRESS(_userCookie)
8813 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8814 		return B_BAD_ADDRESS;
8815 
8816 	device = fs_next_device(&cookie);
8817 
8818 	if (device >= B_OK) {
8819 		// update user cookie
8820 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8821 			return B_BAD_ADDRESS;
8822 	}
8823 
8824 	return device;
8825 }
8826 
8827 
8828 status_t
8829 _user_sync(void)
8830 {
8831 	return _kern_sync();
8832 }
8833 
8834 
8835 status_t
8836 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8837 	size_t infoSize)
8838 {
8839 	struct fd_info info;
8840 	uint32 cookie;
8841 
8842 	// only root can do this (or should root's group be enough?)
8843 	if (geteuid() != 0)
8844 		return B_NOT_ALLOWED;
8845 
8846 	if (infoSize != sizeof(fd_info))
8847 		return B_BAD_VALUE;
8848 
8849 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8850 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8851 		return B_BAD_ADDRESS;
8852 
8853 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8854 	if (status != B_OK)
8855 		return status;
8856 
8857 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8858 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8859 		return B_BAD_ADDRESS;
8860 
8861 	return status;
8862 }
8863 
8864 
8865 status_t
8866 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8867 	char* userPath, size_t pathLength)
8868 {
8869 	if (!IS_USER_ADDRESS(userPath))
8870 		return B_BAD_ADDRESS;
8871 
8872 	KPath path(B_PATH_NAME_LENGTH + 1);
8873 	if (path.InitCheck() != B_OK)
8874 		return B_NO_MEMORY;
8875 
8876 	// copy the leaf name onto the stack
8877 	char stackLeaf[B_FILE_NAME_LENGTH];
8878 	if (leaf != NULL) {
8879 		if (!IS_USER_ADDRESS(leaf))
8880 			return B_BAD_ADDRESS;
8881 
8882 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8883 		if (length < 0)
8884 			return length;
8885 		if (length >= B_FILE_NAME_LENGTH)
8886 			return B_NAME_TOO_LONG;
8887 
8888 		leaf = stackLeaf;
8889 	}
8890 
8891 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8892 		false, path.LockBuffer(), path.BufferSize());
8893 	if (status != B_OK)
8894 		return status;
8895 
8896 	path.UnlockBuffer();
8897 
8898 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8899 	if (length < 0)
8900 		return length;
8901 	if (length >= (int)pathLength)
8902 		return B_BUFFER_OVERFLOW;
8903 
8904 	return B_OK;
8905 }
8906 
8907 
8908 status_t
8909 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8910 {
8911 	if (userPath == NULL || buffer == NULL)
8912 		return B_BAD_VALUE;
8913 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8914 		return B_BAD_ADDRESS;
8915 
8916 	// copy path from userland
8917 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8918 	if (pathBuffer.InitCheck() != B_OK)
8919 		return B_NO_MEMORY;
8920 	char* path = pathBuffer.LockBuffer();
8921 
8922 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8923 		return B_BAD_ADDRESS;
8924 
8925 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8926 		false);
8927 	if (error != B_OK)
8928 		return error;
8929 
8930 	// copy back to userland
8931 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8932 	if (len < 0)
8933 		return len;
8934 	if (len >= B_PATH_NAME_LENGTH)
8935 		return B_BUFFER_OVERFLOW;
8936 
8937 	return B_OK;
8938 }
8939 
8940 
8941 int
8942 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8943 	int openMode, int perms)
8944 {
8945 	char name[B_FILE_NAME_LENGTH];
8946 
8947 	if (userName == NULL || device < 0 || inode < 0)
8948 		return B_BAD_VALUE;
8949 	if (!IS_USER_ADDRESS(userName)
8950 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8951 		return B_BAD_ADDRESS;
8952 
8953 	if ((openMode & O_CREAT) != 0) {
8954 		return file_create_entry_ref(device, inode, name, openMode, perms,
8955 			false);
8956 	}
8957 
8958 	return file_open_entry_ref(device, inode, name, openMode, false);
8959 }
8960 
8961 
8962 int
8963 _user_open(int fd, const char* userPath, int openMode, int perms)
8964 {
8965 	KPath path(B_PATH_NAME_LENGTH + 1);
8966 	if (path.InitCheck() != B_OK)
8967 		return B_NO_MEMORY;
8968 
8969 	char* buffer = path.LockBuffer();
8970 
8971 	if (!IS_USER_ADDRESS(userPath)
8972 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8973 		return B_BAD_ADDRESS;
8974 
8975 	if ((openMode & O_CREAT) != 0)
8976 		return file_create(fd, buffer, openMode, perms, false);
8977 
8978 	return file_open(fd, buffer, openMode, false);
8979 }
8980 
8981 
8982 int
8983 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8984 {
8985 	if (userName != NULL) {
8986 		char name[B_FILE_NAME_LENGTH];
8987 
8988 		if (!IS_USER_ADDRESS(userName)
8989 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8990 			return B_BAD_ADDRESS;
8991 
8992 		return dir_open_entry_ref(device, inode, name, false);
8993 	}
8994 	return dir_open_entry_ref(device, inode, NULL, false);
8995 }
8996 
8997 
8998 int
8999 _user_open_dir(int fd, const char* userPath)
9000 {
9001 	if (userPath == NULL)
9002 		return dir_open(fd, NULL, false);
9003 
9004 	KPath path(B_PATH_NAME_LENGTH + 1);
9005 	if (path.InitCheck() != B_OK)
9006 		return B_NO_MEMORY;
9007 
9008 	char* buffer = path.LockBuffer();
9009 
9010 	if (!IS_USER_ADDRESS(userPath)
9011 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
9012 		return B_BAD_ADDRESS;
9013 
9014 	return dir_open(fd, buffer, false);
9015 }
9016 
9017 
9018 /*!	\brief Opens a directory's parent directory and returns the entry name
9019 		   of the former.
9020 
9021 	Aside from the fact that it returns the directory's entry name, this
9022 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
9023 	really is equivalent if \a userName is \c NULL.
9024 
9025 	If a name buffer is supplied and the name does not fit the buffer, the
9026 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9027 
9028 	\param fd A FD referring to a directory.
9029 	\param userName Buffer the directory's entry name shall be written into.
9030 		   May be \c NULL.
9031 	\param nameLength Size of the name buffer.
9032 	\return The file descriptor of the opened parent directory, if everything
9033 			went fine, an error code otherwise.
9034 */
9035 int
9036 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9037 {
9038 	bool kernel = false;
9039 
9040 	if (userName && !IS_USER_ADDRESS(userName))
9041 		return B_BAD_ADDRESS;
9042 
9043 	// open the parent dir
9044 	int parentFD = dir_open(fd, (char*)"..", kernel);
9045 	if (parentFD < 0)
9046 		return parentFD;
9047 	FDCloser fdCloser(parentFD, kernel);
9048 
9049 	if (userName) {
9050 		// get the vnodes
9051 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9052 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9053 		VNodePutter parentVNodePutter(parentVNode);
9054 		VNodePutter dirVNodePutter(dirVNode);
9055 		if (!parentVNode || !dirVNode)
9056 			return B_FILE_ERROR;
9057 
9058 		// get the vnode name
9059 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9060 		struct dirent* buffer = (struct dirent*)_buffer;
9061 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9062 			sizeof(_buffer), get_current_io_context(false));
9063 		if (status != B_OK)
9064 			return status;
9065 
9066 		// copy the name to the userland buffer
9067 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9068 		if (len < 0)
9069 			return len;
9070 		if (len >= (int)nameLength)
9071 			return B_BUFFER_OVERFLOW;
9072 	}
9073 
9074 	return fdCloser.Detach();
9075 }
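
/*!	Usage sketch of the semantics (illustrative only -- the name buffer
	must be a userland address in practice): walk one level up and learn
	the directory's entry name in its parent.
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
	if (parentFD >= 0) {
		// name now holds dirFD's entry name within parentFD
	}
	\endcode
*/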
9076 
9077 
9078 status_t
9079 _user_fcntl(int fd, int op, size_t argument)
9080 {
9081 	status_t status = common_fcntl(fd, op, argument, false);
9082 	if (op == F_SETLKW)
9083 		syscall_restart_handle_post(status);
9084 
9085 	return status;
9086 }
9087 
9088 
9089 status_t
9090 _user_fsync(int fd)
9091 {
9092 	return common_sync(fd, false);
9093 }
9094 
9095 
9096 status_t
9097 _user_flock(int fd, int operation)
9098 {
9099 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9100 
9101 	// Check if the operation is valid
9102 	switch (operation & ~LOCK_NB) {
9103 		case LOCK_UN:
9104 		case LOCK_SH:
9105 		case LOCK_EX:
9106 			break;
9107 
9108 		default:
9109 			return B_BAD_VALUE;
9110 	}
9111 
9112 	struct file_descriptor* descriptor;
9113 	struct vnode* vnode;
9114 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9115 	if (descriptor == NULL)
9116 		return B_FILE_ERROR;
9117 
9118 	if (descriptor->type != FDTYPE_FILE) {
9119 		put_fd(descriptor);
9120 		return B_BAD_VALUE;
9121 	}
9122 
9123 	struct flock flock;
9124 	flock.l_start = 0;
9125 	flock.l_len = OFF_MAX;
9126 	flock.l_whence = 0;
9127 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9128 
9129 	status_t status;
9130 	if ((operation & LOCK_UN) != 0) {
9131 		if (HAS_FS_CALL(vnode, release_lock))
9132 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9133 		else
9134 			status = release_advisory_lock(vnode, &flock);
9135 	} else {
9136 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9137 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9138 				(operation & LOCK_NB) == 0);
9139 		} else {
9140 			status = acquire_advisory_lock(vnode,
9141 				thread_get_current_thread()->team->session_id, &flock,
9142 				(operation & LOCK_NB) == 0);
9143 		}
9144 	}
9145 
9146 	syscall_restart_handle_post(status);
9147 
9148 	put_fd(descriptor);
9149 	return status;
9150 }
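
/*!	Sketch of how the POSIX flock() operations map onto the advisory lock
	calls above -- the whole file is locked (l_start 0, l_len OFF_MAX), and
	\c LOCK_NB selects non-blocking acquisition:
	\code
	_user_flock(fd, LOCK_EX);            // blocking exclusive lock
	_user_flock(fd, LOCK_SH | LOCK_NB);  // shared lock, fail instead of wait
	_user_flock(fd, LOCK_UN);            // release
	\endcode
*/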
9151 
9152 
9153 status_t
9154 _user_lock_node(int fd)
9155 {
9156 	return common_lock_node(fd, false);
9157 }
9158 
9159 
9160 status_t
9161 _user_unlock_node(int fd)
9162 {
9163 	return common_unlock_node(fd, false);
9164 }
9165 
9166 
9167 status_t
9168 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9169 	int perms)
9170 {
9171 	char name[B_FILE_NAME_LENGTH];
9172 	status_t status;
9173 
9174 	if (!IS_USER_ADDRESS(userName))
9175 		return B_BAD_ADDRESS;
9176 
9177 	status = user_strlcpy(name, userName, sizeof(name));
9178 	if (status < 0)
9179 		return status;
9180 
9181 	return dir_create_entry_ref(device, inode, name, perms, false);
9182 }
9183 
9184 
9185 status_t
9186 _user_create_dir(int fd, const char* userPath, int perms)
9187 {
9188 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9189 	if (pathBuffer.InitCheck() != B_OK)
9190 		return B_NO_MEMORY;
9191 
9192 	char* path = pathBuffer.LockBuffer();
9193 
9194 	if (!IS_USER_ADDRESS(userPath)
9195 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9196 		return B_BAD_ADDRESS;
9197 
9198 	return dir_create(fd, path, perms, false);
9199 }
9200 
9201 
9202 status_t
9203 _user_remove_dir(int fd, const char* userPath)
9204 {
9205 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9206 	if (pathBuffer.InitCheck() != B_OK)
9207 		return B_NO_MEMORY;
9208 
9209 	char* path = pathBuffer.LockBuffer();
9210 
9211 	if (userPath != NULL) {
9212 		if (!IS_USER_ADDRESS(userPath)
9213 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9214 			return B_BAD_ADDRESS;
9215 	}
9216 
9217 	return dir_remove(fd, userPath ? path : NULL, false);
9218 }
9219 
9220 
9221 status_t
9222 _user_read_link(int fd, const char* userPath, char* userBuffer,
9223 	size_t* userBufferSize)
9224 {
9225 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9226 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9227 		return B_NO_MEMORY;
9228 
9229 	size_t bufferSize;
9230 
9231 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9232 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9233 		return B_BAD_ADDRESS;
9234 
9235 	char* path = pathBuffer.LockBuffer();
9236 	char* buffer = linkBuffer.LockBuffer();
9237 
9238 	if (userPath) {
9239 		if (!IS_USER_ADDRESS(userPath)
9240 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9241 			return B_BAD_ADDRESS;
9242 
9243 		if (bufferSize > B_PATH_NAME_LENGTH)
9244 			bufferSize = B_PATH_NAME_LENGTH;
9245 	}
9246 
9247 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9248 		&bufferSize, false);
9249 
9250 	// we also update the bufferSize in case of errors
9251 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9252 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9253 		return B_BAD_ADDRESS;
9254 
9255 	if (status != B_OK)
9256 		return status;
9257 
9258 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9259 		return B_BAD_ADDRESS;
9260 
9261 	return B_OK;
9262 }
9263 
9264 
9265 status_t
9266 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9267 	int mode)
9268 {
9269 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9270 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9271 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9272 		return B_NO_MEMORY;
9273 
9274 	char* path = pathBuffer.LockBuffer();
9275 	char* toPath = toPathBuffer.LockBuffer();
9276 
9277 	if (!IS_USER_ADDRESS(userPath)
9278 		|| !IS_USER_ADDRESS(userToPath)
9279 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9280 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9281 		return B_BAD_ADDRESS;
9282 
9283 	return common_create_symlink(fd, path, toPath, mode, false);
9284 }
9285 
9286 
9287 status_t
9288 _user_create_link(int pathFD, const char* userPath, int toFD,
9289 	const char* userToPath, bool traverseLeafLink)
9290 {
9291 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9292 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9293 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9294 		return B_NO_MEMORY;
9295 
9296 	char* path = pathBuffer.LockBuffer();
9297 	char* toPath = toPathBuffer.LockBuffer();
9298 
9299 	if (!IS_USER_ADDRESS(userPath)
9300 		|| !IS_USER_ADDRESS(userToPath)
9301 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9302 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9303 		return B_BAD_ADDRESS;
9304 
9305 	status_t status = check_path(toPath);
9306 	if (status != B_OK)
9307 		return status;
9308 
9309 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9310 		false);
9311 }
9312 
9313 
9314 status_t
9315 _user_unlink(int fd, const char* userPath)
9316 {
9317 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9318 	if (pathBuffer.InitCheck() != B_OK)
9319 		return B_NO_MEMORY;
9320 
9321 	char* path = pathBuffer.LockBuffer();
9322 
9323 	if (!IS_USER_ADDRESS(userPath)
9324 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9325 		return B_BAD_ADDRESS;
9326 
9327 	return common_unlink(fd, path, false);
9328 }
9329 
9330 
9331 status_t
9332 _user_rename(int oldFD, const char* userOldPath, int newFD,
9333 	const char* userNewPath)
9334 {
9335 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9336 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9337 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9338 		return B_NO_MEMORY;
9339 
9340 	char* oldPath = oldPathBuffer.LockBuffer();
9341 	char* newPath = newPathBuffer.LockBuffer();
9342 
9343 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9344 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9345 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9346 		return B_BAD_ADDRESS;
9347 
9348 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9349 }
9350 
9351 
9352 status_t
9353 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9354 {
9355 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9356 	if (pathBuffer.InitCheck() != B_OK)
9357 		return B_NO_MEMORY;
9358 
9359 	char* path = pathBuffer.LockBuffer();
9360 
9361 	if (!IS_USER_ADDRESS(userPath)
9362 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9363 		return B_BAD_ADDRESS;
9364 	}
9365 
9366 	// split into directory vnode and filename path
9367 	char filename[B_FILE_NAME_LENGTH];
9368 	struct vnode* dir;
9369 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9370 	if (status != B_OK)
9371 		return status;
9372 
9373 	VNodePutter _(dir);
9374 
9375 	// the underlying FS needs to support creating FIFOs
9376 	if (!HAS_FS_CALL(dir, create_special_node))
9377 		return B_UNSUPPORTED;
9378 
9379 	// create the entry	-- the FIFO sub node is set up automatically
9380 	fs_vnode superVnode;
9381 	ino_t nodeID;
9382 	status = FS_CALL(dir, create_special_node, filename, NULL,
9383 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9384 
9385 	// create_special_node() acquired a reference for us that we don't need.
9386 	if (status == B_OK)
9387 		put_vnode(dir->mount->volume, nodeID);
9388 
9389 	return status;
9390 }
9391 
9392 
9393 status_t
9394 _user_create_pipe(int* userFDs)
9395 {
9396 	// rootfs should support creating FIFOs, but let's be sure
9397 	if (!HAS_FS_CALL(sRoot, create_special_node))
9398 		return B_UNSUPPORTED;
9399 
9400 	// create the node	-- the FIFO sub node is set up automatically
9401 	fs_vnode superVnode;
9402 	ino_t nodeID;
9403 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9404 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9405 	if (status != B_OK)
9406 		return status;
9407 
9408 	// We've got one reference to the node and need another one.
9409 	struct vnode* vnode;
9410 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9411 	if (status != B_OK) {
9412 		// that should not happen
9413 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9414 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9415 		return status;
9416 	}
9417 
9418 	// Everything looks good so far. Open two FDs, one for reading and one
9419 	// for writing.
9420 	int fds[2];
9421 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9422 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9423 
9424 	FDCloser closer0(fds[0], false);
9425 	FDCloser closer1(fds[1], false);
9426 
9427 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9428 
9429 	// copy FDs to userland
9430 	if (status == B_OK) {
9431 		if (!IS_USER_ADDRESS(userFDs)
9432 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9433 			status = B_BAD_ADDRESS;
9434 		}
9435 	}
9436 
9437 	// keep the FDs if everything went fine
9438 	if (status == B_OK) {
9439 		closer0.Detach();
9440 		closer1.Detach();
9441 	}
9442 
9443 	return status;
9444 }
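
/*!	Usage sketch of the semantics (illustrative only -- the FD array must
	be a userland address in practice): this syscall backs the POSIX pipe()
	function, returning the read end in fds[0] and the write end in fds[1].
	\code
	int fds[2];
	if (_user_create_pipe(fds) == B_OK) {
		// fds[0] was opened O_RDONLY, fds[1] O_WRONLY
	}
	\endcode
*/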
9445 
9446 
9447 status_t
9448 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9449 {
9450 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9451 	if (pathBuffer.InitCheck() != B_OK)
9452 		return B_NO_MEMORY;
9453 
9454 	char* path = pathBuffer.LockBuffer();
9455 
9456 	if (!IS_USER_ADDRESS(userPath)
9457 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9458 		return B_BAD_ADDRESS;
9459 
9460 	return common_access(fd, path, mode, effectiveUserGroup, false);
9461 }
9462 
9463 
9464 status_t
9465 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9466 	struct stat* userStat, size_t statSize)
9467 {
9468 	struct stat stat;
9469 	status_t status;
9470 
9471 	if (statSize > sizeof(struct stat))
9472 		return B_BAD_VALUE;
9473 
9474 	if (!IS_USER_ADDRESS(userStat))
9475 		return B_BAD_ADDRESS;
9476 
9477 	if (userPath != NULL) {
9478 		// path given: get the stat of the node referred to by (fd, path)
9479 		if (!IS_USER_ADDRESS(userPath))
9480 			return B_BAD_ADDRESS;
9481 
9482 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9483 		if (pathBuffer.InitCheck() != B_OK)
9484 			return B_NO_MEMORY;
9485 
9486 		char* path = pathBuffer.LockBuffer();
9487 
9488 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9489 		if (length < B_OK)
9490 			return length;
9491 		if (length >= B_PATH_NAME_LENGTH)
9492 			return B_NAME_TOO_LONG;
9493 
9494 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9495 	} else {
9496 		// no path given: get the FD and use the FD operation
9497 		struct file_descriptor* descriptor
9498 			= get_fd(get_current_io_context(false), fd);
9499 		if (descriptor == NULL)
9500 			return B_FILE_ERROR;
9501 
9502 		if (descriptor->ops->fd_read_stat)
9503 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9504 		else
9505 			status = B_UNSUPPORTED;
9506 
9507 		put_fd(descriptor);
9508 	}
9509 
9510 	if (status != B_OK)
9511 		return status;
9512 
9513 	return user_memcpy(userStat, &stat, statSize);
9514 }
9515 
9516 
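/*!	\brief Backs chmod(), chown(), utime(), and friends.
	Writes the fields of \a userStat selected by \a statMask to the node
	referred to by \a fd + \a userPath, or via the FD's fd_write_stat() hook
	if \a userPath is \c NULL. Fields beyond \a statSize are zeroed before
	the FS sees the structure.
*/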
9517 status_t
9518 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9519 	const struct stat* userStat, size_t statSize, int statMask)
9520 {
9521 	if (statSize > sizeof(struct stat))
9522 		return B_BAD_VALUE;
9523 
9524 	struct stat stat;
9525 
9526 	if (!IS_USER_ADDRESS(userStat)
9527 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9528 		return B_BAD_ADDRESS;
9529 
9530 	// clear additional stat fields
9531 	if (statSize < sizeof(struct stat))
9532 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9533 
9534 	status_t status;
9535 
9536 	if (userPath != NULL) {
9537 		// path given: write the stat of the node referred to by (fd, path)
9538 		if (!IS_USER_ADDRESS(userPath))
9539 			return B_BAD_ADDRESS;
9540 
9541 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9542 		if (pathBuffer.InitCheck() != B_OK)
9543 			return B_NO_MEMORY;
9544 
9545 		char* path = pathBuffer.LockBuffer();
9546 
9547 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9548 		if (length < B_OK)
9549 			return length;
9550 		if (length >= B_PATH_NAME_LENGTH)
9551 			return B_NAME_TOO_LONG;
9552 
9553 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9554 			statMask, false);
9555 	} else {
9556 		// no path given: get the FD and use the FD operation
9557 		struct file_descriptor* descriptor
9558 			= get_fd(get_current_io_context(false), fd);
9559 		if (descriptor == NULL)
9560 			return B_FILE_ERROR;
9561 
9562 		if (descriptor->ops->fd_write_stat) {
9563 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9564 				statMask);
9565 		} else
9566 			status = B_UNSUPPORTED;
9567 
9568 		put_fd(descriptor);
9569 	}
9570 
9571 	return status;
9572 }
9573 
9574 
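/*!	\brief Opens the attribute directory of the node referred to by
	\a fd + \a userPath (or by \a fd alone) and returns a new FD for it.
*/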
9575 int
9576 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9577 {
9578 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9579 	if (pathBuffer.InitCheck() != B_OK)
9580 		return B_NO_MEMORY;
9581 
9582 	char* path = pathBuffer.LockBuffer();
9583 
9584 	if (userPath != NULL) {
9585 		if (!IS_USER_ADDRESS(userPath)
9586 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9587 			return B_BAD_ADDRESS;
9588 	}
9589 
9590 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9591 }
9592 
9593 
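/*!	\brief Reads up to \a readBytes bytes from attribute \a userAttribute of
	the node \a fd refers to, starting at offset \a pos.
	Implemented by opening the attribute read-only, delegating to
	_user_read(), and closing it again.
*/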
9594 ssize_t
9595 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9596 	size_t readBytes)
9597 {
9598 	char attribute[B_FILE_NAME_LENGTH];
9599 
9600 	if (userAttribute == NULL)
9601 		return B_BAD_VALUE;
9602 	if (!IS_USER_ADDRESS(userAttribute)
9603 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9604 		return B_BAD_ADDRESS;
9605 	}
9606 
9607 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9608 	if (attr < 0)
9609 		return attr;
9610 
9611 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9612 	_user_close(attr);
9613 
9614 	return bytes;
9615 }
9616 
9617 
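/*!	\brief Writes \a writeBytes bytes to attribute \a userAttribute of the
	node \a fd refers to, creating the attribute with the given \a type if
	it doesn't exist yet.
*/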
9618 ssize_t
9619 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9620 	const void* buffer, size_t writeBytes)
9621 {
9622 	char attribute[B_FILE_NAME_LENGTH];
9623 
9624 	if (userAttribute == NULL)
9625 		return B_BAD_VALUE;
9626 	if (!IS_USER_ADDRESS(userAttribute)
9627 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9628 		return B_BAD_ADDRESS;
9629 	}
9630 
9631 	// Try to support the BeOS-typical truncation semantics as well as the
9632 	// position argument: only truncate when writing starts at offset 0.
9633 	int attr = attr_create(fd, NULL, attribute, type,
9634 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9635 	if (attr < 0)
9636 		return attr;
9637 
9638 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9639 	_user_close(attr);
9640 
9641 	return bytes;
9642 }
9643 
9644 
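/*!	\brief Fills \a userAttrInfo with type and size of attribute
	\a userAttribute of the node \a fd refers to.
*/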
9645 status_t
9646 _user_stat_attr(int fd, const char* userAttribute,
9647 	struct attr_info* userAttrInfo)
9648 {
9649 	char attribute[B_FILE_NAME_LENGTH];
9650 
9651 	if (userAttribute == NULL || userAttrInfo == NULL)
9652 		return B_BAD_VALUE;
9653 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo)
9654 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9655 		return B_BAD_ADDRESS;
9656 	}
9657 
9658 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9659 	if (attr < 0)
9660 		return attr;
9661 
9662 	struct file_descriptor* descriptor
9663 		= get_fd(get_current_io_context(false), attr);
9664 	if (descriptor == NULL) {
9665 		_user_close(attr);
9666 		return B_FILE_ERROR;
9667 	}
9668 
9669 	struct stat stat;
9670 	status_t status;
9671 	if (descriptor->ops->fd_read_stat)
9672 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9673 	else
9674 		status = B_UNSUPPORTED;
9675 
9676 	put_fd(descriptor);
9677 	_user_close(attr);
9678 
9679 	if (status == B_OK) {
9680 		attr_info info;
9681 		info.type = stat.st_type;
9682 		info.size = stat.st_size;
9683 
9684 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9685 			return B_BAD_ADDRESS;
9686 	}
9687 
9688 	return status;
9689 }
9690 
9691 
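/*!	\brief Opens attribute \a userName of the node referred to by
	\a fd + \a userPath and returns a new FD for it. With O_CREAT in
	\a openMode the attribute is created with the given \a type, if needed.
*/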
9692 int
9693 _user_open_attr(int fd, const char* userPath, const char* userName,
9694 	uint32 type, int openMode)
9695 {
9696 	char name[B_FILE_NAME_LENGTH];
9697 
9698 	if (!IS_USER_ADDRESS(userName)
9699 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9700 		return B_BAD_ADDRESS;
9701 
9702 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9703 	if (pathBuffer.InitCheck() != B_OK)
9704 		return B_NO_MEMORY;
9705 
9706 	char* path = pathBuffer.LockBuffer();
9707 
9708 	if (userPath != NULL) {
9709 		if (!IS_USER_ADDRESS(userPath)
9710 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9711 			return B_BAD_ADDRESS;
9712 	}
9713 
9714 	if ((openMode & O_CREAT) != 0) {
9715 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9716 			false);
9717 	}
9718 
9719 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9720 }
9721 
9722 
9723 status_t
9724 _user_remove_attr(int fd, const char* userName)
9725 {
9726 	char name[B_FILE_NAME_LENGTH];
9727 
9728 	if (!IS_USER_ADDRESS(userName)
9729 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9730 		return B_BAD_ADDRESS;
9731 
9732 	return attr_remove(fd, name, false);
9733 }
9734 
9735 
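/*!	\brief Renames attribute \a userFromName of the node \a fromFile refers
	to to \a userToName on the node \a toFile refers to.
*/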
9736 status_t
9737 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9738 	const char* userToName)
9739 {
9740 	if (!IS_USER_ADDRESS(userFromName)
9741 		|| !IS_USER_ADDRESS(userToName))
9742 		return B_BAD_ADDRESS;
9743 
9744 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9745 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9746 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9747 		return B_NO_MEMORY;
9748 
9749 	char* fromName = fromNameBuffer.LockBuffer();
9750 	char* toName = toNameBuffer.LockBuffer();
9751 
9752 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9753 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9754 		return B_BAD_ADDRESS;
9755 
9756 	return attr_rename(fromFile, fromName, toFile, toName, false);
9757 }
9758 
9759 
9760 int
9761 _user_open_index_dir(dev_t device)
9762 {
9763 	return index_dir_open(device, false);
9764 }
9765 
9766 
9767 status_t
9768 _user_create_index(dev_t device, const char* userName, uint32 type,
9769 	uint32 flags)
9770 {
9771 	char name[B_FILE_NAME_LENGTH];
9772 
9773 	if (!IS_USER_ADDRESS(userName)
9774 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9775 		return B_BAD_ADDRESS;
9776 
9777 	return index_create(device, name, type, flags, false);
9778 }
9779 
9780 
9781 status_t
9782 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9783 {
9784 	char name[B_FILE_NAME_LENGTH];
9785 	struct stat stat;
9786 	status_t status;
9787 
9788 	if (!IS_USER_ADDRESS(userName)
9789 		|| !IS_USER_ADDRESS(userStat)
9790 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9791 		return B_BAD_ADDRESS;
9792 
9793 	status = index_name_read_stat(device, name, &stat, false);
9794 	if (status == B_OK) {
9795 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9796 			return B_BAD_ADDRESS;
9797 	}
9798 
9799 	return status;
9800 }
9801 
9802 
9803 status_t
9804 _user_remove_index(dev_t device, const char* userName)
9805 {
9806 	char name[B_FILE_NAME_LENGTH];
9807 
9808 	if (!IS_USER_ADDRESS(userName)
9809 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9810 		return B_BAD_ADDRESS;
9811 
9812 	return index_remove(device, name, false);
9813 }
9814 
9815 
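/*!	\brief Backs the getcwd() function.
	Retrieves the calling team's current working directory path and copies
	it to \a userBuffer, using at most \a size bytes (capped to
	kMaxPathLength).
*/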
9816 status_t
9817 _user_getcwd(char* userBuffer, size_t size)
9818 {
9819 	if (size == 0)
9820 		return B_BAD_VALUE;
9821 	if (!IS_USER_ADDRESS(userBuffer))
9822 		return B_BAD_ADDRESS;
9823 
9824 	if (size > kMaxPathLength)
9825 		size = kMaxPathLength;
9826 
9827 	KPath pathBuffer(size);
9828 	if (pathBuffer.InitCheck() != B_OK)
9829 		return B_NO_MEMORY;
9830 
9831 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9832 
9833 	char* path = pathBuffer.LockBuffer();
9834 
9835 	status_t status = get_cwd(path, size, false);
9836 	if (status != B_OK)
9837 		return status;
9838 
9839 	// Copy back the result
9840 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9841 		return B_BAD_ADDRESS;
9842 
9843 	return status;
9844 }
9845 
9846 
9847 status_t
9848 _user_setcwd(int fd, const char* userPath)
9849 {
9850 	TRACE(("user_setcwd: path = %p\n", userPath));
9851 
9852 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9853 	if (pathBuffer.InitCheck() != B_OK)
9854 		return B_NO_MEMORY;
9855 
9856 	char* path = pathBuffer.LockBuffer();
9857 
9858 	if (userPath != NULL) {
9859 		if (!IS_USER_ADDRESS(userPath)
9860 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9861 			return B_BAD_ADDRESS;
9862 	}
9863 
9864 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9865 }
9866 
9867 
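/*!	\brief Backs the chroot() function; only the root user may call it.
	Resolves \a userPath to a vnode and installs it as the root of the
	calling team's IO context, putting the reference to the previous root.
*/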
9868 status_t
9869 _user_change_root(const char* userPath)
9870 {
9871 	// only root is allowed to chroot()
9872 	if (geteuid() != 0)
9873 		return B_NOT_ALLOWED;
9874 
9875 	// alloc path buffer
9876 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9877 	if (pathBuffer.InitCheck() != B_OK)
9878 		return B_NO_MEMORY;
9879 
9880 	// copy userland path to kernel
9881 	char* path = pathBuffer.LockBuffer();
9882 	if (userPath == NULL)
9883 		return B_BAD_VALUE;
9884 	if (!IS_USER_ADDRESS(userPath)
9885 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9886 		return B_BAD_ADDRESS;
9887 
9888 	// get the vnode
9889 	struct vnode* vnode;
9890 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9891 	if (status != B_OK)
9892 		return status;
9893 
9894 	// set the new root
9895 	struct io_context* context = get_current_io_context(false);
9896 	mutex_lock(&sIOContextRootLock);
9897 	struct vnode* oldRoot = context->root;
9898 	context->root = vnode;
9899 	mutex_unlock(&sIOContextRootLock);
9900 
9901 	put_vnode(oldRoot);
9902 
9903 	return B_OK;
9904 }
9905 
9906 
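/*!	\brief Opens a query on volume \a device and returns a new FD for it.
	\a userQuery is the query string of length \a queryLength; for live
	queries (B_LIVE_QUERY in \a flags) updates are delivered to \a port
	using \a token.
*/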
9907 int
9908 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9909 	uint32 flags, port_id port, int32 token)
9910 {
9911 	char* query;
9912 
9913 	if (device < 0 || userQuery == NULL || queryLength == 0)
9914 		return B_BAD_VALUE;
9915 
9916 	if (!IS_USER_ADDRESS(userQuery))
9917 		return B_BAD_ADDRESS;
9918 
9919 	// this is a safety restriction to prevent unbounded kernel allocations
9920 	if (queryLength >= 65536)
9921 		return B_NAME_TOO_LONG;
9922 
9923 	query = (char*)malloc(queryLength + 1);
9924 	if (query == NULL)
9925 		return B_NO_MEMORY;
9926 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9927 		free(query);
9928 		return B_BAD_ADDRESS;
9929 	}
9930 
9931 	int fd = query_open(device, query, flags, port, token, false);
9932 
9933 	free(query);
9934 	return fd;
9935 }
9936 
9937 
9938 #include "vfs_request_io.cpp"
9939