xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 16ad15142c48ee36cd6a807a24efc99c88d4310d)
/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
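
// A minimal usage sketch (illustrative only, not compiled): check for a hook
// with HAS_FS_CALL() before invoking it via FS_CALL(), just as
// normalize_flock() further below does with the read_stat hook.
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;
	return FS_CALL(vnode, read_stat, stat);
}
#endif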


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() -- this does not depend
	// on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is ensured that it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, holding the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
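
// A minimal sketch (illustrative only, not compiled) of the locking protocol
// described above: flipping a vnode flag requires sVnodeLock read locked
// *and* the vnode itself locked -- the same pattern dec_vnode_ref_count()
// uses below.
#if 0
static void
example_mark_removed(struct vnode* vnode)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);
	vnode->SetRemoved(true);
}
#endif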


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes before giving up: up to 2000 retries with
// a 5000 µs delay each, i.e. 10 s in total.
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};
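
// Illustrative only (not compiled): VNodePutter binds an owned vnode
// reference to a scope, so that every return path implicitly calls
// put_vnode().
#if 0
static status_t
example_use_vnode(struct vnode* vnode)
{
	VNodePutter putter(vnode);	// assumes the caller passed us a reference
	if (vnode->IsRemoved())
		return B_ENTRY_NOT_FOUND;	// the putter releases the reference
	return B_OK;	// ditto
}
#endif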


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
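
// Illustrative only (not compiled): FDCloser plays the same role for file
// descriptors -- Detach() hands ownership of the FD back to the caller on
// the success path.
#if 0
static int
example_wrap_fd(int fd, bool kernel)
{
	FDCloser fdCloser(fd, kernel);	// assumes `fd` was just opened
	// ... setup steps that may fail and simply return an error code ...
	return fdCloser.Detach();	// success: keep the FD open and return it
}
#endif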

} // namespace


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note: you must hold sMountLock (at least read locked) when calling this
	function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}
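
// Illustrative only (not compiled): the canonical get_mount()/put_mount()
// pairing. The root vnode reference that get_mount() acquires keeps the FS
// from being unmounted in between.
#if 0
static status_t
example_with_mount(dev_t device)
{
	struct fs_mount* mount;
	status_t status = get_mount(device, &mount);
	if (status != B_OK)
		return status;

	// ... work with `mount` ...

	put_mount(mount);
	return B_OK;
}
#endif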


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
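
// For illustration: both get_file_system_name("bfs") and
// get_file_system_name("file_systems/bfs/v1") yield a malloc()ed "bfs",
// while get_file_system() maps "bfs" the other way, to the module
// "file_systems/bfs/v1".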


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
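
// For illustration: with fsNames = "null:bfs", layer 0 yields "null",
// layer 1 yields "bfs", and layer 2 yields NULL.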


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning, if one should
	still wait for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function is allowed to wait for a busy
		   vnode to become unbusy; otherwise \c B_BUSY is returned right away.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}
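
// Illustrative only (not compiled): every successful get_vnode() must be
// balanced by a put_vnode(), here done automatically via VNodePutter.
#if 0
static status_t
example_get_put(dev_t device, ino_t nodeID)
{
	struct vnode* vnode;
	status_t status = get_vnode(device, nodeID, &vnode, true, 0);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
	// ... use the vnode; the reference is released when we return ...
	return B_OK;
}
#endif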


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing via put_vnode().

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing via put_vnode().

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing via put_vnode().

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing via put_vnode().

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}
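
// For illustration: when a volume is mounted at a directory, the directory's
// vnode is "covered" by the new volume's root vnode (directoryVnode
// ->covered_by == volumeRoot, volumeRoot->covers == directoryVnode). Mounts
// stacked on the same directory form a chain, which is why the getters above
// walk the covers/covered_by links to their end.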


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note: you must not hold the vnode's lock when calling this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place -- otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success -- also if the vnode got such an
	object from someone else in the meantime; you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
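
// Worked example: an advisory_lock spanning [10, 19] and a (normalized) flock
// with l_start = 15, l_len = 10 intersect, since 10 <= 15 - 1 + 10 = 24 and
// 19 >= 15.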


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// malloc(), since locks are released with free() (cf. the
					// removal path below and acquire_advisory_lock())
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// i.e. the original end -- copied before the first
					// lock is truncated below
				secondLock->shared = lock->shared;

				// cut the end of the first lock
				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}
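
// Worked example for the "divide" case above: given an existing lock [0, 99],
// unlocking [40, 59] (l_start = 40, l_len = 20) truncates the original lock
// to [0, 39] and adds a second lock spanning [60, 99].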


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, which seems to be
	in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1836 
1837 
1838 /*!	Normalizes the \a flock structure to make it easier to compare the
1839 	structure with others. The l_start and l_len fields are set to absolute
1840 	values according to the l_whence field.
1841 */
1842 static status_t
1843 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1844 {
1845 	switch (flock->l_whence) {
1846 		case SEEK_SET:
1847 			break;
1848 		case SEEK_CUR:
1849 			flock->l_start += descriptor->pos;
1850 			break;
1851 		case SEEK_END:
1852 		{
1853 			struct vnode* vnode = descriptor->u.vnode;
1854 			struct stat stat;
1855 			status_t status;
1856 
1857 			if (!HAS_FS_CALL(vnode, read_stat))
1858 				return B_UNSUPPORTED;
1859 
1860 			status = FS_CALL(vnode, read_stat, &stat);
1861 			if (status != B_OK)
1862 				return status;
1863 
1864 			flock->l_start += stat.st_size;
1865 			break;
1866 		}
1867 		default:
1868 			return B_BAD_VALUE;
1869 	}
1870 
1871 	if (flock->l_start < 0)
1872 		flock->l_start = 0;
1873 	if (flock->l_len == 0)
1874 		flock->l_len = OFF_MAX;
1875 
1876 	// don't let the offset and length overflow
1877 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1878 		flock->l_len = OFF_MAX - flock->l_start;
1879 
1880 	if (flock->l_len < 0) {
1881 		// a negative length reverses the region
1882 		flock->l_start += flock->l_len;
1883 		flock->l_len = -flock->l_len;
1884 	}
1885 
1886 	return B_OK;
1887 }
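

/* Illustrative sketch (not part of the original source): given a descriptor
 * whose current position is 100, normalize_flock() rewrites
 *
 *	{ l_whence = SEEK_CUR, l_start = -20, l_len = 0 }
 *		-> { l_start = 80, l_len = OFF_MAX - 80 }	// "offset 80 to EOF"
 *
 * and a negative length reverses the region:
 *
 *	{ l_whence = SEEK_SET, l_start = 100, l_len = -10 }
 *		-> { l_start = 90, l_len = 10 }
 */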
1888 
1889 
1890 static void
1891 replace_vnode_if_disconnected(struct fs_mount* mount,
1892 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1893 	struct vnode* fallBack, bool lockRootLock)
1894 {
1895 	struct vnode* givenVnode = vnode;
1896 	bool vnodeReplaced = false;
1897 
1898 	ReadLocker vnodeReadLocker(sVnodeLock);
1899 
1900 	if (lockRootLock)
1901 		mutex_lock(&sIOContextRootLock);
1902 
1903 	while (vnode != NULL && vnode->mount == mount
1904 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1905 		if (vnode->covers != NULL) {
1906 			// redirect the vnode to the covered vnode
1907 			vnode = vnode->covers;
1908 		} else
1909 			vnode = fallBack;
1910 
1911 		vnodeReplaced = true;
1912 	}
1913 
1914 	// If we've replaced the node, grab a reference for the new one.
1915 	if (vnodeReplaced && vnode != NULL)
1916 		inc_vnode_ref_count(vnode);
1917 
1918 	if (lockRootLock)
1919 		mutex_unlock(&sIOContextRootLock);
1920 
1921 	vnodeReadLocker.Unlock();
1922 
1923 	if (vnodeReplaced)
1924 		put_vnode(givenVnode);
1925 }
1926 
1927 
1928 /*!	Disconnects all file descriptors that are associated with the
1929 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1930 	\a mount object.
1931 
1932 	Note, after you've called this function, there might still be ongoing
1933 	accesses - they won't be interrupted if they were already in progress.
1934 	However, any subsequent access will fail.
1935 
1936 	This is not a cheap function and should be used with care and rarely.
1937 	TODO: there is currently no means to stop a blocking read/write!
1938 */
1939 static void
1940 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1941 	struct vnode* vnodeToDisconnect)
1942 {
1943 	// iterate over all teams and peek into their file descriptors
1944 	TeamListIterator teamIterator;
1945 	while (Team* team = teamIterator.Next()) {
1946 		BReference<Team> teamReference(team, true);
1947 		TeamLocker teamLocker(team);
1948 
1949 		// lock the I/O context
1950 		io_context* context = team->io_context;
1951 		if (context == NULL)
1952 			continue;
1953 		MutexLocker contextLocker(context->io_mutex);
1954 
1955 		teamLocker.Unlock();
1956 
1957 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1958 			sRoot, true);
1959 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1960 			sRoot, false);
1961 
1962 		for (uint32 i = 0; i < context->table_size; i++) {
1963 			struct file_descriptor* descriptor = context->fds[i];
1964 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1965 				continue;
1966 
1967 			inc_fd_ref_count(descriptor);
1968 
1969 			// if this descriptor points at this mount, we
1970 			// need to disconnect it to be able to unmount
1971 			struct vnode* vnode = fd_vnode(descriptor);
1972 			if (vnodeToDisconnect != NULL) {
1973 				if (vnode == vnodeToDisconnect)
1974 					disconnect_fd(descriptor);
1975 			} else if ((vnode != NULL && vnode->mount == mount)
1976 				|| (vnode == NULL && descriptor->u.mount == mount))
1977 				disconnect_fd(descriptor);
1978 
1979 			put_fd(descriptor);
1980 		}
1981 	}
1982 }
1983 
1984 
1985 /*!	\brief Gets the root node of the current IO context.
1986 	If \a kernel is \c true, the kernel IO context will be used.
1987 	The caller obtains a reference to the returned node.
1988 */
1989 struct vnode*
1990 get_root_vnode(bool kernel)
1991 {
1992 	if (!kernel) {
1993 		// Get current working directory from io context
1994 		struct io_context* context = get_current_io_context(kernel);
1995 
1996 		mutex_lock(&sIOContextRootLock);
1997 
1998 		struct vnode* root = context->root;
1999 		if (root != NULL)
2000 			inc_vnode_ref_count(root);
2001 
2002 		mutex_unlock(&sIOContextRootLock);
2003 
2004 		if (root != NULL)
2005 			return root;
2006 
2007 		// That should never happen.
2008 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2009 			"have a root\n", team_get_current_team_id());
2010 	}
2011 
2012 	inc_vnode_ref_count(sRoot);
2013 	return sRoot;
2014 }
2015 
2016 
2017 /*!	\brief Gets the directory path and leaf name for a given path.
2018 
2019 	The supplied \a path is transformed to refer to the directory part of
2020 	the entry identified by the original path, and into the buffer \a filename
2021 	the leaf name of the original entry is written.
2022 	Neither the returned path nor the leaf name can be expected to be
2023 	canonical.
2024 
2025 	\param path The path to be analyzed. Must be able to store at least one
2026 		   additional character.
2027 	\param filename The buffer into which the leaf name will be written.
2028 		   Must be of size B_FILE_NAME_LENGTH at least.
2029 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2030 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2031 		   if the given path name is empty.
2032 */
2033 static status_t
2034 get_dir_path_and_leaf(char* path, char* filename)
2035 {
2036 	if (*path == '\0')
2037 		return B_ENTRY_NOT_FOUND;
2038 
2039 	char* last = strrchr(path, '/');
2040 		// '/' are not allowed in file names!
2041 
2042 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2043 
2044 	if (last == NULL) {
2045 		// this path is a single segment with no '/' in it,
2046 		// e.g. "foo"
2047 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2048 			return B_NAME_TOO_LONG;
2049 
2050 		strcpy(path, ".");
2051 	} else {
2052 		last++;
2053 		if (last[0] == '\0') {
2054 			// special case: the path ends in one or more '/' - remove them
2055 			while (*--last == '/' && last != path);
2056 			last[1] = '\0';
2057 
2058 			if (last == path && last[0] == '/') {
2059 				// This path points to the root of the file system
2060 				strcpy(filename, ".");
2061 				return B_OK;
2062 			}
2063 			for (; last != path && *(last - 1) != '/'; last--);
2064 				// rewind to the start of the leaf before the '/'
2065 		}
2066 
2067 		// normal leaf: replace the leaf portion of the path with a '.'
2068 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2069 			return B_NAME_TOO_LONG;
2070 
2071 		last[0] = '.';
2072 		last[1] = '\0';
2073 	}
2074 	return B_OK;
2075 }
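

/* Illustrative sketch (not part of the original source): what the function
 * above does to its buffers:
 *
 *	"/boot/home"  -> path "/boot/.",  filename "home"
 *	"/boot/home/" -> path "/boot/.",  filename "home"	// trailing '/' removed
 *	"foo"         -> path ".",        filename "foo"
 *	"/"           -> path "/",        filename "."
 */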
2076 
2077 
2078 static status_t
2079 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2080 	bool traverse, bool kernel, struct vnode** _vnode)
2081 {
2082 	char clonedName[B_FILE_NAME_LENGTH + 1];
2083 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2084 		return B_NAME_TOO_LONG;
2085 
2086 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2087 	struct vnode* directory;
2088 
2089 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2090 	if (status < 0)
2091 		return status;
2092 
2093 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2094 		_vnode, NULL);
2095 }
2096 
2097 
2098 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2099 	and returns the respective vnode.
2100 	On success a reference to the vnode is acquired for the caller.
2101 */
2102 static status_t
2103 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2104 {
2105 	ino_t id;
2106 	bool missing;
2107 
2108 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2109 		return missing ? B_ENTRY_NOT_FOUND
2110 			: get_vnode(dir->device, id, _vnode, true, false);
2111 	}
2112 
2113 	status_t status = FS_CALL(dir, lookup, name, &id);
2114 	if (status != B_OK)
2115 		return status;
2116 
2117 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2118 	// have a reference and just need to look the node up.
2119 	rw_lock_read_lock(&sVnodeLock);
2120 	*_vnode = lookup_vnode(dir->device, id);
2121 	rw_lock_read_unlock(&sVnodeLock);
2122 
2123 	if (*_vnode == NULL) {
2124 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2125 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2126 		return B_ENTRY_NOT_FOUND;
2127 	}
2128 
2129 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2130 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2131 //		(*_vnode)->mount->id, (*_vnode)->id);
2132 
2133 	return B_OK;
2134 }
2135 
2136 
2137 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2138 	\a path must not be NULL.
2139 	If it returns successfully, \a path contains the name of the last path
2140 	component. This function clobbers the buffer pointed to by \a path only
2141 	if it does contain more than one component.
2142 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2143 	it is successful or not!
2144 */
2145 static status_t
2146 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2147 	int count, struct io_context* ioContext, struct vnode** _vnode,
2148 	ino_t* _parentID)
2149 {
2150 	status_t status = B_OK;
2151 	ino_t lastParentID = vnode->id;
2152 
2153 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2154 
2155 	if (path == NULL) {
2156 		put_vnode(vnode);
2157 		return B_BAD_VALUE;
2158 	}
2159 
2160 	if (*path == '\0') {
2161 		put_vnode(vnode);
2162 		return B_ENTRY_NOT_FOUND;
2163 	}
2164 
2165 	while (true) {
2166 		struct vnode* nextVnode;
2167 		char* nextPath;
2168 
2169 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2170 			path));
2171 
2172 		// done?
2173 		if (path[0] == '\0')
2174 			break;
2175 
2176 		// walk to find the next path component ("path" will point to a single
2177 		// path component), and filter out multiple slashes
2178 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2179 				nextPath++);
2180 
2181 		if (*nextPath == '/') {
2182 			*nextPath = '\0';
2183 			do
2184 				nextPath++;
2185 			while (*nextPath == '/');
2186 		}
2187 
2188 		// If the '..' is at a covering vnode, move to the covered vnode so
2189 		// we pass the '..' path to the underlying file system.
2190 		// Also prevent escaping the root of the IO context.
2191 		if (strcmp("..", path) == 0) {
2192 			if (vnode == ioContext->root) {
2193 				// Attempted prison break! Keep it contained.
2194 				path = nextPath;
2195 				continue;
2196 			}
2197 
2198 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2199 				nextVnode = coveredVnode;
2200 				put_vnode(vnode);
2201 				vnode = nextVnode;
2202 			}
2203 		}
2204 
2205 		// check if vnode is really a directory
2206 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2207 			status = B_NOT_A_DIRECTORY;
2208 
2209 		// Check if we have the right to search the current directory vnode.
2210 		// If a file system doesn't have the access() function, we assume that
2211 		// searching a directory is always allowed
2212 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2213 			status = FS_CALL(vnode, access, X_OK);
2214 
2215 		// Tell the filesystem to get the vnode of this path component (if we
2216 		// got the permission from the call above)
2217 		if (status == B_OK)
2218 			status = lookup_dir_entry(vnode, path, &nextVnode);
2219 
2220 		if (status != B_OK) {
2221 			put_vnode(vnode);
2222 			return status;
2223 		}
2224 
2225 		// If the new node is a symbolic link, resolve it (if we've been told
2226 		// to do it)
2227 		if (S_ISLNK(nextVnode->Type())
2228 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2229 			size_t bufferSize;
2230 			char* buffer;
2231 
2232 			TRACE(("traverse link\n"));
2233 
2234 			// it's not exactly nice style using goto in this way, but hey,
2235 			// it works :-/
2236 			if (count + 1 > B_MAX_SYMLINKS) {
2237 				status = B_LINK_LIMIT;
2238 				goto resolve_link_error;
2239 			}
2240 
2241 			bufferSize = B_PATH_NAME_LENGTH;
2242 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2243 			if (buffer == NULL) {
2244 				status = B_NO_MEMORY;
2245 				goto resolve_link_error;
2246 			}
2247 
2248 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2249 				bufferSize--;
2250 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2251 				// null-terminate
2252 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2253 					buffer[bufferSize] = '\0';
2254 			} else
2255 				status = B_BAD_VALUE;
2256 
2257 			if (status != B_OK) {
2258 				free(buffer);
2259 
2260 		resolve_link_error:
2261 				put_vnode(vnode);
2262 				put_vnode(nextVnode);
2263 
2264 				return status;
2265 			}
2266 			put_vnode(nextVnode);
2267 
2268 			// Check if we start from the root directory or the current
2269 			// directory ("vnode" still points to that one).
2270 			// Cut off all leading slashes if it's the root directory
2271 			path = buffer;
2272 			bool absoluteSymlink = false;
2273 			if (path[0] == '/') {
2274 				// we don't need the old directory anymore
2275 				put_vnode(vnode);
2276 
2277 				while (*++path == '/')
2278 					;
2279 
2280 				mutex_lock(&sIOContextRootLock);
2281 				vnode = ioContext->root;
2282 				inc_vnode_ref_count(vnode);
2283 				mutex_unlock(&sIOContextRootLock);
2284 
2285 				absoluteSymlink = true;
2286 			}
2287 
2288 			inc_vnode_ref_count(vnode);
2289 				// balance the next recursion - we will decrement the
2290 				// ref_count of the vnode, no matter whether it succeeds or not
2291 
2292 			if (absoluteSymlink && *path == '\0') {
2293 				// symlink was just "/"
2294 				nextVnode = vnode;
2295 			} else {
2296 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2297 					ioContext, &nextVnode, &lastParentID);
2298 			}
2299 
2300 			object_cache_free(sPathNameCache, buffer, 0);
2301 
2302 			if (status != B_OK) {
2303 				put_vnode(vnode);
2304 				return status;
2305 			}
2306 		} else
2307 			lastParentID = vnode->id;
2308 
2309 		// decrease the ref count on the old dir we just looked up into
2310 		put_vnode(vnode);
2311 
2312 		path = nextPath;
2313 		vnode = nextVnode;
2314 
2315 		// see if we hit a covered node
2316 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2317 			put_vnode(vnode);
2318 			vnode = coveringNode;
2319 		}
2320 	}
2321 
2322 	*_vnode = vnode;
2323 	if (_parentID)
2324 		*_parentID = lastParentID;
2325 
2326 	return B_OK;
2327 }
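

/* Illustrative sketch (not part of the original source): resolving the
 * relative path "tmp/foo" against a directory vnode consumes one component
 * per loop iteration, clobbering the buffer in place:
 *
 *	"tmp/foo" -> "tmp\0foo"		// look up "tmp" in the starting directory
 *	path = "foo"			// then look up "foo" in the result
 *
 * A symlink encountered on the way restarts the walk with the link contents,
 * up to B_MAX_SYMLINKS levels deep.
 */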
2328 
2329 
2330 static status_t
2331 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2332 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2333 {
2334 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2335 		get_current_io_context(kernel), _vnode, _parentID);
2336 }
2337 
2338 
2339 static status_t
2340 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2341 	ino_t* _parentID, bool kernel)
2342 {
2343 	struct vnode* start = NULL;
2344 
2345 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2346 
2347 	if (!path)
2348 		return B_BAD_VALUE;
2349 
2350 	if (*path == '\0')
2351 		return B_ENTRY_NOT_FOUND;
2352 
2353 	// figure out if we need to start at root or at cwd
2354 	if (*path == '/') {
2355 		if (sRoot == NULL) {
2356 			// we're a bit early, aren't we?
2357 			return B_ERROR;
2358 		}
2359 
2360 		while (*++path == '/')
2361 			;
2362 		start = get_root_vnode(kernel);
2363 
2364 		if (*path == '\0') {
2365 			*_vnode = start;
2366 			return B_OK;
2367 		}
2368 
2369 	} else {
2370 		struct io_context* context = get_current_io_context(kernel);
2371 
2372 		mutex_lock(&context->io_mutex);
2373 		start = context->cwd;
2374 		if (start != NULL)
2375 			inc_vnode_ref_count(start);
2376 		mutex_unlock(&context->io_mutex);
2377 
2378 		if (start == NULL)
2379 			return B_ERROR;
2380 	}
2381 
2382 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2383 		_parentID);
2384 }
2385 
2386 
2387 /*! Returns the vnode for the next-to-last segment of the path, and returns
2388 	the last portion in \a filename.
2389 	The path buffer must be able to store at least one additional character.
2390 */
2391 static status_t
2392 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2393 	bool kernel)
2394 {
2395 	status_t status = get_dir_path_and_leaf(path, filename);
2396 	if (status != B_OK)
2397 		return status;
2398 
2399 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2400 }
2401 
2402 
2403 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2404 		   to by a FD + path pair.
2405 
2406 	\a path must be given in either case. \a fd might be omitted, in which
2407 	case \a path is either an absolute path or one relative to the current
2408 	directory. If both are supplied and \a path is relative, it is reckoned off
2409 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2410 	ignored.
2411 
2412 	The caller has the responsibility to call put_vnode() on the returned
2413 	directory vnode.
2414 
2415 	\param fd The FD. May be < 0.
2416 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2417 	       is modified by this function. It must have at least room for a
2418 	       string one character longer than the path it contains.
2419 	\param _vnode A pointer to a variable the directory vnode shall be written
2420 		   into.
2421 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2422 		   the leaf name of the specified entry will be written.
2423 	\param kernel \c true, if invoked from inside the kernel, \c false if
2424 		   invoked from userland.
2425 	\return \c B_OK, if everything went fine, another error code otherwise.
2426 */
2427 static status_t
2428 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2429 	char* filename, bool kernel)
2430 {
2431 	if (!path)
2432 		return B_BAD_VALUE;
2433 	if (*path == '\0')
2434 		return B_ENTRY_NOT_FOUND;
2435 	if (fd < 0)
2436 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2437 
2438 	status_t status = get_dir_path_and_leaf(path, filename);
2439 	if (status != B_OK)
2440 		return status;
2441 
2442 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2443 }
2444 
2445 
2446 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2447 		   to by a vnode + path pair.
2448 
2449 	\a path must be given in either case. \a vnode might be omitted, in which
2450 	case \a path is either an absolute path or one relative to the current
2451 	directory. If both are supplied and \a path is relative, it is reckoned off
2452 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2453 	ignored.
2454 
2455 	The caller has the responsibility to call put_vnode() on the returned
2456 	directory vnode.
2457 
2458 	\param vnode The vnode. May be \c NULL.
2459 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2460 	       is modified by this function. It must have at least room for a
2461 	       string one character longer than the path it contains.
2462 	\param _vnode A pointer to a variable the directory vnode shall be written
2463 		   into.
2464 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2465 		   the leaf name of the specified entry will be written.
2466 	\param kernel \c true, if invoked from inside the kernel, \c false if
2467 		   invoked from userland.
2468 	\return \c B_OK, if everything went fine, another error code otherwise.
2469 */
2470 static status_t
2471 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2472 	struct vnode** _vnode, char* filename, bool kernel)
2473 {
2474 	if (!path)
2475 		return B_BAD_VALUE;
2476 	if (*path == '\0')
2477 		return B_ENTRY_NOT_FOUND;
2478 	if (vnode == NULL || path[0] == '/')
2479 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2480 
2481 	status_t status = get_dir_path_and_leaf(path, filename);
2482 	if (status != B_OK)
2483 		return status;
2484 
2485 	inc_vnode_ref_count(vnode);
2486 		// vnode_path_to_vnode() always decrements the ref count
2487 
2488 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2489 }
2490 
2491 
2492 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2493 */
2494 static status_t
2495 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2496 	size_t bufferSize, struct io_context* ioContext)
2497 {
2498 	if (bufferSize < sizeof(struct dirent))
2499 		return B_BAD_VALUE;
2500 
2501 	// See if the vnode is covering another vnode and move to the covered
2502 	// vnode so we get the underlying file system
2503 	VNodePutter vnodePutter;
2504 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2505 		vnode = coveredVnode;
2506 		vnodePutter.SetTo(vnode);
2507 	}
2508 
2509 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2510 		// The FS supports getting the name of a vnode.
2511 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2512 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2513 			return B_OK;
2514 	}
2515 
2516 	// The FS doesn't support getting the name of a vnode. So we search the
2517 	// parent directory for the vnode, if the caller let us.
2518 
2519 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2520 		return B_UNSUPPORTED;
2521 
2522 	void* cookie;
2523 
2524 	status_t status = FS_CALL(parent, open_dir, &cookie);
2525 	if (status >= B_OK) {
2526 		while (true) {
2527 			uint32 num = 1;
2528 			// We use the FS hook directly instead of dir_read(), since we don't
2529 			// want the entries to be fixed up. We have already resolved vnode to
2530 			// the covered node.
2531 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2532 				&num);
2533 			if (status != B_OK)
2534 				break;
2535 			if (num == 0) {
2536 				status = B_ENTRY_NOT_FOUND;
2537 				break;
2538 			}
2539 
2540 			if (vnode->id == buffer->d_ino) {
2541 				// found correct entry!
2542 				break;
2543 			}
2544 		}
2545 
2546 		FS_CALL(parent, close_dir, cookie);
2547 		FS_CALL(parent, free_dir_cookie, cookie);
2548 	}
2549 	return status;
2550 }
2551 
2552 
2553 static status_t
2554 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2555 	size_t nameSize, bool kernel)
2556 {
2557 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2558 	struct dirent* dirent = (struct dirent*)buffer;
2559 
2560 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2561 		get_current_io_context(kernel));
2562 	if (status != B_OK)
2563 		return status;
2564 
2565 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2566 		return B_BUFFER_OVERFLOW;
2567 
2568 	return B_OK;
2569 }
2570 
2571 
2572 /*!	Gets the full path to a given directory vnode.
2573 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2574 	file system doesn't support this call, it will fall back to iterating
2575 	through the parent directory to get the name of the child.
2576 
2577 	To protect against circular loops, it supports a maximum tree depth
2578 	of 256 levels.
2579 
2580 	Note that the path might not be correct by the time this function returns!
2581 	It doesn't use any locking to guarantee the returned path is correct, as
2582 	paths aren't safe anyway: the path to a file can change at any time.
2583 
2584 	It might be a good idea, though, to check if the returned path exists
2585 	in the calling function (it's not done here because of efficiency)
2586 */
2587 static status_t
2588 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2589 	bool kernel)
2590 {
2591 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2592 
2593 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2594 		return B_BAD_VALUE;
2595 
2596 	if (!S_ISDIR(vnode->Type()))
2597 		return B_NOT_A_DIRECTORY;
2598 
2599 	char* path = buffer;
2600 	int32 insert = bufferSize;
2601 	int32 maxLevel = 256;
2602 	int32 length;
2603 	status_t status = B_OK;
2604 	struct io_context* ioContext = get_current_io_context(kernel);
2605 
2606 	// we don't use get_vnode() here because this call is more
2607 	// efficient and does all we need from get_vnode()
2608 	inc_vnode_ref_count(vnode);
2609 
2610 	path[--insert] = '\0';
2611 		// the path is filled right to left
2612 
2613 	while (true) {
2614 		// If the node is the context's root, bail out. Otherwise resolve mount
2615 		// points.
2616 		if (vnode == ioContext->root)
2617 			break;
2618 
2619 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2620 			put_vnode(vnode);
2621 			vnode = coveredVnode;
2622 		}
2623 
2624 		// lookup the parent vnode
2625 		struct vnode* parentVnode;
2626 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		if (parentVnode == vnode) {
2631 			// The caller apparently got their hands on a node outside of their
2632 			// context's root. Now we've hit the global root.
2633 			put_vnode(parentVnode);
2634 			break;
2635 		}
2636 
2637 		// get the node's name
2638 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2639 			// also used for fs_read_dir()
2640 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2641 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2642 			sizeof(nameBuffer), ioContext);
2643 
2644 		// release the current vnode, we only need its parent from now on
2645 		put_vnode(vnode);
2646 		vnode = parentVnode;
2647 
2648 		if (status != B_OK)
2649 			goto out;
2650 
2651 		// TODO: add an explicit check for loops in about 10 levels to do
2652 		// real loop detection
2653 
2654 		// don't go deeper than 'maxLevel' to prevent circular loops
2655 		if (maxLevel-- < 0) {
2656 			status = B_LINK_LIMIT;
2657 			goto out;
2658 		}
2659 
2660 		// add the name in front of the current path
2661 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2662 		length = strlen(name);
2663 		insert -= length;
2664 		if (insert <= 0) {
2665 			status = B_RESULT_NOT_REPRESENTABLE;
2666 			goto out;
2667 		}
2668 		memcpy(path + insert, name, length);
2669 		path[--insert] = '/';
2670 	}
2671 
2672 	// the root dir will result in an empty path: fix it
2673 	if (path[insert] == '\0')
2674 		path[--insert] = '/';
2675 
2676 	TRACE(("  path is: %s\n", path + insert));
2677 
2678 	// move the path to the start of the buffer
2679 	length = bufferSize - insert;
2680 	memmove(buffer, path + insert, length);
2681 
2682 out:
2683 	put_vnode(vnode);
2684 	return status;
2685 }
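

/* Illustrative sketch (not part of the original source): the buffer is
 * filled from right to left while walking ".." upwards, e.g. for the vnode
 * of /boot/home:
 *
 *	start:          "..............\0"
 *	prepend "home": "........./home\0"
 *	prepend "boot": "..../boot/home\0"
 *	root reached:   "/boot/home\0"		// memmove'd to the buffer start
 */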
2686 
2687 
2688 /*!	Checks the length of every path component, and adds a '.'
2689 	if the path ends in a slash.
2690 	The given path buffer must be able to store at least one
2691 	additional character.
2692 */
2693 static status_t
2694 check_path(char* to)
2695 {
2696 	int32 length = 0;
2697 
2698 	// check length of every path component
2699 
2700 	while (*to) {
2701 		char* begin;
2702 		if (*to == '/')
2703 			to++, length++;
2704 
2705 		begin = to;
2706 		while (*to != '/' && *to)
2707 			to++, length++;
2708 
2709 		if (to - begin > B_FILE_NAME_LENGTH)
2710 			return B_NAME_TOO_LONG;
2711 	}
2712 
2713 	if (length == 0)
2714 		return B_ENTRY_NOT_FOUND;
2715 
2716 	// complete path if there is a slash at the end
2717 
2718 	if (*(to - 1) == '/') {
2719 		if (length > B_PATH_NAME_LENGTH - 2)
2720 			return B_NAME_TOO_LONG;
2721 
2722 		to[0] = '.';
2723 		to[1] = '\0';
2724 	}
2725 
2726 	return B_OK;
2727 }
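

/* Illustrative sketch (not part of the original source): check_path() leaves
 * "/foo/bar" untouched, rejects any component longer than B_FILE_NAME_LENGTH
 * with B_NAME_TOO_LONG, and completes a trailing slash:
 *
 *	"/foo/bar/" -> "/foo/bar/."
 */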
2728 
2729 
2730 static struct file_descriptor*
2731 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2732 {
2733 	struct file_descriptor* descriptor
2734 		= get_fd(get_current_io_context(kernel), fd);
2735 	if (descriptor == NULL)
2736 		return NULL;
2737 
2738 	struct vnode* vnode = fd_vnode(descriptor);
2739 	if (vnode == NULL) {
2740 		put_fd(descriptor);
2741 		return NULL;
2742 	}
2743 
2744 	// ToDo: when we can close a file descriptor at any point, investigate
2745 	//	if this is still valid to do (accessing the vnode without ref_count
2746 	//	or locking)
2747 	*_vnode = vnode;
2748 	return descriptor;
2749 }
2750 
2751 
2752 static struct vnode*
2753 get_vnode_from_fd(int fd, bool kernel)
2754 {
2755 	struct file_descriptor* descriptor;
2756 	struct vnode* vnode;
2757 
2758 	descriptor = get_fd(get_current_io_context(kernel), fd);
2759 	if (descriptor == NULL)
2760 		return NULL;
2761 
2762 	vnode = fd_vnode(descriptor);
2763 	if (vnode != NULL)
2764 		inc_vnode_ref_count(vnode);
2765 
2766 	put_fd(descriptor);
2767 	return vnode;
2768 }
2769 
2770 
2771 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2772 	only the path will be considered. In this case, the \a path must not be
2773 	NULL.
2774 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2775 	and should be NULL for files.
2776 */
2777 static status_t
2778 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2779 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2780 {
2781 	if (fd < 0 && !path)
2782 		return B_BAD_VALUE;
2783 
2784 	if (path != NULL && *path == '\0')
2785 		return B_ENTRY_NOT_FOUND;
2786 
2787 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2788 		// no FD or absolute path
2789 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2790 	}
2791 
2792 	// FD only, or FD + relative path
2793 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2794 	if (vnode == NULL)
2795 		return B_FILE_ERROR;
2796 
2797 	if (path != NULL) {
2798 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2799 			_vnode, _parentID);
2800 	}
2801 
2802 	// there is no relative path to take into account
2803 
2804 	*_vnode = vnode;
2805 	if (_parentID)
2806 		*_parentID = -1;
2807 
2808 	return B_OK;
2809 }
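

/* Illustrative sketch (not part of the original source): the FD + path
 * combinations accepted above:
 *
 *	fd < 0,  path "/tmp/x" -> resolved via path_to_vnode()
 *	fd dir,  path "x"      -> "x" resolved relative to the FD's vnode
 *	fd open, path NULL     -> the FD's own vnode is returned directly
 */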
2810 
2811 
2812 static int
2813 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2814 	void* cookie, int openMode, bool kernel)
2815 {
2816 	struct file_descriptor* descriptor;
2817 	int fd;
2818 
2819 	// If the vnode is mandatory-locked, we don't allow creating a new file
2820 	// or directory descriptor for it
2821 	if (vnode && vnode->mandatory_locked_by != NULL
2822 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2823 		return B_BUSY;
2824 
2825 	descriptor = alloc_fd();
2826 	if (!descriptor)
2827 		return B_NO_MEMORY;
2828 
2829 	if (vnode)
2830 		descriptor->u.vnode = vnode;
2831 	else
2832 		descriptor->u.mount = mount;
2833 	descriptor->cookie = cookie;
2834 
2835 	switch (type) {
2836 		// vnode types
2837 		case FDTYPE_FILE:
2838 			descriptor->ops = &sFileOps;
2839 			break;
2840 		case FDTYPE_DIR:
2841 			descriptor->ops = &sDirectoryOps;
2842 			break;
2843 		case FDTYPE_ATTR:
2844 			descriptor->ops = &sAttributeOps;
2845 			break;
2846 		case FDTYPE_ATTR_DIR:
2847 			descriptor->ops = &sAttributeDirectoryOps;
2848 			break;
2849 
2850 		// mount types
2851 		case FDTYPE_INDEX_DIR:
2852 			descriptor->ops = &sIndexDirectoryOps;
2853 			break;
2854 		case FDTYPE_QUERY:
2855 			descriptor->ops = &sQueryOps;
2856 			break;
2857 
2858 		default:
2859 			panic("get_new_fd() called with unknown type %d\n", type);
2860 			break;
2861 	}
2862 	descriptor->type = type;
2863 	descriptor->open_mode = openMode;
2864 
2865 	io_context* context = get_current_io_context(kernel);
2866 	fd = new_fd(context, descriptor);
2867 	if (fd < 0) {
2868 		descriptor->ops = NULL;
2869 		put_fd(descriptor);
2870 		return B_NO_MORE_FDS;
2871 	}
2872 
2873 	mutex_lock(&context->io_mutex);
2874 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2875 	mutex_unlock(&context->io_mutex);
2876 
2877 	return fd;
2878 }
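

/* Illustrative usage sketch (assumed, not taken from this section): an
 * open() style caller hands the freshly opened vnode and FS cookie over to
 * get_new_fd() and has to undo the open itself on failure:
 *
 *	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
 *	if (fd < 0) {
 *		FS_CALL(vnode, close, cookie);
 *			// plus cookie/vnode cleanup, depending on the caller
 *		return fd;
 *	}
 */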
2879 
2880 
2881 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2882 	vfs_normalize_path(). See there for more documentation.
2883 */
2884 static status_t
2885 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2886 {
2887 	VNodePutter dirPutter;
2888 	struct vnode* dir = NULL;
2889 	status_t error;
2890 
2891 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2892 		// get dir vnode + leaf name
2893 		struct vnode* nextDir;
2894 		char leaf[B_FILE_NAME_LENGTH];
2895 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2896 		if (error != B_OK)
2897 			return error;
2898 
2899 		dir = nextDir;
2900 		strcpy(path, leaf);
2901 		dirPutter.SetTo(dir);
2902 
2903 		// get file vnode, if we shall resolve links
2904 		bool fileExists = false;
2905 		struct vnode* fileVnode;
2906 		VNodePutter fileVnodePutter;
2907 		if (traverseLink) {
2908 			inc_vnode_ref_count(dir);
2909 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2910 					NULL) == B_OK) {
2911 				fileVnodePutter.SetTo(fileVnode);
2912 				fileExists = true;
2913 			}
2914 		}
2915 
2916 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2917 			// we're done -- construct the path
2918 			bool hasLeaf = true;
2919 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2920 				// special cases "." and ".." -- get the dir, forget the leaf
2921 				inc_vnode_ref_count(dir);
2922 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2923 					&nextDir, NULL);
2924 				if (error != B_OK)
2925 					return error;
2926 				dir = nextDir;
2927 				dirPutter.SetTo(dir);
2928 				hasLeaf = false;
2929 			}
2930 
2931 			// get the directory path
2932 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2933 			if (error != B_OK)
2934 				return error;
2935 
2936 			// append the leaf name
2937 			if (hasLeaf) {
2938 				// insert a directory separator if this is not the file system
2939 				// root
2940 				if ((strcmp(path, "/") != 0
2941 					&& strlcat(path, "/", pathSize) >= pathSize)
2942 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2943 					return B_NAME_TOO_LONG;
2944 				}
2945 			}
2946 
2947 			return B_OK;
2948 		}
2949 
2950 		// read link
2951 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2952 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2953 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2954 			if (error != B_OK)
2955 				return error;
2956 			if (bufferSize < B_PATH_NAME_LENGTH)
2957 				path[bufferSize] = '\0';
2958 		} else
2959 			return B_BAD_VALUE;
2960 	}
2961 
2962 	return B_LINK_LIMIT;
2963 }
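

/* Illustrative sketch (not part of the original source): assuming /boot/home
 * contains no symlinks,
 *
 *	"/boot/home/../home/./Desktop" -> "/boot/home/Desktop"
 *
 * If the leaf is a symlink and traverseLink is true, the link is read and
 * the result is normalized again, up to B_MAX_SYMLINKS times.
 */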
2964 
2965 
2966 static status_t
2967 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2968 	struct io_context* ioContext)
2969 {
2970 	// Make sure the IO context root is not bypassed.
2971 	if (parent == ioContext->root) {
2972 		*_device = parent->device;
2973 		*_node = parent->id;
2974 		return B_OK;
2975 	}
2976 
2977 	inc_vnode_ref_count(parent);
2978 		// vnode_path_to_vnode() puts the node
2979 
2980 	// ".." is guaranteed not to be clobbered by this call
2981 	struct vnode* vnode;
2982 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2983 		ioContext, &vnode, NULL);
2984 	if (status == B_OK) {
2985 		*_device = vnode->device;
2986 		*_node = vnode->id;
2987 		put_vnode(vnode);
2988 	}
2989 
2990 	return status;
2991 }
2992 
2993 
2994 #ifdef ADD_DEBUGGER_COMMANDS
2995 
2996 
2997 static void
2998 _dump_advisory_locking(advisory_locking* locking)
2999 {
3000 	if (locking == NULL)
3001 		return;
3002 
3003 	kprintf("   lock:        %" B_PRId32 "\n", locking->lock);
3004 	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
3005 
3006 	int32 index = 0;
3007 	LockList::Iterator iterator = locking->locks.GetIterator();
3008 	while (iterator.HasNext()) {
3009 		struct advisory_lock* lock = iterator.Next();
3010 
3011 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3012 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3013 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3014 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3015 	}
3016 }
3017 
3018 
3019 static void
3020 _dump_mount(struct fs_mount* mount)
3021 {
3022 	kprintf("MOUNT: %p\n", mount);
3023 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3024 	kprintf(" device_name:   %s\n", mount->device_name);
3025 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3026 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3027 	kprintf(" partition:     %p\n", mount->partition);
3028 	kprintf(" lock:          %p\n", &mount->lock);
3029 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3030 		mount->owns_file_device ? " owns_file_device" : "");
3031 
3032 	fs_volume* volume = mount->volume;
3033 	while (volume != NULL) {
3034 		kprintf(" volume %p:\n", volume);
3035 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3036 		kprintf("  private_volume:   %p\n", volume->private_volume);
3037 		kprintf("  ops:              %p\n", volume->ops);
3038 		kprintf("  file_system:      %p\n", volume->file_system);
3039 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3040 		volume = volume->super_volume;
3041 	}
3042 
3043 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3044 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3045 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3046 	set_debug_variable("_partition", (addr_t)mount->partition);
3047 }
3048 
3049 
3050 static bool
3051 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3052 	const char* name)
3053 {
3054 	bool insertSlash = buffer[bufferSize] != '\0';
3055 	size_t nameLength = strlen(name);
3056 
3057 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3058 		return false;
3059 
3060 	if (insertSlash)
3061 		buffer[--bufferSize] = '/';
3062 
3063 	bufferSize -= nameLength;
3064 	memcpy(buffer + bufferSize, name, nameLength);
3065 
3066 	return true;
3067 }
3068 
3069 
3070 static bool
3071 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3072 	ino_t nodeID)
3073 {
3074 	if (bufferSize == 0)
3075 		return false;
3076 
3077 	bool insertSlash = buffer[bufferSize] != '\0';
3078 	if (insertSlash)
3079 		buffer[--bufferSize] = '/';
3080 
3081 	size_t size = snprintf(buffer, bufferSize,
3082 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3083 	if (size > bufferSize) {
3084 		if (insertSlash)
3085 			bufferSize++;
3086 		return false;
3087 	}
3088 
3089 	if (size < bufferSize)
3090 		memmove(buffer + bufferSize - size, buffer, size);
3091 
3092 	bufferSize -= size;
3093 	return true;
3094 }
3095 
3096 
3097 static char*
3098 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3099 	bool& _truncated)
3100 {
3101 	// null-terminate the path
3102 	buffer[--bufferSize] = '\0';
3103 
3104 	while (true) {
3105 		while (vnode->covers != NULL)
3106 			vnode = vnode->covers;
3107 
3108 		if (vnode == sRoot) {
3109 			_truncated = bufferSize == 0;
3110 			if (!_truncated)
3111 				buffer[--bufferSize] = '/';
3112 			return buffer + bufferSize;
3113 		}
3114 
3115 		// resolve the name
3116 		ino_t dirID;
3117 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3118 			vnode->id, dirID);
3119 		if (name == NULL) {
3120 			// Failed to resolve the name -- prepend "<dev,node>/".
3121 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3122 				vnode->mount->id, vnode->id);
3123 			return buffer + bufferSize;
3124 		}
3125 
3126 		// prepend the name
3127 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3128 			_truncated = true;
3129 			return buffer + bufferSize;
3130 		}
3131 
3132 		// resolve the directory node
3133 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3134 		if (nextVnode == NULL) {
3135 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3136 				vnode->mount->id, dirID);
3137 			return buffer + bufferSize;
3138 		}
3139 
3140 		vnode = nextVnode;
3141 	}
3142 }
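

/* Illustrative sketch (not part of the original source): possible results of
 * debug_resolve_vnode_path(). With a complete entry cache the full path is
 * reconstructed, e.g. "/boot/home/Desktop"; where a name can't be resolved,
 * the remaining prefix degrades to a "<dev,node>" placeholder, e.g.
 * "<2,1234>/cache/tmp".
 */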
3143 
3144 
3145 static void
3146 _dump_vnode(struct vnode* vnode, bool printPath)
3147 {
3148 	kprintf("VNODE: %p\n", vnode);
3149 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3150 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3151 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3152 	kprintf(" private_node:  %p\n", vnode->private_node);
3153 	kprintf(" mount:         %p\n", vnode->mount);
3154 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3155 	kprintf(" covers:        %p\n", vnode->covers);
3156 	kprintf(" cache:         %p\n", vnode->cache);
3157 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3158 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3159 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3160 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3161 
3162 	_dump_advisory_locking(vnode->advisory_locking);
3163 
3164 	if (printPath) {
3165 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3166 		if (buffer != NULL) {
3167 			bool truncated;
3168 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3169 				B_PATH_NAME_LENGTH, truncated);
3170 			if (path != NULL) {
3171 				kprintf(" path:          ");
3172 				if (truncated)
3173 					kputs("<truncated>/");
3174 				kputs(path);
3175 				kputs("\n");
3176 			} else
3177 				kprintf("Failed to resolve vnode path.\n");
3178 
3179 			debug_free(buffer);
3180 		} else
3181 			kprintf("Failed to allocate memory for constructing the path.\n");
3182 	}
3183 
3184 	set_debug_variable("_node", (addr_t)vnode->private_node);
3185 	set_debug_variable("_mount", (addr_t)vnode->mount);
3186 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3187 	set_debug_variable("_covers", (addr_t)vnode->covers);
3188 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3189 }
3190 
3191 
3192 static int
3193 dump_mount(int argc, char** argv)
3194 {
3195 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3196 		kprintf("usage: %s [id|address]\n", argv[0]);
3197 		return 0;
3198 	}
3199 
3200 	ulong val = parse_expression(argv[1]);
3201 	uint32 id = val;
3202 
3203 	struct fs_mount* mount = sMountsTable->Lookup(id);
3204 	if (mount == NULL) {
3205 		if (IS_USER_ADDRESS(id)) {
3206 			kprintf("fs_mount not found\n");
3207 			return 0;
3208 		}
3209 		mount = (fs_mount*)val;
3210 	}
3211 
3212 	_dump_mount(mount);
3213 	return 0;
3214 }
3215 
3216 
3217 static int
3218 dump_mounts(int argc, char** argv)
3219 {
3220 	if (argc != 1) {
3221 		kprintf("usage: %s\n", argv[0]);
3222 		return 0;
3223 	}
3224 
3225 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3226 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3227 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3228 
3229 	struct fs_mount* mount;
3230 
3231 	MountTable::Iterator iterator(sMountsTable);
3232 	while (iterator.HasNext()) {
3233 		mount = iterator.Next();
3234 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3235 			mount->root_vnode->covers, mount->volume->private_volume,
3236 			mount->volume->file_system_name);
3237 
3238 		fs_volume* volume = mount->volume;
3239 		while (volume->super_volume != NULL) {
3240 			volume = volume->super_volume;
3241 			kprintf("                                     %p %s\n",
3242 				volume->private_volume, volume->file_system_name);
3243 		}
3244 	}
3245 
3246 	return 0;
3247 }
3248 
3249 
3250 static int
3251 dump_vnode(int argc, char** argv)
3252 {
3253 	bool printPath = false;
3254 	int argi = 1;
3255 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3256 		printPath = true;
3257 		argi++;
3258 	}
3259 
3260 	if (argi >= argc || argi + 2 < argc) {
3261 		print_debugger_command_usage(argv[0]);
3262 		return 0;
3263 	}
3264 
3265 	struct vnode* vnode = NULL;
3266 
3267 	if (argi + 1 == argc) {
3268 		vnode = (struct vnode*)parse_expression(argv[argi]);
3269 		if (IS_USER_ADDRESS(vnode)) {
3270 			kprintf("invalid vnode address\n");
3271 			return 0;
3272 		}
3273 		_dump_vnode(vnode, printPath);
3274 		return 0;
3275 	}
3276 
3277 	dev_t device = parse_expression(argv[argi]);
3278 	ino_t id = parse_expression(argv[argi + 1]);
3279 
3280 	VnodeTable::Iterator iterator(sVnodeTable);
3281 	while (iterator.HasNext()) {
3282 		vnode = iterator.Next();
3283 		if (vnode->id != id || vnode->device != device)
3284 			continue;
3285 
3286 		_dump_vnode(vnode, printPath);
3287 	}
3288 
3289 	return 0;
3290 }
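

/* Illustrative KDL usage sketch (the exact invocations are assumed, not
 * taken from this section):
 *
 *	vnode 0x82f4c2d0	dump the vnode at that kernel address
 *	vnode -p 2 1234		dump device 2, inode 1234, resolving its path
 */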
3291 
3292 
3293 static int
3294 dump_vnodes(int argc, char** argv)
3295 {
3296 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3297 		kprintf("usage: %s [device]\n", argv[0]);
3298 		return 0;
3299 	}
3300 
3301 	// restrict dumped nodes to a certain device if requested
3302 	dev_t device = parse_expression(argv[1]);
3303 
3304 	struct vnode* vnode;
3305 
3306 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3307 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3308 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3309 
3310 	VnodeTable::Iterator iterator(sVnodeTable);
3311 	while (iterator.HasNext()) {
3312 		vnode = iterator.Next();
3313 		if (vnode->device != device)
3314 			continue;
3315 
3316 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3317 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3318 			vnode->private_node, vnode->advisory_locking,
3319 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3320 			vnode->IsUnpublished() ? "u" : "-");
3321 	}
3322 
3323 	return 0;
3324 }
3325 
3326 
3327 static int
3328 dump_vnode_caches(int argc, char** argv)
3329 {
3330 	struct vnode* vnode;
3331 
3332 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3333 		kprintf("usage: %s [device]\n", argv[0]);
3334 		return 0;
3335 	}
3336 
3337 	// restrict dumped nodes to a certain device if requested
3338 	dev_t device = -1;
3339 	if (argc > 1)
3340 		device = parse_expression(argv[1]);
3341 
3342 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3343 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3344 
3345 	VnodeTable::Iterator iterator(sVnodeTable);
3346 	while (iterator.HasNext()) {
3347 		vnode = iterator.Next();
3348 		if (vnode->cache == NULL)
3349 			continue;
3350 		if (device != -1 && vnode->device != device)
3351 			continue;
3352 
3353 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3354 			vnode, vnode->device, vnode->id, vnode->cache,
3355 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3356 			vnode->cache->page_count);
3357 	}
3358 
3359 	return 0;
3360 }
3361 
3362 
3363 int
3364 dump_io_context(int argc, char** argv)
3365 {
3366 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3367 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3368 		return 0;
3369 	}
3370 
3371 	struct io_context* context = NULL;
3372 
3373 	if (argc > 1) {
3374 		ulong num = parse_expression(argv[1]);
3375 		if (IS_KERNEL_ADDRESS(num))
3376 			context = (struct io_context*)num;
3377 		else {
3378 			Team* team = team_get_team_struct_locked(num);
3379 			if (team == NULL) {
3380 				kprintf("could not find team with ID %lu\n", num);
3381 				return 0;
3382 			}
3383 			context = (struct io_context*)team->io_context;
3384 		}
3385 	} else
3386 		context = get_current_io_context(true);
3387 
3388 	kprintf("I/O CONTEXT: %p\n", context);
3389 	kprintf(" root vnode:\t%p\n", context->root);
3390 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3391 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3392 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3393 
3394 	if (context->num_used_fds) {
3395 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3396 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3397 	}
3398 
3399 	for (uint32 i = 0; i < context->table_size; i++) {
3400 		struct file_descriptor* fd = context->fds[i];
3401 		if (fd == NULL)
3402 			continue;
3403 
3404 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3405 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3406 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3407 			fd->pos, fd->cookie,
3408 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3409 				? "mount" : "vnode",
3410 			fd->u.vnode);
3411 	}
3412 
3413 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3414 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3415 
3416 	set_debug_variable("_cwd", (addr_t)context->cwd);
3417 
3418 	return 0;
3419 }
3420 
3421 
3422 int
3423 dump_vnode_usage(int argc, char** argv)
3424 {
3425 	if (argc != 1) {
3426 		kprintf("usage: %s\n", argv[0]);
3427 		return 0;
3428 	}
3429 
3430 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3431 		sUnusedVnodes, kMaxUnusedVnodes);
3432 
3433 	uint32 count = sVnodeTable->CountElements();
3434 
3435 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3436 		count - sUnusedVnodes);
3437 	return 0;
3438 }
3439 
3440 #endif	// ADD_DEBUGGER_COMMANDS
3441 
3442 
3443 /*!	Clears memory specified by an iovec array.
3444 */
3445 static void
3446 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3447 {
3448 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3449 		size_t length = std::min(vecs[i].iov_len, bytes);
3450 		memset(vecs[i].iov_base, 0, length);
3451 		bytes -= length;
3452 	}
3453 }
3454 
3455 
3456 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3457 	and calls the file system hooks to read/write the request to disk.
3458 */
3459 static status_t
3460 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3461 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3462 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3463 	bool doWrite)
3464 {
3465 	if (fileVecCount == 0) {
3466 		// There are no file vecs at this offset, so we're obviously trying
3467 		// to access the file outside of its bounds
3468 		return B_BAD_VALUE;
3469 	}
3470 
3471 	size_t numBytes = *_numBytes;
3472 	uint32 fileVecIndex;
3473 	size_t vecOffset = *_vecOffset;
3474 	uint32 vecIndex = *_vecIndex;
3475 	status_t status;
3476 	size_t size;
3477 
3478 	if (!doWrite && vecOffset == 0) {
3479 		// now directly read the data from the device
3480 		// the first file_io_vec can be read directly
3481 
3482 		if (fileVecs[0].length < (off_t)numBytes)
3483 			size = fileVecs[0].length;
3484 		else
3485 			size = numBytes;
3486 
3487 		if (fileVecs[0].offset >= 0) {
3488 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3489 				&vecs[vecIndex], vecCount - vecIndex, &size);
3490 		} else {
3491 			// sparse read
3492 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3493 			status = B_OK;
3494 		}
3495 		if (status != B_OK)
3496 			return status;
3497 
3498 		// TODO: this is a work-around for buggy device drivers!
3499 		//	When our own drivers honour the length, we can:
3500 		//	a) also use this direct I/O for writes (otherwise, it would
3501 		//	   overwrite precious data)
3502 		//	b) panic if the term below is true (at least for writes)
3503 		if ((off_t)size > fileVecs[0].length) {
3504 			//dprintf("warning: device driver %p doesn't respect total length "
3505 			//	"in read_pages() call!\n", ref->device);
3506 			size = fileVecs[0].length;
3507 		}
3508 
3509 		ASSERT((off_t)size <= fileVecs[0].length);
3510 
3511 		// If the file portion was contiguous, we're already done now
3512 		if (size == numBytes)
3513 			return B_OK;
3514 
3515 		// if we reached the end of the file, we can return as well
3516 		if ((off_t)size != fileVecs[0].length) {
3517 			*_numBytes = size;
3518 			return B_OK;
3519 		}
3520 
3521 		fileVecIndex = 1;
3522 
3523 		// first, find out where we have to continue in our iovecs
3524 		for (; vecIndex < vecCount; vecIndex++) {
3525 			if (size < vecs[vecIndex].iov_len)
3526 				break;
3527 
3528 			size -= vecs[vecIndex].iov_len;
3529 		}
3530 
3531 		vecOffset = size;
3532 	} else {
3533 		fileVecIndex = 0;
3534 		size = 0;
3535 	}
3536 
3537 	// Too bad, let's process the rest of the file_io_vecs
3538 
3539 	size_t totalSize = size;
3540 	size_t bytesLeft = numBytes - size;
3541 
3542 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3543 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3544 		off_t fileOffset = fileVec.offset;
3545 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3546 
3547 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3548 			fileLeft));
3549 
3550 		// process the complete fileVec
3551 		while (fileLeft > 0) {
3552 			iovec tempVecs[MAX_TEMP_IO_VECS];
3553 			uint32 tempCount = 0;
3554 
3555 			// size tracks how much of what is left of the current fileVec
3556 			// (fileLeft) has been assigned to tempVecs
3557 			size = 0;
3558 
3559 			// assign what is left of the current fileVec to the tempVecs
3560 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3561 					&& tempCount < MAX_TEMP_IO_VECS;) {
3562 				// try to satisfy one iovec per iteration (or as much as
3563 				// possible)
3564 
3565 				// bytes left of the current iovec
3566 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3567 				if (vecLeft == 0) {
3568 					vecOffset = 0;
3569 					vecIndex++;
3570 					continue;
3571 				}
3572 
3573 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3574 					vecIndex, vecOffset, size));
3575 
3576 				// actually available bytes
3577 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3578 
3579 				tempVecs[tempCount].iov_base
3580 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3581 				tempVecs[tempCount].iov_len = tempVecSize;
3582 				tempCount++;
3583 
3584 				size += tempVecSize;
3585 				vecOffset += tempVecSize;
3586 			}
3587 
3588 			size_t bytes = size;
3589 
3590 			if (fileOffset == -1) {
3591 				if (doWrite) {
3592 					panic("sparse write attempt: vnode %p", vnode);
3593 					status = B_IO_ERROR;
3594 				} else {
3595 					// sparse read
3596 					zero_iovecs(tempVecs, tempCount, bytes);
3597 					status = B_OK;
3598 				}
3599 			} else if (doWrite) {
3600 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3601 					tempVecs, tempCount, &bytes);
3602 			} else {
3603 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3604 					tempVecs, tempCount, &bytes);
3605 			}
3606 			if (status != B_OK)
3607 				return status;
3608 
3609 			totalSize += bytes;
3610 			bytesLeft -= size;
3611 			if (fileOffset >= 0)
3612 				fileOffset += size;
3613 			fileLeft -= size;
3614 			//dprintf("-> file left = %Lu\n", fileLeft);
3615 
3616 			if (size != bytes || vecIndex >= vecCount) {
3617 				// there are no more bytes or iovecs, let's bail out
3618 				*_numBytes = totalSize;
3619 				return B_OK;
3620 			}
3621 		}
3622 	}
3623 
3624 	*_vecIndex = vecIndex;
3625 	*_vecOffset = vecOffset;
3626 	*_numBytes = totalSize;
3627 	return B_OK;
3628 }
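

/* Illustrative sketch (not part of the original source): given
 *
 *	fileVecs = { offset 1000, length 512 }, { offset 8192, length 512 }
 *
 * and a single 1024-byte iovec, a read is split into two FS calls:
 *
 *	read_pages(offset 1000, bytes [0, 511] of the iovec)
 *	read_pages(offset 8192, bytes [512, 1023] of the iovec)
 *
 * A file vec with offset -1 denotes a sparse region; it is zeroed on reads
 * and triggers a panic on writes.
 */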
3629 
3630 
3631 static bool
3632 is_user_in_group(gid_t gid)
3633 {
3634 	if (gid == getegid())
3635 		return true;
3636 
3637 	gid_t groups[NGROUPS_MAX];
3638 	int groupCount = getgroups(NGROUPS_MAX, groups);
3639 	for (int i = 0; i < groupCount; i++) {
3640 		if (gid == groups[i])
3641 			return true;
3642 	}
3643 
3644 	return false;
3645 }
3646 
3647 
3648 static status_t
3649 free_io_context(io_context* context)
3650 {
3651 	uint32 i;
3652 
3653 	TIOC(FreeIOContext(context));
3654 
3655 	if (context->root)
3656 		put_vnode(context->root);
3657 
3658 	if (context->cwd)
3659 		put_vnode(context->cwd);
3660 
3661 	mutex_lock(&context->io_mutex);
3662 
3663 	for (i = 0; i < context->table_size; i++) {
3664 		if (struct file_descriptor* descriptor = context->fds[i]) {
3665 			close_fd(context, descriptor);
3666 			put_fd(descriptor);
3667 		}
3668 	}
3669 
3670 	mutex_destroy(&context->io_mutex);
3671 
3672 	remove_node_monitors(context);
3673 	free(context->fds);
3674 	free(context);
3675 
3676 	return B_OK;
3677 }
3678 
3679 
3680 static status_t
3681 resize_monitor_table(struct io_context* context, const int newSize)
3682 {
3683 	int	status = B_OK;
3684 
3685 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3686 		return B_BAD_VALUE;
3687 
3688 	mutex_lock(&context->io_mutex);
3689 
3690 	if ((size_t)newSize < context->num_monitors) {
3691 		status = B_BUSY;
3692 		goto out;
3693 	}
3694 	context->max_monitors = newSize;
3695 
3696 out:
3697 	mutex_unlock(&context->io_mutex);
3698 	return status;
3699 }
3700 
3701 
3702 //	#pragma mark - public API for file systems
3703 
3704 
3705 extern "C" status_t
3706 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3707 	fs_vnode_ops* ops)
3708 {
3709 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3710 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3711 
3712 	if (privateNode == NULL)
3713 		return B_BAD_VALUE;
3714 
3715 	int32 tries = BUSY_VNODE_RETRIES;
3716 restart:
3717 	// create the node
3718 	bool nodeCreated;
3719 	struct vnode* vnode;
3720 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3721 		nodeCreated);
3722 	if (status != B_OK)
3723 		return status;
3724 
3725 	WriteLocker nodeLocker(sVnodeLock, true);
3726 		// create_new_vnode_and_lock() has locked for us
3727 
3728 	if (!nodeCreated && vnode->IsBusy()) {
3729 		nodeLocker.Unlock();
3730 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3731 			return B_BUSY;
3732 		goto restart;
3733 	}
3734 
3735 	// file system integrity check:
3736 	// test if the vnode already exists and bail out if this is the case!
3737 	if (!nodeCreated) {
3738 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3739 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3740 			vnode->private_node);
3741 		return B_ERROR;
3742 	}
3743 
3744 	vnode->private_node = privateNode;
3745 	vnode->ops = ops;
3746 	vnode->SetUnpublished(true);
3747 
3748 	TRACE(("returns: %s\n", strerror(status)));
3749 
3750 	return status;
3751 }
3752 
3753 
3754 extern "C" status_t
3755 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3756 	fs_vnode_ops* ops, int type, uint32 flags)
3757 {
3758 	FUNCTION(("publish_vnode()\n"));
3759 
3760 	int32 tries = BUSY_VNODE_RETRIES;
3761 restart:
3762 	WriteLocker locker(sVnodeLock);
3763 
3764 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3765 
3766 	bool nodeCreated = false;
3767 	if (vnode == NULL) {
3768 		if (privateNode == NULL)
3769 			return B_BAD_VALUE;
3770 
3771 		// create the node
3772 		locker.Unlock();
3773 			// create_new_vnode_and_lock() will re-lock for us on success
3774 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3775 			nodeCreated);
3776 		if (status != B_OK)
3777 			return status;
3778 
3779 		locker.SetTo(sVnodeLock, true);
3780 	}
3781 
3782 	if (nodeCreated) {
3783 		vnode->private_node = privateNode;
3784 		vnode->ops = ops;
3785 		vnode->SetUnpublished(true);
3786 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3787 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3788 		// already known, but not published
3789 	} else if (vnode->IsBusy()) {
3790 		locker.Unlock();
3791 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3792 			return B_BUSY;
3793 		goto restart;
3794 	} else
3795 		return B_BAD_VALUE;
3796 
3797 	bool publishSpecialSubNode = false;
3798 
3799 	vnode->SetType(type);
3800 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3801 	publishSpecialSubNode = is_special_node_type(type)
3802 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3803 
3804 	status_t status = B_OK;
3805 
3806 	// create sub vnodes, if necessary
3807 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3808 		locker.Unlock();
3809 
3810 		fs_volume* subVolume = volume;
3811 		if (volume->sub_volume != NULL) {
3812 			while (status == B_OK && subVolume->sub_volume != NULL) {
3813 				subVolume = subVolume->sub_volume;
3814 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3815 					vnode);
3816 			}
3817 		}
3818 
3819 		if (status == B_OK && publishSpecialSubNode)
3820 			status = create_special_sub_node(vnode, flags);
3821 
3822 		if (status != B_OK) {
3823 			// error -- clean up the created sub vnodes
3824 			while (subVolume->super_volume != volume) {
3825 				subVolume = subVolume->super_volume;
3826 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3827 			}
3828 		}
3829 
3830 		if (status == B_OK) {
3831 			ReadLocker vnodesReadLocker(sVnodeLock);
3832 			AutoLocker<Vnode> nodeLocker(vnode);
3833 			vnode->SetBusy(false);
3834 			vnode->SetUnpublished(false);
3835 		} else {
3836 			locker.Lock();
3837 			sVnodeTable->Remove(vnode);
3838 			remove_vnode_from_mount_list(vnode, vnode->mount);
3839 			free(vnode);
3840 		}
3841 	} else {
3842 		// we still hold the write lock -- mark the node unbusy and published
3843 		vnode->SetBusy(false);
3844 		vnode->SetUnpublished(false);
3845 	}
3846 
3847 	TRACE(("returns: %s\n", strerror(status)));
3848 
3849 	return status;
3850 }
3851 
3852 
3853 extern "C" status_t
3854 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3855 {
3856 	struct vnode* vnode;
3857 
3858 	if (volume == NULL)
3859 		return B_BAD_VALUE;
3860 
3861 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3862 	if (status != B_OK)
3863 		return status;
3864 
3865 	// If this is a layered FS, we need to get the node cookie for the requested
3866 	// layer.
3867 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3868 		fs_vnode resolvedNode;
3869 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3870 			&resolvedNode);
3871 		if (status != B_OK) {
3872 			panic("get_vnode(): Failed to get super node for vnode %p, "
3873 				"volume: %p", vnode, volume);
3874 			put_vnode(vnode);
3875 			return status;
3876 		}
3877 
3878 		if (_privateNode != NULL)
3879 			*_privateNode = resolvedNode.private_node;
3880 	} else if (_privateNode != NULL)
3881 		*_privateNode = vnode->private_node;
3882 
3883 	return B_OK;
3884 }
3885 
3886 
3887 extern "C" status_t
3888 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3889 {
3890 	struct vnode* vnode;
3891 
3892 	rw_lock_read_lock(&sVnodeLock);
3893 	vnode = lookup_vnode(volume->id, vnodeID);
3894 	rw_lock_read_unlock(&sVnodeLock);
3895 
3896 	if (vnode == NULL)
3897 		return B_BAD_VALUE;
3898 
3899 	inc_vnode_ref_count(vnode);
3900 	return B_OK;
3901 }
3902 
3903 
3904 extern "C" status_t
3905 put_vnode(fs_volume* volume, ino_t vnodeID)
3906 {
3907 	struct vnode* vnode;
3908 
3909 	rw_lock_read_lock(&sVnodeLock);
3910 	vnode = lookup_vnode(volume->id, vnodeID);
3911 	rw_lock_read_unlock(&sVnodeLock);
3912 
3913 	if (vnode == NULL)
3914 		return B_BAD_VALUE;
3915 
3916 	dec_vnode_ref_count(vnode, false, true);
3917 	return B_OK;
3918 }
3919 
3920 
3921 extern "C" status_t
3922 remove_vnode(fs_volume* volume, ino_t vnodeID)
3923 {
3924 	ReadLocker locker(sVnodeLock);
3925 
3926 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3927 	if (vnode == NULL)
3928 		return B_ENTRY_NOT_FOUND;
3929 
3930 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3931 		// this vnode is in use
3932 		return B_BUSY;
3933 	}
3934 
3935 	vnode->Lock();
3936 
3937 	vnode->SetRemoved(true);
3938 	bool removeUnpublished = false;
3939 
3940 	if (vnode->IsUnpublished()) {
3941 		// prepare the vnode for deletion
3942 		removeUnpublished = true;
3943 		vnode->SetBusy(true);
3944 	}
3945 
3946 	vnode->Unlock();
3947 	locker.Unlock();
3948 
3949 	if (removeUnpublished) {
3950 		// If the vnode hasn't been published yet, we delete it here
3951 		atomic_add(&vnode->ref_count, -1);
3952 		free_vnode(vnode, true);
3953 	}
3954 
3955 	return B_OK;
3956 }
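

/* Illustrative sketch (hypothetical FS code): an unlink() hook would call
   remove_vnode() once the last directory entry for the node is gone; the
   node is then actually destroyed when its last reference is released:

	if (inode->LinkCount() == 0)
		remove_vnode(volume, inode->ID());
*/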
3957 
3958 
3959 extern "C" status_t
3960 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3961 {
3962 	struct vnode* vnode;
3963 
3964 	rw_lock_read_lock(&sVnodeLock);
3965 
3966 	vnode = lookup_vnode(volume->id, vnodeID);
3967 	if (vnode) {
3968 		AutoLocker<Vnode> nodeLocker(vnode);
3969 		vnode->SetRemoved(false);
3970 	}
3971 
3972 	rw_lock_read_unlock(&sVnodeLock);
3973 	return B_OK;
3974 }
3975 
3976 
3977 extern "C" status_t
3978 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3979 {
3980 	ReadLocker _(sVnodeLock);
3981 
3982 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3983 		if (_removed != NULL)
3984 			*_removed = vnode->IsRemoved();
3985 		return B_OK;
3986 	}
3987 
3988 	return B_BAD_VALUE;
3989 }
3990 
3991 
3992 extern "C" status_t
3993 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3994 {
3995 	ReadLocker locker(sVnodeLock);
3996 
3997 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3998 	if (vnode == NULL)
3999 		return B_ENTRY_NOT_FOUND;
4000 
4001 	// are we trying to mark an already busy node busy again?
4002 	if (busy && vnode->IsBusy())
4003 		return B_BUSY;
4004 
4005 	vnode->Lock();
4006 	vnode->SetBusy(busy);
4007 	vnode->Unlock();
4008 
4009 	return B_OK;
4010 }
4011 
4012 
4013 extern "C" status_t
4014 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4015 {
4016 	WriteLocker locker(sVnodeLock);
4017 
4018 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4019 	if (vnode == NULL)
4020 		return B_ENTRY_NOT_FOUND;
4021 
4022 	sVnodeTable->Remove(vnode);
4023 	vnode->id = newID;
4024 	sVnodeTable->Insert(vnode);
4025 
4026 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
4027 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4028 
4029 	return B_OK;
4030 }
4031 
4032 
4033 extern "C" fs_volume*
4034 volume_for_vnode(fs_vnode* _vnode)
4035 {
4036 	if (_vnode == NULL)
4037 		return NULL;
4038 
4039 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4040 	return vnode->mount->volume;
4041 }
4042 
4043 
4044 extern "C" status_t
4045 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4046 	uid_t nodeUserID)
4047 {
4048 	// get node permissions
4049 	int userPermissions = (mode & S_IRWXU) >> 6;
4050 	int groupPermissions = (mode & S_IRWXG) >> 3;
4051 	int otherPermissions = mode & S_IRWXO;
4052 
4053 	// get the node permissions for this uid/gid
4054 	int permissions = 0;
4055 	uid_t uid = geteuid();
4056 
4057 	if (uid == 0) {
4058 		// user is root
4059 		// root has always read/write permission, but at least one of the
4060 		// X bits must be set for execute permission
4061 		permissions = userPermissions | groupPermissions | otherPermissions
4062 			| S_IROTH | S_IWOTH;
4063 		if (S_ISDIR(mode))
4064 			permissions |= S_IXOTH;
4065 	} else if (uid == nodeUserID) {
4066 		// user is node owner
4067 		permissions = userPermissions;
4068 	} else if (is_user_in_group(nodeGroupID)) {
4069 		// user is in owning group
4070 		permissions = groupPermissions;
4071 	} else {
4072 		// user is one of the others
4073 		permissions = otherPermissions;
4074 	}
4075 
4076 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4077 }
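

/* Worked example (illustrative): for mode 0640, a caller that is neither
   root nor the owner but is in the owning group gets
   permissions = (0640 & S_IRWXG) >> 3 = 04 (read only). A request for
   R_OK (04) yields (04 & ~04) == 0 -> B_OK; a request for W_OK (02)
   yields (02 & ~04) != 0 -> B_PERMISSION_DENIED. */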
4078 
4079 
4080 #if 0
4081 extern "C" status_t
4082 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4083 	size_t* _numBytes)
4084 {
4085 	struct file_descriptor* descriptor;
4086 	struct vnode* vnode;
4087 
4088 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4089 	if (descriptor == NULL)
4090 		return B_FILE_ERROR;
4091 
4092 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4093 		count, 0, _numBytes);
4094 
4095 	put_fd(descriptor);
4096 	return status;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4102 	size_t* _numBytes)
4103 {
4104 	struct file_descriptor* descriptor;
4105 	struct vnode* vnode;
4106 
4107 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4108 	if (descriptor == NULL)
4109 		return B_FILE_ERROR;
4110 
4111 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4112 		count, 0, _numBytes);
4113 
4114 	put_fd(descriptor);
4115 	return status;
4116 }
4117 #endif
4118 
4119 
4120 extern "C" status_t
4121 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4122 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4123 	size_t* _bytes)
4124 {
4125 	struct file_descriptor* descriptor;
4126 	struct vnode* vnode;
4127 
4128 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4129 	if (descriptor == NULL)
4130 		return B_FILE_ERROR;
4131 
4132 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4133 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4134 		false);
4135 
4136 	put_fd(descriptor);
4137 	return status;
4138 }
4139 
4140 
4141 extern "C" status_t
4142 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4143 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4144 	size_t* _bytes)
4145 {
4146 	struct file_descriptor* descriptor;
4147 	struct vnode* vnode;
4148 
4149 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4150 	if (descriptor == NULL)
4151 		return B_FILE_ERROR;
4152 
4153 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4154 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4155 		true);
4156 
4157 	put_fd(descriptor);
4158 	return status;
4159 }
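

/* Illustrative sketch: file_io_vecs describe where file data lives on the
   underlying device, so a 12 KiB file stored in two extents (hypothetical
   offsets) could be described as:

	file_io_vec fileVecs[2] = {
		{ 4096, 8192 },		// 8 KiB at device offset 4096
		{ 32768, 4096 }		// 4 KiB at device offset 32768
	};

   read_file_io_vec_pages() then fills the given iovecs from these extents
   and reports via _vecIndex/_vecOffset how far it got. */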
4160 
4161 
4162 extern "C" status_t
4163 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4164 {
4165 	// lookup mount -- the caller is required to make sure that the mount
4166 	// won't go away
4167 	ReadLocker locker(sMountLock);
4168 	struct fs_mount* mount = find_mount(mountID);
4169 	if (mount == NULL)
4170 		return B_BAD_VALUE;
4171 	locker.Unlock();
4172 
4173 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4174 }
4175 
4176 
4177 extern "C" status_t
4178 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4179 {
4180 	// lookup mount -- the caller is required to make sure that the mount
4181 	// won't go away
4182 	ReadLocker locker(sMountLock);
4183 	struct fs_mount* mount = find_mount(mountID);
4184 	if (mount == NULL)
4185 		return B_BAD_VALUE;
4186 	locker.Unlock();
4187 
4188 	return mount->entry_cache.Add(dirID, name, -1, true);
4189 }
4190 
4191 
4192 extern "C" status_t
4193 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4194 {
4195 	// lookup mount -- the caller is required to make sure that the mount
4196 	// won't go away
4197 	ReadLocker locker(sMountLock);
4198 	struct fs_mount* mount = find_mount(mountID);
4199 	if (mount == NULL)
4200 		return B_BAD_VALUE;
4201 	locker.Unlock();
4202 
4203 	return mount->entry_cache.Remove(dirID, name);
4204 }
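

/* Illustrative sketch (hypothetical FS code): a file system keeps the entry
   cache coherent from its own hooks:

	// after creating "name" in directory dirID:
	entry_cache_add(volume->id, dirID, name, newNodeID);

	// after a lookup that ended in B_ENTRY_NOT_FOUND:
	entry_cache_add_missing(volume->id, dirID, name);

	// after removing "name":
	entry_cache_remove(volume->id, dirID, name);
*/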
4205 
4206 
4207 //	#pragma mark - private VFS API
4208 //	Functions the VFS exports for other parts of the kernel
4209 
4210 
4211 /*! Acquires another reference to the vnode that has to be released
4212 	by calling vfs_put_vnode().
4213 */
4214 void
4215 vfs_acquire_vnode(struct vnode* vnode)
4216 {
4217 	inc_vnode_ref_count(vnode);
4218 }
4219 
4220 
4221 /*! This is currently called from file_cache_create() only.
4222 	It's probably a temporary solution as long as devfs requires that
4223 	fs_read_pages()/fs_write_pages() are called with the standard
4224 	open cookie and not with a device cookie.
4225 	If that's done differently, remove this call; it has no other
4226 	purpose.
4227 */
4228 extern "C" status_t
4229 vfs_get_cookie_from_fd(int fd, void** _cookie)
4230 {
4231 	struct file_descriptor* descriptor;
4232 
4233 	descriptor = get_fd(get_current_io_context(true), fd);
4234 	if (descriptor == NULL)
4235 		return B_FILE_ERROR;
4236 
4237 	*_cookie = descriptor->cookie;
4238 	return B_OK;
4239 }
4240 
4241 
4242 extern "C" status_t
4243 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4244 {
4245 	*vnode = get_vnode_from_fd(fd, kernel);
4246 
4247 	if (*vnode == NULL)
4248 		return B_FILE_ERROR;
4249 
4250 	return B_NO_ERROR;
4251 }
4252 
4253 
4254 extern "C" status_t
4255 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4256 {
4257 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4258 		path, kernel));
4259 
4260 	KPath pathBuffer;
4261 	if (pathBuffer.InitCheck() != B_OK)
4262 		return B_NO_MEMORY;
4263 
4264 	char* buffer = pathBuffer.LockBuffer();
4265 	strlcpy(buffer, path, pathBuffer.BufferSize());
4266 
4267 	struct vnode* vnode;
4268 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4269 	if (status != B_OK)
4270 		return status;
4271 
4272 	*_vnode = vnode;
4273 	return B_OK;
4274 }
4275 
4276 
4277 extern "C" status_t
4278 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4279 {
4280 	struct vnode* vnode = NULL;
4281 
4282 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4283 	if (status != B_OK)
4284 		return status;
4285 
4286 	*_vnode = vnode;
4287 	return B_OK;
4288 }
4289 
4290 
4291 extern "C" status_t
4292 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4293 	const char* name, struct vnode** _vnode)
4294 {
4295 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4296 }
4297 
4298 
4299 extern "C" void
4300 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4301 {
4302 	*_mountID = vnode->device;
4303 	*_vnodeID = vnode->id;
4304 }
4305 
4306 
4307 /*!
4308 	Helper function abstracting the process of "converting" a given
4309 	vnode-pointer to a fs_vnode-pointer.
4310 	Currently only used in bindfs.
4311 */
4312 extern "C" fs_vnode*
4313 vfs_fsnode_for_vnode(struct vnode* vnode)
4314 {
4315 	return vnode;
4316 }
4317 
4318 
4319 /*!
4320 	Calls fs_open() on the given vnode and returns a new
4321 	file descriptor for it
4322 */
4323 int
4324 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4325 {
4326 	return open_vnode(vnode, openMode, kernel);
4327 }
4328 
4329 
4330 /*!	Looks up a vnode with the given mount and vnode ID.
4331 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4332 	to the node.
4333 	It's currently only used by file_cache_create().
4334 */
4335 extern "C" status_t
4336 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4337 {
4338 	rw_lock_read_lock(&sVnodeLock);
4339 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4340 	rw_lock_read_unlock(&sVnodeLock);
4341 
4342 	if (vnode == NULL)
4343 		return B_ERROR;
4344 
4345 	*_vnode = vnode;
4346 	return B_OK;
4347 }
4348 
4349 
4350 extern "C" status_t
4351 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4352 	bool traverseLeafLink, bool kernel, void** _node)
4353 {
4354 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4355 		volume, path, kernel));
4356 
4357 	KPath pathBuffer;
4358 	if (pathBuffer.InitCheck() != B_OK)
4359 		return B_NO_MEMORY;
4360 
4361 	fs_mount* mount;
4362 	status_t status = get_mount(volume->id, &mount);
4363 	if (status != B_OK)
4364 		return status;
4365 
4366 	char* buffer = pathBuffer.LockBuffer();
4367 	strlcpy(buffer, path, pathBuffer.BufferSize());
4368 
4369 	struct vnode* vnode = mount->root_vnode;
4370 
4371 	if (buffer[0] == '/')
4372 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4373 	else {
4374 		inc_vnode_ref_count(vnode);
4375 			// vnode_path_to_vnode() releases a reference to the starting vnode
4376 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4377 			kernel, &vnode, NULL);
4378 	}
4379 
4380 	put_mount(mount);
4381 
4382 	if (status != B_OK)
4383 		return status;
4384 
4385 	if (vnode->device != volume->id) {
4386 		// wrong mount ID - must not gain access on foreign file system nodes
4387 		put_vnode(vnode);
4388 		return B_BAD_VALUE;
4389 	}
4390 
4391 	// Use get_vnode() to resolve the cookie for the right layer.
4392 	status = get_vnode(volume, vnode->id, _node);
4393 	put_vnode(vnode);
4394 
4395 	return status;
4396 }
4397 
4398 
4399 status_t
4400 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4401 	struct stat* stat, bool kernel)
4402 {
4403 	status_t status;
4404 
4405 	if (path != NULL) {
4406 		// path given: get the stat of the node referred to by (fd, path)
4407 		KPath pathBuffer(path);
4408 		if (pathBuffer.InitCheck() != B_OK)
4409 			return B_NO_MEMORY;
4410 
4411 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4412 			traverseLeafLink, stat, kernel);
4413 	} else {
4414 		// no path given: get the FD and use the FD operation
4415 		struct file_descriptor* descriptor
4416 			= get_fd(get_current_io_context(kernel), fd);
4417 		if (descriptor == NULL)
4418 			return B_FILE_ERROR;
4419 
4420 		if (descriptor->ops->fd_read_stat)
4421 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4422 		else
4423 			status = B_UNSUPPORTED;
4424 
4425 		put_fd(descriptor);
4426 	}
4427 
4428 	return status;
4429 }
4430 
4431 
4432 /*!	Finds the full path to the file that contains the module \a moduleName,
4433 	puts it into \a pathBuffer, and returns B_OK for success.
4434 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4435 	\c B_ENTRY_NOT_FOUND if no file could be found.
4436 	\a pathBuffer is clobbered in any case and must not be relied on if this
4437 	function returns unsuccessfully.
4438 	\a basePath and \a pathBuffer must not point to the same space.
4439 */
4440 status_t
4441 vfs_get_module_path(const char* basePath, const char* moduleName,
4442 	char* pathBuffer, size_t bufferSize)
4443 {
4444 	struct vnode* dir;
4445 	struct vnode* file;
4446 	status_t status;
4447 	size_t length;
4448 	char* path;
4449 
4450 	if (bufferSize == 0
4451 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4452 		return B_BUFFER_OVERFLOW;
4453 
4454 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4455 	if (status != B_OK)
4456 		return status;
4457 
4458 	// the path buffer had been clobbered by the above call
4459 	length = strlcpy(pathBuffer, basePath, bufferSize);
4460 	if (pathBuffer[length - 1] != '/')
4461 		pathBuffer[length++] = '/';
4462 
4463 	path = pathBuffer + length;
4464 	bufferSize -= length;
4465 
4466 	while (moduleName) {
4467 		char* nextPath = strchr(moduleName, '/');
4468 		if (nextPath == NULL)
4469 			length = strlen(moduleName);
4470 		else {
4471 			length = nextPath - moduleName;
4472 			nextPath++;
4473 		}
4474 
4475 		if (length + 1 >= bufferSize) {
4476 			status = B_BUFFER_OVERFLOW;
4477 			goto err;
4478 		}
4479 
4480 		memcpy(path, moduleName, length);
4481 		path[length] = '\0';
4482 		moduleName = nextPath;
4483 
4484 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4485 		if (status != B_OK) {
4486 			// vnode_path_to_vnode() has already released the reference to dir
4487 			return status;
4488 		}
4489 
4490 		if (S_ISDIR(file->Type())) {
4491 			// go to the next directory
4492 			path[length] = '/';
4493 			path[length + 1] = '\0';
4494 			path += length + 1;
4495 			bufferSize -= length + 1;
4496 
4497 			dir = file;
4498 		} else if (S_ISREG(file->Type())) {
4499 			// it's a file so it should be what we've searched for
4500 			put_vnode(file);
4501 
4502 			return B_OK;
4503 		} else {
4504 			TRACE(("vfs_get_module_path(): something is strange here: "
4505 				"0x%08" B_PRIx32 "...\n", file->Type()));
4506 			status = B_ERROR;
4507 			dir = file;
4508 			goto err;
4509 		}
4510 	}
4511 
4512 	// if we got here, the moduleName just pointed to a directory, not to
4513 	// a real module - what should we do in this case?
4514 	status = B_ENTRY_NOT_FOUND;
4515 
4516 err:
4517 	put_vnode(dir);
4518 	return status;
4519 }
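

/* Example (illustrative, hypothetical paths): with basePath
   "/boot/system/add-ons/kernel" and moduleName "bus_managers/usb/v3", the
   loop descends "bus_managers", then "usb". If "usb" turns out to be a
   regular file, that file is taken to contain the module and pathBuffer
   ends up as "/boot/system/add-ons/kernel/bus_managers/usb". */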
4520 
4521 
4522 /*!	\brief Normalizes a given path.
4523 
4524 	The path must refer to an existing or non-existing entry in an existing
4525 	directory; that is, after chopping off the leaf component, the remaining
4526 	path must refer to an existing directory.
4527 
4528 	The returned path will be canonical in that it will be absolute, will not
4529 	contain any "." or ".." components or duplicate occurrences of '/'s,
4530 	and none of the directory components will be symbolic links.
4531 
4532 	Any two paths referring to the same entry will result in the same
4533 	normalized path (well, that is pretty much the definition of `normalized',
4534 	isn't it :-).
4535 
4536 	\param path The path to be normalized.
4537 	\param buffer The buffer into which the normalized path will be written.
4538 		   May be the same one as \a path.
4539 	\param bufferSize The size of \a buffer.
4540 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4541 	\param kernel \c true, if the IO context of the kernel shall be used,
4542 		   otherwise that of the team this thread belongs to. Only relevant,
4543 		   if the path is relative (to get the CWD).
4544 	\return \c B_OK if everything went fine, another error code otherwise.
4545 */
4546 status_t
4547 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4548 	bool traverseLink, bool kernel)
4549 {
4550 	if (!path || !buffer || bufferSize < 1)
4551 		return B_BAD_VALUE;
4552 
4553 	if (path != buffer) {
4554 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4555 			return B_BUFFER_OVERFLOW;
4556 	}
4557 
4558 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4559 }
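

/* Example (illustrative): with a CWD of "/boot/home", normalizing
   "foo/../Desktop//" would yield "/boot/home/Desktop" -- absolute, free of
   "." and ".." components and duplicate slashes, and with all directory
   components resolved past symlinks. */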
4560 
4561 
4562 /*!	\brief Gets the parent of the passed in node.
4563 
4564 	Resolves covered nodes correctly while doing so.
4566 */
4567 extern "C" status_t
4568 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4569 {
4570 	return resolve_covered_parent(parent, device, node,
4571 		get_current_io_context(true));
4572 }
4573 
4574 
4575 /*!	\brief Creates a special node in the file system.
4576 
4577 	The caller gets a reference to the newly created node (which is passed
4578 	back through \a _createdVnode) and is responsible for releasing it.
4579 
4580 	\param path The path where to create the entry for the node. Can be \c NULL,
4581 		in which case the node is created without an entry in the root FS -- it
4582 		will automatically be deleted when the last reference has been released.
4583 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4584 		the target file system will just create the node with its standard
4585 		operations. Depending on the type of the node a subnode might be created
4586 		automatically, though.
4587 	\param mode The type and permissions for the node to be created.
4588 	\param flags Flags to be passed to the creating FS.
4589 	\param kernel \c true, if called in the kernel context (relevant only if
4590 		\a path is not \c NULL and not absolute).
4591 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4592 		file system creating the node, with the private data pointer and
4593 		operations for the super node. Can be \c NULL.
4594 	\param _createdVnode Pointer to pre-allocated storage where to store the
4595 		pointer to the newly created node.
4596 	\return \c B_OK, if everything went fine, another error code otherwise.
4597 */
4598 status_t
4599 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4600 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4601 	struct vnode** _createdVnode)
4602 {
4603 	struct vnode* dirNode;
4604 	char _leaf[B_FILE_NAME_LENGTH];
4605 	char* leaf = NULL;
4606 
4607 	if (path) {
4608 		// We've got a path. Get the dir vnode and the leaf name.
4609 		KPath tmpPathBuffer;
4610 		if (tmpPathBuffer.InitCheck() != B_OK)
4611 			return B_NO_MEMORY;
4612 
4613 		char* tmpPath = tmpPathBuffer.LockBuffer();
4614 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4615 			return B_NAME_TOO_LONG;
4616 
4617 		// get the dir vnode and the leaf name
4618 		leaf = _leaf;
4619 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4620 		if (error != B_OK)
4621 			return error;
4622 	} else {
4623 		// No path. Create the node in the root FS.
4624 		dirNode = sRoot;
4625 		inc_vnode_ref_count(dirNode);
4626 	}
4627 
4628 	VNodePutter _(dirNode);
4629 
4630 	// check support for creating special nodes
4631 	if (!HAS_FS_CALL(dirNode, create_special_node))
4632 		return B_UNSUPPORTED;
4633 
4634 	// create the node
4635 	fs_vnode superVnode;
4636 	ino_t nodeID;
4637 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4638 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4639 	if (status != B_OK)
4640 		return status;
4641 
4642 	// lookup the node
4643 	rw_lock_read_lock(&sVnodeLock);
4644 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4645 	rw_lock_read_unlock(&sVnodeLock);
4646 
4647 	if (*_createdVnode == NULL) {
4648 		panic("vfs_create_special_node(): lookup of node failed");
4649 		return B_ERROR;
4650 	}
4651 
4652 	return B_OK;
4653 }
4654 
4655 
4656 extern "C" void
4657 vfs_put_vnode(struct vnode* vnode)
4658 {
4659 	put_vnode(vnode);
4660 }
4661 
4662 
4663 extern "C" status_t
4664 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4665 {
4666 	// Get current working directory from io context
4667 	struct io_context* context = get_current_io_context(false);
4668 	status_t status = B_OK;
4669 
4670 	mutex_lock(&context->io_mutex);
4671 
4672 	if (context->cwd != NULL) {
4673 		*_mountID = context->cwd->device;
4674 		*_vnodeID = context->cwd->id;
4675 	} else
4676 		status = B_ERROR;
4677 
4678 	mutex_unlock(&context->io_mutex);
4679 	return status;
4680 }
4681 
4682 
4683 status_t
4684 vfs_unmount(dev_t mountID, uint32 flags)
4685 {
4686 	return fs_unmount(NULL, mountID, flags, true);
4687 }
4688 
4689 
4690 extern "C" status_t
4691 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4692 {
4693 	struct vnode* vnode;
4694 
4695 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4696 	if (status != B_OK)
4697 		return status;
4698 
4699 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4700 	put_vnode(vnode);
4701 	return B_OK;
4702 }
4703 
4704 
4705 extern "C" void
4706 vfs_free_unused_vnodes(int32 level)
4707 {
4708 	vnode_low_resource_handler(NULL,
4709 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4710 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4711 		level);
4712 }
4713 
4714 
4715 extern "C" bool
4716 vfs_can_page(struct vnode* vnode, void* cookie)
4717 {
4718 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4719 
4720 	if (HAS_FS_CALL(vnode, can_page))
4721 		return FS_CALL(vnode, can_page, cookie);
4722 	return false;
4723 }
4724 
4725 
4726 extern "C" status_t
4727 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4728 	const generic_io_vec* vecs, size_t count, uint32 flags,
4729 	generic_size_t* _numBytes)
4730 {
4731 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4732 		vecs, pos));
4733 
4734 #if VFS_PAGES_IO_TRACING
4735 	generic_size_t bytesRequested = *_numBytes;
4736 #endif
4737 
4738 	IORequest request;
4739 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4740 	if (status == B_OK) {
4741 		status = vfs_vnode_io(vnode, cookie, &request);
4742 		if (status == B_OK)
4743 			status = request.Wait();
4744 		*_numBytes = request.TransferredBytes();
4745 	}
4746 
4747 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4748 		status, *_numBytes));
4749 
4750 	return status;
4751 }
4752 
4753 
4754 extern "C" status_t
4755 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4756 	const generic_io_vec* vecs, size_t count, uint32 flags,
4757 	generic_size_t* _numBytes)
4758 {
4759 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4760 		vecs, pos));
4761 
4762 #if VFS_PAGES_IO_TRACING
4763 	generic_size_t bytesRequested = *_numBytes;
4764 #endif
4765 
4766 	IORequest request;
4767 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4768 	if (status == B_OK) {
4769 		status = vfs_vnode_io(vnode, cookie, &request);
4770 		if (status == B_OK)
4771 			status = request.Wait();
4772 		*_numBytes = request.TransferredBytes();
4773 	}
4774 
4775 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4776 		status, *_numBytes));
4777 
4778 	return status;
4779 }
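

/* Illustrative usage sketch (hypothetical buffer and cookie): transferring a
   page from a vnode with a single call; on return "bytes" holds the number
   of bytes actually transferred:

	generic_io_vec vec = { (generic_addr_t)buffer, B_PAGE_SIZE };
	generic_size_t bytes = B_PAGE_SIZE;
	status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0, &bytes);
*/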
4780 
4781 
4782 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4783 	created if \a allocate is \c true.
4784 	In case it's successful, it will also grab a reference to the cache
4785 	it returns.
4786 */
4787 extern "C" status_t
4788 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4789 {
4790 	if (vnode->cache != NULL) {
4791 		vnode->cache->AcquireRef();
4792 		*_cache = vnode->cache;
4793 		return B_OK;
4794 	}
4795 
4796 	rw_lock_read_lock(&sVnodeLock);
4797 	vnode->Lock();
4798 
4799 	status_t status = B_OK;
4800 
4801 	// The cache could have been created in the meantime
4802 	if (vnode->cache == NULL) {
4803 		if (allocate) {
4804 			// TODO: actually the vnode needs to be busy already here, or
4805 			//	else this won't work...
4806 			bool wasBusy = vnode->IsBusy();
4807 			vnode->SetBusy(true);
4808 
4809 			vnode->Unlock();
4810 			rw_lock_read_unlock(&sVnodeLock);
4811 
4812 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4813 
4814 			rw_lock_read_lock(&sVnodeLock);
4815 			vnode->Lock();
4816 			vnode->SetBusy(wasBusy);
4817 		} else
4818 			status = B_BAD_VALUE;
4819 	}
4820 
4821 	vnode->Unlock();
4822 	rw_lock_read_unlock(&sVnodeLock);
4823 
4824 	if (status == B_OK) {
4825 		vnode->cache->AcquireRef();
4826 		*_cache = vnode->cache;
4827 	}
4828 
4829 	return status;
4830 }
4831 
4832 
4833 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4834 	their own.
4835 	In case it's successful, it will also acquire a reference to the
4836 	passed-in cache.
4837 */
4838 extern "C" status_t
4839 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4840 {
4841 	rw_lock_read_lock(&sVnodeLock);
4842 	vnode->Lock();
4843 
4844 	status_t status = B_OK;
4845 	if (vnode->cache != NULL) {
4846 		status = B_NOT_ALLOWED;
4847 	} else {
4848 		vnode->cache = _cache;
4849 		_cache->AcquireRef();
4850 	}
4851 
4852 	vnode->Unlock();
4853 	rw_lock_read_unlock(&sVnodeLock);
4854 	return status;
4855 }
4856 
4857 
4858 status_t
4859 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4860 	file_io_vec* vecs, size_t* _count)
4861 {
4862 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4863 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4864 
4865 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4866 }
4867 
4868 
4869 status_t
4870 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4871 {
4872 	status_t status = FS_CALL(vnode, read_stat, stat);
4873 
4874 	// fill in the st_dev and st_ino fields
4875 	if (status == B_OK) {
4876 		stat->st_dev = vnode->device;
4877 		stat->st_ino = vnode->id;
4878 		// the rdev field must stay unset for non-special files
4879 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4880 			stat->st_rdev = -1;
4881 	}
4882 
4883 	return status;
4884 }
4885 
4886 
4887 status_t
4888 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4889 {
4890 	struct vnode* vnode;
4891 	status_t status = get_vnode(device, inode, &vnode, true, false);
4892 	if (status != B_OK)
4893 		return status;
4894 
4895 	status = vfs_stat_vnode(vnode, stat);
4896 
4897 	put_vnode(vnode);
4898 	return status;
4899 }
4900 
4901 
4902 status_t
4903 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4904 {
4905 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4906 }
4907 
4908 
4909 status_t
4910 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4911 	bool kernel, char* path, size_t pathLength)
4912 {
4913 	struct vnode* vnode;
4914 	status_t status;
4915 
4916 	// filter invalid leaf names
4917 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4918 		return B_BAD_VALUE;
4919 
4920 	// get the vnode matching the dir's node_ref
4921 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4922 		// special cases "." and "..": we can directly get the vnode of the
4923 		// referenced directory
4924 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4925 		leaf = NULL;
4926 	} else
4927 		status = get_vnode(device, inode, &vnode, true, false);
4928 	if (status != B_OK)
4929 		return status;
4930 
4931 	// get the directory path
4932 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4933 	put_vnode(vnode);
4934 		// we don't need the vnode anymore
4935 	if (status != B_OK)
4936 		return status;
4937 
4938 	// append the leaf name
4939 	if (leaf) {
4940 		// insert a directory separator if this is not the file system root
4941 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4942 				>= pathLength)
4943 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4944 			return B_NAME_TOO_LONG;
4945 		}
4946 	}
4947 
4948 	return B_OK;
4949 }
4950 
4951 
4952 /*!	If the given descriptor locked its vnode, that lock will be released. */
4953 void
4954 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4955 {
4956 	struct vnode* vnode = fd_vnode(descriptor);
4957 
4958 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4959 		vnode->mandatory_locked_by = NULL;
4960 }
4961 
4962 
4963 /*!	Releases any POSIX locks on the file descriptor. */
4964 status_t
4965 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4966 {
4967 	struct vnode* vnode = descriptor->u.vnode;
4968 	if (vnode == NULL)
4969 		return B_OK;
4970 
4971 	if (HAS_FS_CALL(vnode, release_lock))
4972 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4973 
4974 	return release_advisory_lock(vnode, context, NULL, NULL);
4975 }
4976 
4977 
4978 /*!	Closes all file descriptors of the specified I/O context that
4979 	have the O_CLOEXEC flag set.
4980 */
4981 void
4982 vfs_exec_io_context(io_context* context)
4983 {
4984 	uint32 i;
4985 
4986 	for (i = 0; i < context->table_size; i++) {
4987 		mutex_lock(&context->io_mutex);
4988 
4989 		struct file_descriptor* descriptor = context->fds[i];
4990 		bool remove = false;
4991 
4992 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4993 			context->fds[i] = NULL;
4994 			context->num_used_fds--;
4995 
4996 			remove = true;
4997 		}
4998 
4999 		mutex_unlock(&context->io_mutex);
5000 
5001 		if (remove) {
5002 			close_fd(context, descriptor);
5003 			put_fd(descriptor);
5004 		}
5005 	}
5006 }
5007 
5008 
5009 /*! Sets up a new io_context structure, and inherits the properties
5010 	of the parent io_context if one is given.
5011 */
5012 io_context*
5013 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
5014 {
5015 	io_context* context = (io_context*)malloc(sizeof(io_context));
5016 	if (context == NULL)
5017 		return NULL;
5018 
5019 	TIOC(NewIOContext(context, parentContext));
5020 
5021 	memset(context, 0, sizeof(io_context));
5022 	context->ref_count = 1;
5023 
5024 	MutexLocker parentLocker;
5025 
5026 	size_t tableSize;
5027 	if (parentContext != NULL) {
5028 		parentLocker.SetTo(parentContext->io_mutex, false);
5029 		tableSize = parentContext->table_size;
5030 	} else
5031 		tableSize = DEFAULT_FD_TABLE_SIZE;
5032 
5033 	// allocate space for FDs and their close-on-exec flag
5034 	context->fds = (file_descriptor**)malloc(
5035 		sizeof(struct file_descriptor*) * tableSize
5036 		+ sizeof(struct select_sync*) * tableSize
5037 		+ (tableSize + 7) / 8);
5038 	if (context->fds == NULL) {
5039 		free(context);
5040 		return NULL;
5041 	}
5042 
5043 	context->select_infos = (select_info**)(context->fds + tableSize);
5044 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5045 
5046 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5047 		+ sizeof(struct select_sync*) * tableSize
5048 		+ (tableSize + 7) / 8);
5049 
5050 	mutex_init(&context->io_mutex, "I/O context");
5051 
5052 	// Copy all parent file descriptors
5053 
5054 	if (parentContext != NULL) {
5055 		size_t i;
5056 
5057 		mutex_lock(&sIOContextRootLock);
5058 		context->root = parentContext->root;
5059 		if (context->root)
5060 			inc_vnode_ref_count(context->root);
5061 		mutex_unlock(&sIOContextRootLock);
5062 
5063 		context->cwd = parentContext->cwd;
5064 		if (context->cwd)
5065 			inc_vnode_ref_count(context->cwd);
5066 
5067 		if (parentContext->inherit_fds) {
5068 			for (i = 0; i < tableSize; i++) {
5069 				struct file_descriptor* descriptor = parentContext->fds[i];
5070 
5071 				if (descriptor != NULL
5072 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5073 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5074 					if (closeOnExec && purgeCloseOnExec)
5075 						continue;
5076 
5077 					TFD(InheritFD(context, i, descriptor, parentContext));
5078 
5079 					context->fds[i] = descriptor;
5080 					context->num_used_fds++;
5081 					atomic_add(&descriptor->ref_count, 1);
5082 					atomic_add(&descriptor->open_count, 1);
5083 
5084 					if (closeOnExec)
5085 						fd_set_close_on_exec(context, i, true);
5086 				}
5087 			}
5088 		}
5089 
5090 		parentLocker.Unlock();
5091 	} else {
5092 		context->root = sRoot;
5093 		context->cwd = sRoot;
5094 
5095 		if (context->root)
5096 			inc_vnode_ref_count(context->root);
5097 
5098 		if (context->cwd)
5099 			inc_vnode_ref_count(context->cwd);
5100 	}
5101 
5102 	context->table_size = tableSize;
5103 	context->inherit_fds = parentContext != NULL;
5104 
5105 	list_init(&context->node_monitors);
5106 	context->max_monitors = DEFAULT_NODE_MONITORS;
5107 
5108 	return context;
5109 }
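

/* Layout note (illustrative arithmetic): the single allocation above packs
   three arrays back to back. For tableSize = 128 on a 64-bit system that is
   128 * 8 bytes of file_descriptor pointers, 128 * 8 bytes of select_info
   pointers, and (128 + 7) / 8 = 16 bytes of close-on-exec bitmap -- one bit
   per descriptor. */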
5110 
5111 
5112 void
5113 vfs_get_io_context(io_context* context)
5114 {
5115 	atomic_add(&context->ref_count, 1);
5116 }
5117 
5118 
5119 void
5120 vfs_put_io_context(io_context* context)
5121 {
5122 	if (atomic_add(&context->ref_count, -1) == 1)
5123 		free_io_context(context);
5124 }
5125 
5126 
5127 status_t
5128 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5129 {
5130 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5131 		return B_BAD_VALUE;
5132 
5133 	TIOC(ResizeIOContext(context, newSize));
5134 
5135 	MutexLocker _(context->io_mutex);
5136 
5137 	uint32 oldSize = context->table_size;
5138 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5139 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5140 
5141 	// If the tables shrink, make sure none of the fds being dropped are in use.
5142 	if (newSize < oldSize) {
5143 		for (uint32 i = oldSize; i-- > newSize;) {
5144 			if (context->fds[i])
5145 				return B_BUSY;
5146 		}
5147 	}
5148 
5149 	// store pointers to the old tables
5150 	file_descriptor** oldFDs = context->fds;
5151 	select_info** oldSelectInfos = context->select_infos;
5152 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5153 
5154 	// allocate new tables
5155 	file_descriptor** newFDs = (file_descriptor**)malloc(
5156 		sizeof(struct file_descriptor*) * newSize
5157 		+ sizeof(struct select_sync*) * newSize
5158 		+ newCloseOnExitBitmapSize);
5159 	if (newFDs == NULL)
5160 		return B_NO_MEMORY;
5161 
5162 	context->fds = newFDs;
5163 	context->select_infos = (select_info**)(context->fds + newSize);
5164 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5165 	context->table_size = newSize;
5166 
5167 	// copy entries from old tables
5168 	uint32 toCopy = min_c(oldSize, newSize);
5169 
5170 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5171 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5172 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5173 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5174 
5175 	// clear additional entries, if the tables grow
5176 	if (newSize > oldSize) {
5177 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5178 		memset(context->select_infos + oldSize, 0,
5179 			sizeof(void*) * (newSize - oldSize));
5180 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5181 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5182 	}
5183 
5184 	free(oldFDs);
5185 
5186 	return B_OK;
5187 }
5188 
5189 
5190 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5191 
5192 	Given an arbitrary vnode (identified by mount and node ID), the function
5193 	checks, whether the vnode is covered by another vnode. If it is, the
5194 	function returns the mount and node ID of the covering vnode. Otherwise
5195 	it simply returns the supplied mount and node ID.
5196 
5197 	In case of error (e.g. the supplied node could not be found) the variables
5198 	for storing the resolved mount and node ID remain untouched and an error
5199 	code is returned.
5200 
5201 	\param mountID The mount ID of the vnode in question.
5202 	\param nodeID The node ID of the vnode in question.
5203 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5204 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5205 	\return
5206 	- \c B_OK, if everything went fine,
5207 	- another error code, if something went wrong.
5208 */
5209 status_t
5210 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5211 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5212 {
5213 	// get the node
5214 	struct vnode* node;
5215 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5216 	if (error != B_OK)
5217 		return error;
5218 
5219 	// resolve the node
5220 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5221 		put_vnode(node);
5222 		node = coveringNode;
5223 	}
5224 
5225 	// set the return values
5226 	*resolvedMountID = node->device;
5227 	*resolvedNodeID = node->id;
5228 
5229 	put_vnode(node);
5230 
5231 	return B_OK;
5232 }
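

/* Example (illustrative): if a volume is mounted at /mnt, the /mnt directory
   vnode on the parent file system is covered by the mounted volume's root
   vnode. Resolving the covered vnode's (mountID, nodeID) thus returns the
   root vnode's IDs, while an uncovered vnode is returned unchanged. */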
5233 
5234 
5235 status_t
5236 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5237 	ino_t* _mountPointNodeID)
5238 {
5239 	ReadLocker nodeLocker(sVnodeLock);
5240 	ReadLocker mountLocker(sMountLock);
5241 
5242 	struct fs_mount* mount = find_mount(mountID);
5243 	if (mount == NULL)
5244 		return B_BAD_VALUE;
5245 
5246 	Vnode* mountPoint = mount->covers_vnode;
5247 
5248 	*_mountPointMountID = mountPoint->device;
5249 	*_mountPointNodeID = mountPoint->id;
5250 
5251 	return B_OK;
5252 }
5253 
5254 
5255 status_t
5256 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5257 	ino_t coveredNodeID)
5258 {
5259 	// get the vnodes
5260 	Vnode* vnode;
5261 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5262 	if (error != B_OK)
5263 		return B_BAD_VALUE;
5264 	VNodePutter vnodePutter(vnode);
5265 
5266 	Vnode* coveredVnode;
5267 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5268 		false);
5269 	if (error != B_OK)
5270 		return B_BAD_VALUE;
5271 	VNodePutter coveredVnodePutter(coveredVnode);
5272 
5273 	// establish the covered/covering links
5274 	WriteLocker locker(sVnodeLock);
5275 
5276 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5277 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5278 		return B_BUSY;
5279 	}
5280 
5281 	vnode->covers = coveredVnode;
5282 	vnode->SetCovering(true);
5283 
5284 	coveredVnode->covered_by = vnode;
5285 	coveredVnode->SetCovered(true);
5286 
5287 	// the vnodes do now reference each other
5288 	inc_vnode_ref_count(vnode);
5289 	inc_vnode_ref_count(coveredVnode);
5290 
5291 	return B_OK;
5292 }
5293 
5294 
5295 int
5296 vfs_getrlimit(int resource, struct rlimit* rlp)
5297 {
5298 	if (!rlp)
5299 		return B_BAD_ADDRESS;
5300 
5301 	switch (resource) {
5302 		case RLIMIT_NOFILE:
5303 		{
5304 			struct io_context* context = get_current_io_context(false);
5305 			MutexLocker _(context->io_mutex);
5306 
5307 			rlp->rlim_cur = context->table_size;
5308 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5309 			return 0;
5310 		}
5311 
5312 		case RLIMIT_NOVMON:
5313 		{
5314 			struct io_context* context = get_current_io_context(false);
5315 			MutexLocker _(context->io_mutex);
5316 
5317 			rlp->rlim_cur = context->max_monitors;
5318 			rlp->rlim_max = MAX_NODE_MONITORS;
5319 			return 0;
5320 		}
5321 
5322 		default:
5323 			return B_BAD_VALUE;
5324 	}
5325 }
5326 
5327 
5328 int
5329 vfs_setrlimit(int resource, const struct rlimit* rlp)
5330 {
5331 	if (!rlp)
5332 		return B_BAD_ADDRESS;
5333 
5334 	switch (resource) {
5335 		case RLIMIT_NOFILE:
5336 			/* TODO: check getuid() */
5337 			if (rlp->rlim_max != RLIM_SAVED_MAX
5338 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5339 				return B_NOT_ALLOWED;
5340 
5341 			return vfs_resize_fd_table(get_current_io_context(false),
5342 				rlp->rlim_cur);
5343 
5344 		case RLIMIT_NOVMON:
5345 			/* TODO: check getuid() */
5346 			if (rlp->rlim_max != RLIM_SAVED_MAX
5347 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5348 				return B_NOT_ALLOWED;
5349 
5350 			return resize_monitor_table(get_current_io_context(false),
5351 				rlp->rlim_cur);
5352 
5353 		default:
5354 			return B_BAD_VALUE;
5355 	}
5356 }
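

/* Illustrative usage sketch (hypothetical value): growing the FD table of
   the current team. After vfs_getrlimit(), rlim_max is MAX_FD_TABLE_SIZE,
   which vfs_setrlimit() accepts:

	struct rlimit rl;
	if (vfs_getrlimit(RLIMIT_NOFILE, &rl) == 0) {
		rl.rlim_cur = 512;
		vfs_setrlimit(RLIMIT_NOFILE, &rl);
	}
*/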
5357 
5358 
5359 status_t
5360 vfs_init(kernel_args* args)
5361 {
5362 	vnode::StaticInit();
5363 
5364 	sVnodeTable = new(std::nothrow) VnodeTable();
5365 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5366 		panic("vfs_init: error creating vnode hash table\n");
5367 
5368 	struct vnode dummy_vnode;
5369 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5370 
5371 	struct fs_mount dummyMount;
5372 	sMountsTable = new(std::nothrow) MountTable();
5373 	if (sMountsTable == NULL
5374 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5375 		panic("vfs_init: error creating mounts hash table\n");
5376 
5377 	sPathNameCache = create_object_cache("vfs path names",
5378 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5379 	if (sPathNameCache == NULL)
5380 		panic("vfs_init: error creating path name object_cache\n");
5381 
5382 	sFileDescriptorCache = create_object_cache("vfs fds",
5383 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5384 	if (sFileDescriptorCache == NULL)
5385 		panic("vfs_init: error creating file descriptor object_cache\n");
5386 
5387 	node_monitor_init();
5388 
5389 	sRoot = NULL;
5390 
5391 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5392 
5393 	if (block_cache_init() != B_OK)
5394 		return B_ERROR;
5395 
5396 #ifdef ADD_DEBUGGER_COMMANDS
5397 	// add some debugger commands
5398 	add_debugger_command_etc("vnode", &dump_vnode,
5399 		"Print info about the specified vnode",
5400 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5401 		"Prints information about the vnode specified by address <vnode> or\n"
5402 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5403 		"constructed and printed. It might not be possible to construct a\n"
5404 		"complete path, though.\n",
5405 		0);
5406 	add_debugger_command("vnodes", &dump_vnodes,
5407 		"list all vnodes (from the specified device)");
5408 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5409 		"list all vnode caches");
5410 	add_debugger_command("mount", &dump_mount,
5411 		"info about the specified fs_mount");
5412 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5413 	add_debugger_command("io_context", &dump_io_context,
5414 		"info about the I/O context");
5415 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5416 		"info about vnode usage");
5417 #endif
5418 
5419 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5420 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5421 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5422 		0);
5423 
5424 	fifo_init();
5425 	file_map_init();
5426 
5427 	return file_cache_init();
5428 }
5429 
5430 
5431 //	#pragma mark - fd_ops implementations
5432 
5433 
5434 /*!
5435 	Calls fs_open() on the given vnode and returns a new
5436 	file descriptor for it
5437 */
5438 static int
5439 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5440 {
5441 	void* cookie;
5442 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5443 	if (status != B_OK)
5444 		return status;
5445 
5446 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5447 	if (fd < 0) {
5448 		FS_CALL(vnode, close, cookie);
5449 		FS_CALL(vnode, free_cookie, cookie);
5450 	}
5451 	return fd;
5452 }
5453 
5454 
5455 /*!
5456 	Looks up or creates the entry \a name in the given directory and
5457 	returns a new file descriptor for it
5458 */
5459 static int
5460 create_vnode(struct vnode* directory, const char* name, int openMode,
5461 	int perms, bool kernel)
5462 {
5463 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5464 	status_t status = B_ERROR;
5465 	struct vnode* vnode;
5466 	void* cookie;
5467 	ino_t newID;
5468 
5469 	// This is somewhat tricky: If the entry already exists, the FS responsible
5470 	// for the directory might not necessarily also be the one responsible for
5471 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5472 	// we can actually never call the create() hook without O_EXCL. Instead we
5473 	// try to look the entry up first. If it already exists, we just open the
5474 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5475 	// introduces a race condition, since someone else might have created the
5476 	// entry in the meantime. In that case we rely on the respective FS
5477 	// returning the correct error code, and we retry (up to 3 times).
5478 
5479 	for (int i = 0; i < 3 && status != B_OK; i++) {
5480 		// look the node up
5481 		status = lookup_dir_entry(directory, name, &vnode);
5482 		if (status == B_OK) {
5483 			VNodePutter putter(vnode);
5484 
5485 			if ((openMode & O_EXCL) != 0)
5486 				return B_FILE_EXISTS;
5487 
5488 			// If the node is a symlink, we have to follow it, unless
5489 			// O_NOTRAVERSE is set.
5490 			if (S_ISLNK(vnode->Type()) && traverse) {
5491 				putter.Put();
5492 				char clonedName[B_FILE_NAME_LENGTH + 1];
5493 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5494 						>= B_FILE_NAME_LENGTH) {
5495 					return B_NAME_TOO_LONG;
5496 				}
5497 
5498 				inc_vnode_ref_count(directory);
5499 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5500 					kernel, &vnode, NULL);
5501 				if (status != B_OK)
5502 					return status;
5503 
5504 				putter.SetTo(vnode);
5505 			}
5506 
5507 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5508 				return B_LINK_LIMIT;
5509 
5510 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5511 			// on success keep the vnode reference for the FD
5512 			if (fd >= 0)
5513 				putter.Detach();
5514 
5515 			return fd;
5516 		}
5517 
5518 		// it doesn't exist yet -- try to create it
5519 
5520 		if (!HAS_FS_CALL(directory, create))
5521 			return B_READ_ONLY_DEVICE;
5522 
5523 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5524 			&cookie, &newID);
5525 		if (status != B_OK
5526 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5527 			return status;
5528 		}
5529 	}
5530 
5531 	if (status != B_OK)
5532 		return status;
5533 
5534 	// the node has been created successfully
5535 
5536 	rw_lock_read_lock(&sVnodeLock);
5537 	vnode = lookup_vnode(directory->device, newID);
5538 	rw_lock_read_unlock(&sVnodeLock);
5539 
5540 	if (vnode == NULL) {
5541 		panic("vfs: fs_create() returned success but there is no vnode, "
5542 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5543 		return B_BAD_VALUE;
5544 	}
5545 
5546 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5547 	if (fd >= 0)
5548 		return fd;
5549 
5550 	status = fd;
5551 
5552 	// something went wrong, clean up
5553 
5554 	FS_CALL(vnode, close, cookie);
5555 	FS_CALL(vnode, free_cookie, cookie);
5556 	put_vnode(vnode);
5557 
5558 	FS_CALL(directory, unlink, name);
5559 
5560 	return status;
5561 }
5562 
5563 
5564 /*! Calls fs open_dir() on the given vnode and returns a new
5565 	file descriptor for it
5566 */
5567 static int
5568 open_dir_vnode(struct vnode* vnode, bool kernel)
5569 {
5570 	void* cookie;
5571 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5572 	if (status != B_OK)
5573 		return status;
5574 
5575 	// directory is opened, create a fd
5576 	// directory is opened, create an FD for it
5577 	if (status >= 0)
5578 		return status;
5579 
5580 	FS_CALL(vnode, close_dir, cookie);
5581 	FS_CALL(vnode, free_dir_cookie, cookie);
5582 
5583 	return status;
5584 }
5585 
5586 
5587 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5588 	file descriptor for it.
5589 	Used by attr_dir_open(), and attr_dir_open_fd().
5590 */
5591 static int
5592 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5593 {
5594 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5595 		return B_UNSUPPORTED;
5596 
5597 	void* cookie;
5598 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5599 	if (status != B_OK)
5600 		return status;
5601 
5602 	// directory is opened, create an FD for it
5603 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5604 		kernel);
5605 	if (status >= 0)
5606 		return status;
5607 
5608 	FS_CALL(vnode, close_attr_dir, cookie);
5609 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5610 
5611 	return status;
5612 }
5613 
5614 
5615 static int
5616 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5617 	int openMode, int perms, bool kernel)
5618 {
5619 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5620 		"kernel %d\n", name, openMode, perms, kernel));
5621 
5622 	// get directory to put the new file in
5623 	struct vnode* directory;
5624 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5625 	if (status != B_OK)
5626 		return status;
5627 
5628 	status = create_vnode(directory, name, openMode, perms, kernel);
5629 	put_vnode(directory);
5630 
5631 	return status;
5632 }
5633 
5634 
5635 static int
5636 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5637 {
5638 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5639 		openMode, perms, kernel));
5640 
5641 	// get directory to put the new file in
5642 	char name[B_FILE_NAME_LENGTH];
5643 	struct vnode* directory;
5644 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5645 		kernel);
5646 	if (status < 0)
5647 		return status;
5648 
5649 	status = create_vnode(directory, name, openMode, perms, kernel);
5650 
5651 	put_vnode(directory);
5652 	return status;
5653 }
5654 
5655 
5656 static int
5657 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5658 	int openMode, bool kernel)
5659 {
5660 	if (name == NULL || *name == '\0')
5661 		return B_BAD_VALUE;
5662 
5663 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5664 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5665 
5666 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5667 
5668 	// get the vnode matching the entry_ref
5669 	struct vnode* vnode;
5670 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5671 		kernel, &vnode);
5672 	if (status != B_OK)
5673 		return status;
5674 
5675 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5676 		put_vnode(vnode);
5677 		return B_LINK_LIMIT;
5678 	}
5679 
5680 	int newFD = open_vnode(vnode, openMode, kernel);
5681 	if (newFD >= 0) {
5682 		// The vnode reference has been transferred to the FD
5683 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5684 			directoryID, vnode->id, name);
5685 	} else
5686 		put_vnode(vnode);
5687 
5688 	return newFD;
5689 }
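
/*	For reference, how the two "don't follow" bits affect the traverse flag
	computed above:

		neither bit set    -> traverse = true; symlinks are resolved
		O_NOTRAVERSE set   -> traverse = false; the symlink node itself is
		                      opened
		O_NOFOLLOW set     -> traverse = false, and if the leaf is a symlink
		                      the open fails with B_LINK_LIMIT (checked
		                      separately above)
*/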
5690 
5691 
5692 static int
5693 file_open(int fd, char* path, int openMode, bool kernel)
5694 {
5695 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5696 
5697 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5698 		fd, path, openMode, kernel));
5699 
5700 	// get the vnode matching the vnode + path combination
5701 	struct vnode* vnode;
5702 	ino_t parentID;
5703 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5704 		&parentID, kernel);
5705 	if (status != B_OK)
5706 		return status;
5707 
5708 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5709 		put_vnode(vnode);
5710 		return B_LINK_LIMIT;
5711 	}
5712 
5713 	// open the vnode
5714 	int newFD = open_vnode(vnode, openMode, kernel);
5715 	if (newFD >= 0) {
5716 		// The vnode reference has been transferred to the FD
5717 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5718 			vnode->device, parentID, vnode->id, NULL);
5719 	} else
5720 		put_vnode(vnode);
5721 
5722 	return newFD;
5723 }
5724 
5725 
5726 static status_t
5727 file_close(struct file_descriptor* descriptor)
5728 {
5729 	struct vnode* vnode = descriptor->u.vnode;
5730 	status_t status = B_OK;
5731 
5732 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5733 
5734 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5735 		vnode->id);
5736 	if (HAS_FS_CALL(vnode, close)) {
5737 		status = FS_CALL(vnode, close, descriptor->cookie);
5738 	}
5739 
5740 	if (status == B_OK) {
5741 		// remove all outstanding locks for this team
5742 		if (HAS_FS_CALL(vnode, release_lock))
5743 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5744 		else
5745 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5746 	}
5747 	return status;
5748 }
5749 
5750 
5751 static void
5752 file_free_fd(struct file_descriptor* descriptor)
5753 {
5754 	struct vnode* vnode = descriptor->u.vnode;
5755 
5756 	if (vnode != NULL) {
5757 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5758 		put_vnode(vnode);
5759 	}
5760 }
5761 
5762 
5763 static status_t
5764 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5765 	size_t* length)
5766 {
5767 	struct vnode* vnode = descriptor->u.vnode;
5768 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5769 		pos, length, *length));
5770 
5771 	if (S_ISDIR(vnode->Type()))
5772 		return B_IS_A_DIRECTORY;
5773 
5774 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5775 }
5776 
5777 
5778 static status_t
5779 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5780 	size_t* length)
5781 {
5782 	struct vnode* vnode = descriptor->u.vnode;
5783 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5784 		length));
5785 
5786 	if (S_ISDIR(vnode->Type()))
5787 		return B_IS_A_DIRECTORY;
5788 	if (!HAS_FS_CALL(vnode, write))
5789 		return B_READ_ONLY_DEVICE;
5790 
5791 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5792 }
5793 
5794 
5795 static off_t
5796 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5797 {
5798 	struct vnode* vnode = descriptor->u.vnode;
5799 	off_t offset;
5800 	bool isDevice = false;
5801 
5802 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5803 		seekType));
5804 
5805 	// some kinds of files are not seekable
5806 	switch (vnode->Type() & S_IFMT) {
5807 		case S_IFIFO:
5808 		case S_IFSOCK:
5809 			return ESPIPE;
5810 
5811 		// drivers publish block devices as character devices, so handle both
5812 		case S_IFBLK:
5813 		case S_IFCHR:
5814 			isDevice = true;
5815 			break;
5816 		// The Open Group Base Specs single out only pipes, FIFOs, and sockets
5817 		// as non-seekable, so we allow seeking all other file types.
5818 		case S_IFREG:
5819 		case S_IFDIR:
5820 		case S_IFLNK:
5821 			break;
5822 	}
5823 
5824 	switch (seekType) {
5825 		case SEEK_SET:
5826 			offset = 0;
5827 			break;
5828 		case SEEK_CUR:
5829 			offset = descriptor->pos;
5830 			break;
5831 		case SEEK_END:
5832 		{
5833 			// stat() the node
5834 			if (!HAS_FS_CALL(vnode, read_stat))
5835 				return B_UNSUPPORTED;
5836 
5837 			struct stat stat;
5838 			status_t status = FS_CALL(vnode, read_stat, &stat);
5839 			if (status != B_OK)
5840 				return status;
5841 
5842 			offset = stat.st_size;
5843 
5844 			if (offset == 0 && isDevice) {
5845 				// stat() on regular drivers doesn't report size
5846 				device_geometry geometry;
5847 
5848 				if (HAS_FS_CALL(vnode, ioctl)) {
5849 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5850 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5851 					if (status == B_OK)
5852 						offset = (off_t)geometry.bytes_per_sector
5853 							* geometry.sectors_per_track
5854 							* geometry.cylinder_count
5855 							* geometry.head_count;
5856 				}
5857 			}
5858 
5859 			break;
5860 		}
5861 		default:
5862 			return B_BAD_VALUE;
5863 	}
5864 
5865 	// assumes off_t is 64 bits wide
5866 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5867 		return B_BUFFER_OVERFLOW;
5868 
5869 	pos += offset;
5870 	if (pos < 0)
5871 		return B_BAD_VALUE;
5872 
5873 	return descriptor->pos = pos;
5874 }
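
/*	Worked example for the SEEK_END device fallback above: with a
	(hypothetical) geometry of 512 bytes per sector, 63 sectors per track,
	1024 cylinders and 16 heads, the computed device size is

		512 * 63 * 1024 * 16 = 528482304 bytes (exactly 504 MiB),

	which is then used as the end position the seek offset is relative to.
*/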
5875 
5876 
5877 static status_t
5878 file_select(struct file_descriptor* descriptor, uint8 event,
5879 	struct selectsync* sync)
5880 {
5881 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5882 
5883 	struct vnode* vnode = descriptor->u.vnode;
5884 
5885 	// If the FS has no select() hook, notify select() now.
5886 	if (!HAS_FS_CALL(vnode, select)) {
5887 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5888 			return notify_select_event(sync, event);
5889 		else
5890 			return B_OK;
5891 	}
5892 
5893 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5894 }
5895 
5896 
5897 static status_t
5898 file_deselect(struct file_descriptor* descriptor, uint8 event,
5899 	struct selectsync* sync)
5900 {
5901 	struct vnode* vnode = descriptor->u.vnode;
5902 
5903 	if (!HAS_FS_CALL(vnode, deselect))
5904 		return B_OK;
5905 
5906 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5907 }
5908 
5909 
5910 static status_t
5911 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5912 	bool kernel)
5913 {
5914 	struct vnode* vnode;
5915 	status_t status;
5916 
5917 	if (name == NULL || *name == '\0')
5918 		return B_BAD_VALUE;
5919 
5920 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5921 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5922 
5923 	status = get_vnode(mountID, parentID, &vnode, true, false);
5924 	if (status != B_OK)
5925 		return status;
5926 
5927 	if (HAS_FS_CALL(vnode, create_dir))
5928 		status = FS_CALL(vnode, create_dir, name, perms);
5929 	else
5930 		status = B_READ_ONLY_DEVICE;
5931 
5932 	put_vnode(vnode);
5933 	return status;
5934 }
5935 
5936 
5937 static status_t
5938 dir_create(int fd, char* path, int perms, bool kernel)
5939 {
5940 	char filename[B_FILE_NAME_LENGTH];
5941 	struct vnode* vnode;
5942 	status_t status;
5943 
5944 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5945 		kernel));
5946 
5947 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5948 	if (status < 0)
5949 		return status;
5950 
5951 	if (HAS_FS_CALL(vnode, create_dir)) {
5952 		status = FS_CALL(vnode, create_dir, filename, perms);
5953 	} else
5954 		status = B_READ_ONLY_DEVICE;
5955 
5956 	put_vnode(vnode);
5957 	return status;
5958 }
5959 
5960 
5961 static int
5962 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5963 {
5964 	FUNCTION(("dir_open_entry_ref()\n"));
5965 
5966 	if (name && name[0] == '\0')
5967 		return B_BAD_VALUE;
5968 
5969 	// get the vnode matching the entry_ref/node_ref
5970 	struct vnode* vnode;
5971 	status_t status;
5972 	if (name) {
5973 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5974 			&vnode);
5975 	} else
5976 		status = get_vnode(mountID, parentID, &vnode, true, false);
5977 	if (status != B_OK)
5978 		return status;
5979 
5980 	int newFD = open_dir_vnode(vnode, kernel);
5981 	if (newFD >= 0) {
5982 		// The vnode reference has been transferred to the FD
5983 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5984 			vnode->id, name);
5985 	} else
5986 		put_vnode(vnode);
5987 
5988 	return newFD;
5989 }
5990 
5991 
5992 static int
5993 dir_open(int fd, char* path, bool kernel)
5994 {
5995 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5996 		kernel));
5997 
5998 	// get the vnode matching the vnode + path combination
5999 	struct vnode* vnode = NULL;
6000 	ino_t parentID;
6001 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6002 		kernel);
6003 	if (status != B_OK)
6004 		return status;
6005 
6006 	// open the dir
6007 	int newFD = open_dir_vnode(vnode, kernel);
6008 	if (newFD >= 0) {
6009 		// The vnode reference has been transferred to the FD
6010 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6011 			parentID, vnode->id, NULL);
6012 	} else
6013 		put_vnode(vnode);
6014 
6015 	return newFD;
6016 }
6017 
6018 
6019 static status_t
6020 dir_close(struct file_descriptor* descriptor)
6021 {
6022 	struct vnode* vnode = descriptor->u.vnode;
6023 
6024 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6025 
6026 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6027 		vnode->id);
6028 	if (HAS_FS_CALL(vnode, close_dir))
6029 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6030 
6031 	return B_OK;
6032 }
6033 
6034 
6035 static void
6036 dir_free_fd(struct file_descriptor* descriptor)
6037 {
6038 	struct vnode* vnode = descriptor->u.vnode;
6039 
6040 	if (vnode != NULL) {
6041 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6042 		put_vnode(vnode);
6043 	}
6044 }
6045 
6046 
6047 static status_t
6048 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6049 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6050 {
6051 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6052 		bufferSize, _count);
6053 }
6054 
6055 
6056 static status_t
6057 fix_dirent(struct vnode* parent, struct dirent* entry,
6058 	struct io_context* ioContext)
6059 {
6060 	// set d_pdev and d_pino
6061 	entry->d_pdev = parent->device;
6062 	entry->d_pino = parent->id;
6063 
6064 	// If this is the ".." entry and the directory is covering another vnode,
6065 	// we need to replace d_dev and d_ino with the actual values.
6066 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6067 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6068 			ioContext);
6069 	}
6070 
6071 	// resolve covered vnodes
6072 	ReadLocker _(&sVnodeLock);
6073 
6074 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6075 	if (vnode != NULL && vnode->covered_by != NULL) {
6076 		do {
6077 			vnode = vnode->covered_by;
6078 		} while (vnode->covered_by != NULL);
6079 
6080 		entry->d_dev = vnode->device;
6081 		entry->d_ino = vnode->id;
6082 	}
6083 
6084 	return B_OK;
6085 }
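
/*	Example of the adjustment performed by fix_dirent(): assume a volume is
	mounted at /mnt. A dirent for "mnt" as returned by the root FS carries the
	device/inode of the covered directory; walking the covered_by chain above
	replaces them with those of the mounted volume's root, so readdir()
	results match what stat() reports for the same entry.
*/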
6086 
6087 
6088 static status_t
6089 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6090 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6091 {
6092 	if (!HAS_FS_CALL(vnode, read_dir))
6093 		return B_UNSUPPORTED;
6094 
6095 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6096 		_count);
6097 	if (error != B_OK)
6098 		return error;
6099 
6100 	// we need to adjust the read dirents
6101 	uint32 count = *_count;
6102 	for (uint32 i = 0; i < count; i++) {
6103 		error = fix_dirent(vnode, buffer, ioContext);
6104 		if (error != B_OK)
6105 			return error;
6106 
6107 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6108 	}
6109 
6110 	return error;
6111 }
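
/*	The dirents filled in by read_dir() are packed back to back, which is why
	the loop above advances by d_reclen. A hypothetical consumer walks the
	buffer the same way:

		struct dirent* entry = buffer;
		for (uint32 i = 0; i < count; i++) {
			printf("%s\n", entry->d_name);
			entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
		}
*/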
6112 
6113 
6114 static status_t
6115 dir_rewind(struct file_descriptor* descriptor)
6116 {
6117 	struct vnode* vnode = descriptor->u.vnode;
6118 
6119 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6120 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6121 	}
6122 
6123 	return B_UNSUPPORTED;
6124 }
6125 
6126 
6127 static status_t
6128 dir_remove(int fd, char* path, bool kernel)
6129 {
6130 	char name[B_FILE_NAME_LENGTH];
6131 	struct vnode* directory;
6132 	status_t status;
6133 
6134 	if (path != NULL) {
6135 		// we need to make sure our path name doesn't end in "/", ".",
6136 		// or ".."
6137 		char* lastSlash;
6138 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6139 			char* leaf = lastSlash + 1;
6140 			if (!strcmp(leaf, ".."))
6141 				return B_NOT_ALLOWED;
6142 
6143 			// omit multiple slashes
6144 			while (lastSlash > path && lastSlash[-1] == '/')
6145 				lastSlash--;
6146 
6147 			if (leaf[0] != '\0'
6148 				&& strcmp(leaf, ".") != 0) {
6149 				break;
6150 			}
6151 			// "name/" -> "name", or "name/." -> "name"
6152 			lastSlash[0] = '\0';
6153 		}
6154 
6155 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6156 			return B_NOT_ALLOWED;
6157 	}
6158 
6159 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6160 	if (status != B_OK)
6161 		return status;
6162 
6163 	if (HAS_FS_CALL(directory, remove_dir))
6164 		status = FS_CALL(directory, remove_dir, name);
6165 	else
6166 		status = B_READ_ONLY_DEVICE;
6167 
6168 	put_vnode(directory);
6169 	return status;
6170 }
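
/*	Examples of the leaf normalization performed by dir_remove() (sketch):

		"foo/bar/"   -> directory "foo", leaf "bar"
		"foo/bar/."  -> directory "foo", leaf "bar"
		"foo/bar//." -> directory "foo", leaf "bar"
		"foo/.."     -> B_NOT_ALLOWED
		".."         -> B_NOT_ALLOWED
*/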
6171 
6172 
6173 static status_t
6174 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6175 	size_t length)
6176 {
6177 	struct vnode* vnode = descriptor->u.vnode;
6178 
6179 	if (HAS_FS_CALL(vnode, ioctl))
6180 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6181 
6182 	return B_DEV_INVALID_IOCTL;
6183 }
6184 
6185 
6186 static status_t
6187 common_fcntl(int fd, int op, size_t argument, bool kernel)
6188 {
6189 	struct flock flock;
6190 
6191 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6192 		fd, op, argument, kernel ? "kernel" : "user"));
6193 
6194 	struct io_context* context = get_current_io_context(kernel);
6195 
6196 	struct file_descriptor* descriptor = get_fd(context, fd);
6197 	if (descriptor == NULL)
6198 		return B_FILE_ERROR;
6199 
6200 	struct vnode* vnode = fd_vnode(descriptor);
6201 
6202 	status_t status = B_OK;
6203 
6204 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6205 		if (descriptor->type != FDTYPE_FILE)
6206 			status = B_BAD_VALUE;
6207 		else if (kernel)
6208 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6209 		else if (user_memcpy(&flock, (struct flock*)argument,
6210 				sizeof(struct flock)) != B_OK)
6211 			status = B_BAD_ADDRESS;
6212 		if (status != B_OK) {
6213 			put_fd(descriptor);
6214 			return status;
6215 		}
6216 	}
6217 
6218 	switch (op) {
6219 		case F_SETFD:
6220 		{
6221 			// Set file descriptor flags
6222 
6223 			// FD_CLOEXEC is the only flag available at this time
6224 			mutex_lock(&context->io_mutex);
6225 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6226 			mutex_unlock(&context->io_mutex);
6227 
6228 			status = B_OK;
6229 			break;
6230 		}
6231 
6232 		case F_GETFD:
6233 		{
6234 			// Get file descriptor flags
6235 			mutex_lock(&context->io_mutex);
6236 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6237 			mutex_unlock(&context->io_mutex);
6238 			break;
6239 		}
6240 
6241 		case F_SETFL:
6242 			// Set file descriptor open mode
6243 
6244 			// we only accept changes to O_APPEND and O_NONBLOCK
6245 			argument &= O_APPEND | O_NONBLOCK;
6246 			if (descriptor->ops->fd_set_flags != NULL) {
6247 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6248 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6249 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6250 					(int)argument);
6251 			} else
6252 				status = B_UNSUPPORTED;
6253 
6254 			if (status == B_OK) {
6255 				// update this descriptor's open_mode field
6256 				descriptor->open_mode = (descriptor->open_mode
6257 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6258 			}
6259 
6260 			break;
6261 
6262 		case F_GETFL:
6263 			// Get file descriptor open mode
6264 			status = descriptor->open_mode;
6265 			break;
6266 
6267 		case F_DUPFD:
6268 		case F_DUPFD_CLOEXEC:
6269 		{
6270 			status = new_fd_etc(context, descriptor, (int)argument);
6271 			if (status >= 0) {
6272 				mutex_lock(&context->io_mutex);
6273 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6274 				mutex_unlock(&context->io_mutex);
6275 
6276 				atomic_add(&descriptor->ref_count, 1);
6277 			}
6278 			break;
6279 		}
6280 
6281 		case F_GETLK:
6282 			if (vnode != NULL) {
6283 				struct flock normalizedLock;
6284 
6285 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6286 				status = normalize_flock(descriptor, &normalizedLock);
6287 				if (status != B_OK)
6288 					break;
6289 
6290 				if (HAS_FS_CALL(vnode, test_lock)) {
6291 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6292 						&normalizedLock);
6293 				} else
6294 					status = test_advisory_lock(vnode, &normalizedLock);
6295 				if (status == B_OK) {
6296 					if (normalizedLock.l_type == F_UNLCK) {
6297 						// no conflicting lock found, copy back the same struct
6298 						// we were given except change type to F_UNLCK
6299 						flock.l_type = F_UNLCK;
6300 						if (kernel) {
6301 							memcpy((struct flock*)argument, &flock,
6302 								sizeof(struct flock));
6303 						} else {
6304 							status = user_memcpy((struct flock*)argument,
6305 								&flock, sizeof(struct flock));
6306 						}
6307 					} else {
6308 						// a conflicting lock was found, copy back its range and
6309 						// type
6310 						if (normalizedLock.l_len == OFF_MAX)
6311 							normalizedLock.l_len = 0;
6312 
6313 						if (kernel) {
6314 							memcpy((struct flock*)argument,
6315 								&normalizedLock, sizeof(struct flock));
6316 						} else {
6317 							status = user_memcpy((struct flock*)argument,
6318 								&normalizedLock, sizeof(struct flock));
6319 						}
6320 					}
6321 				}
6322 			} else
6323 				status = B_BAD_VALUE;
6324 			break;
6325 
6326 		case F_SETLK:
6327 		case F_SETLKW:
6328 			status = normalize_flock(descriptor, &flock);
6329 			if (status != B_OK)
6330 				break;
6331 
6332 			if (vnode == NULL) {
6333 				status = B_BAD_VALUE;
6334 			} else if (flock.l_type == F_UNLCK) {
6335 				if (HAS_FS_CALL(vnode, release_lock)) {
6336 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6337 						&flock);
6338 				} else {
6339 					status = release_advisory_lock(vnode, context, NULL,
6340 						&flock);
6341 				}
6342 			} else {
6343 				// the open mode must match the lock type
6344 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6345 						&& flock.l_type == F_WRLCK)
6346 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6347 						&& flock.l_type == F_RDLCK))
6348 					status = B_FILE_ERROR;
6349 				else {
6350 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6351 						status = FS_CALL(vnode, acquire_lock,
6352 							descriptor->cookie, &flock, op == F_SETLKW);
6353 					} else {
6354 						status = acquire_advisory_lock(vnode, context, NULL,
6355 							&flock, op == F_SETLKW);
6356 					}
6357 				}
6358 			}
6359 			break;
6360 
6361 		// ToDo: add support for more ops?
6362 
6363 		default:
6364 			status = B_BAD_VALUE;
6365 	}
6366 
6367 	put_fd(descriptor);
6368 	return status;
6369 }
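
/*	Sketch of the F_SETFL masking above, as seen by a hypothetical caller:
	all bits other than O_APPEND and O_NONBLOCK are silently dropped.

		fcntl(fd, F_SETFL, O_NONBLOCK | O_RDWR);
			// only O_NONBLOCK takes effect; O_RDWR is ignored

	F_DUPFD/F_DUPFD_CLOEXEC return the new FD number (>= argument) directly
	as the status value, which is why any non-negative status is a success.
*/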
6370 
6371 
6372 static status_t
6373 common_sync(int fd, bool kernel)
6374 {
6375 	struct file_descriptor* descriptor;
6376 	struct vnode* vnode;
6377 	status_t status;
6378 
6379 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6380 
6381 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6382 	if (descriptor == NULL)
6383 		return B_FILE_ERROR;
6384 
6385 	if (HAS_FS_CALL(vnode, fsync))
6386 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6387 	else
6388 		status = B_UNSUPPORTED;
6389 
6390 	put_fd(descriptor);
6391 	return status;
6392 }
6393 
6394 
6395 static status_t
6396 common_lock_node(int fd, bool kernel)
6397 {
6398 	struct file_descriptor* descriptor;
6399 	struct vnode* vnode;
6400 
6401 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6402 	if (descriptor == NULL)
6403 		return B_FILE_ERROR;
6404 
6405 	status_t status = B_OK;
6406 
6407 	// We need to set the lock atomically - someone
6408 	// else might set one at the same time
6409 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6410 			(file_descriptor*)NULL) != NULL)
6411 		status = B_BUSY;
6412 
6413 	put_fd(descriptor);
6414 	return status;
6415 }
6416 
6417 
6418 static status_t
6419 common_unlock_node(int fd, bool kernel)
6420 {
6421 	struct file_descriptor* descriptor;
6422 	struct vnode* vnode;
6423 
6424 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6425 	if (descriptor == NULL)
6426 		return B_FILE_ERROR;
6427 
6428 	status_t status = B_OK;
6429 
6430 	// We need to clear the lock atomically - someone
6431 	// else might set or clear it at the same time
6432 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6433 			(file_descriptor*)NULL, descriptor) != descriptor)
6434 		status = B_BAD_VALUE;
6435 
6436 	put_fd(descriptor);
6437 	return status;
6438 }
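
/*	The node lock/unlock pair above implements a small compare-and-swap
	ownership protocol on vnode::mandatory_locked_by:

		lock:    NULL       -> descriptor   (fails with B_BUSY if the old
		                                     value was not NULL)
		unlock:  descriptor -> NULL         (fails with B_BAD_VALUE if this
		                                     descriptor was not the holder)
*/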
6439 
6440 
6441 static status_t
6442 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6443 	bool kernel)
6444 {
6445 	struct vnode* vnode;
6446 	status_t status;
6447 
6448 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6449 	if (status != B_OK)
6450 		return status;
6451 
6452 	if (HAS_FS_CALL(vnode, read_symlink)) {
6453 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6454 	} else
6455 		status = B_BAD_VALUE;
6456 
6457 	put_vnode(vnode);
6458 	return status;
6459 }
6460 
6461 
6462 static status_t
6463 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6464 	bool kernel)
6465 {
6466 	// path validity checks have to be in the calling function!
6467 	char name[B_FILE_NAME_LENGTH];
6468 	struct vnode* vnode;
6469 	status_t status;
6470 
6471 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6472 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6473 
6474 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6475 	if (status != B_OK)
6476 		return status;
6477 
6478 	if (HAS_FS_CALL(vnode, create_symlink))
6479 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6480 	else {
6481 		status = HAS_FS_CALL(vnode, write)
6482 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6483 	}
6484 
6485 	put_vnode(vnode);
6486 
6487 	return status;
6488 }
6489 
6490 
6491 static status_t
6492 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6493 	bool traverseLeafLink, bool kernel)
6494 {
6495 	// path validity checks have to be in the calling function!
6496 
6497 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6498 		toPath, kernel));
6499 
6500 	char name[B_FILE_NAME_LENGTH];
6501 	struct vnode* directory;
6502 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6503 		kernel);
6504 	if (status != B_OK)
6505 		return status;
6506 
6507 	struct vnode* vnode;
6508 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6509 		kernel);
6510 	if (status != B_OK)
6511 		goto err;
6512 
6513 	if (directory->mount != vnode->mount) {
6514 		status = B_CROSS_DEVICE_LINK;
6515 		goto err1;
6516 	}
6517 
6518 	if (HAS_FS_CALL(directory, link))
6519 		status = FS_CALL(directory, link, name, vnode);
6520 	else
6521 		status = B_READ_ONLY_DEVICE;
6522 
6523 err1:
6524 	put_vnode(vnode);
6525 err:
6526 	put_vnode(directory);
6527 
6528 	return status;
6529 }
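
/*	Hard links may not span volumes; for example, with a volume mounted at
	/mnt, a hypothetical

		link("/boot/home/file", "/mnt/file")

	fails with B_CROSS_DEVICE_LINK, since the two vnodes belong to different
	mounts.
*/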
6530 
6531 
6532 static status_t
6533 common_unlink(int fd, char* path, bool kernel)
6534 {
6535 	char filename[B_FILE_NAME_LENGTH];
6536 	struct vnode* vnode;
6537 	status_t status;
6538 
6539 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6540 		kernel));
6541 
6542 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6543 	if (status < 0)
6544 		return status;
6545 
6546 	if (HAS_FS_CALL(vnode, unlink))
6547 		status = FS_CALL(vnode, unlink, filename);
6548 	else
6549 		status = B_READ_ONLY_DEVICE;
6550 
6551 	put_vnode(vnode);
6552 
6553 	return status;
6554 }
6555 
6556 
6557 static status_t
6558 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6559 {
6560 	struct vnode* vnode;
6561 	status_t status;
6562 
6563 	// TODO: honor effectiveUserGroup argument
6564 
6565 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6566 	if (status != B_OK)
6567 		return status;
6568 
6569 	if (HAS_FS_CALL(vnode, access))
6570 		status = FS_CALL(vnode, access, mode);
6571 	else
6572 		status = B_OK;
6573 
6574 	put_vnode(vnode);
6575 
6576 	return status;
6577 }
6578 
6579 
6580 static status_t
6581 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6582 {
6583 	struct vnode* fromVnode;
6584 	struct vnode* toVnode;
6585 	char fromName[B_FILE_NAME_LENGTH];
6586 	char toName[B_FILE_NAME_LENGTH];
6587 	status_t status;
6588 
6589 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6590 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6591 
6592 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6593 	if (status != B_OK)
6594 		return status;
6595 
6596 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6597 	if (status != B_OK)
6598 		goto err1;
6599 
6600 	if (fromVnode->device != toVnode->device) {
6601 		status = B_CROSS_DEVICE_LINK;
6602 		goto err2;
6603 	}
6604 
6605 	if (fromName[0] == '\0' || toName[0] == '\0'
6606 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6607 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6608 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6609 		status = B_BAD_VALUE;
6610 		goto err2;
6611 	}
6612 
6613 	if (HAS_FS_CALL(fromVnode, rename))
6614 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6615 	else
6616 		status = B_READ_ONLY_DEVICE;
6617 
6618 err2:
6619 	put_vnode(toVnode);
6620 err1:
6621 	put_vnode(fromVnode);
6622 
6623 	return status;
6624 }
6625 
6626 
6627 static status_t
6628 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6629 {
6630 	struct vnode* vnode = descriptor->u.vnode;
6631 
6632 	FUNCTION(("common_read_stat: stat %p\n", stat));
6633 
6634 	// TODO: remove this once all file systems properly set them!
6635 	stat->st_crtim.tv_nsec = 0;
6636 	stat->st_ctim.tv_nsec = 0;
6637 	stat->st_mtim.tv_nsec = 0;
6638 	stat->st_atim.tv_nsec = 0;
6639 
6640 	return vfs_stat_vnode(vnode, stat);
6641 }
6642 
6643 
6644 static status_t
6645 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6646 	int statMask)
6647 {
6648 	struct vnode* vnode = descriptor->u.vnode;
6649 
6650 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6651 		vnode, stat, statMask));
6652 
6653 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6654 		&& (statMask & B_STAT_SIZE) != 0) {
6655 		return B_BAD_VALUE;
6656 	}
6657 
6658 	if (!HAS_FS_CALL(vnode, write_stat))
6659 		return B_READ_ONLY_DEVICE;
6660 
6661 	return FS_CALL(vnode, write_stat, stat, statMask);
6662 }
6663 
6664 
6665 static status_t
6666 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6667 	struct stat* stat, bool kernel)
6668 {
6669 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6670 		stat));
6671 
6672 	struct vnode* vnode;
6673 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6674 		NULL, kernel);
6675 	if (status != B_OK)
6676 		return status;
6677 
6678 	status = vfs_stat_vnode(vnode, stat);
6679 
6680 	put_vnode(vnode);
6681 	return status;
6682 }
6683 
6684 
6685 static status_t
6686 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6687 	const struct stat* stat, int statMask, bool kernel)
6688 {
6689 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6690 		"kernel %d\n", fd, path, stat, statMask, kernel));
6691 
6692 	struct vnode* vnode;
6693 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6694 		NULL, kernel);
6695 	if (status != B_OK)
6696 		return status;
6697 
6698 	if (HAS_FS_CALL(vnode, write_stat))
6699 		status = FS_CALL(vnode, write_stat, stat, statMask);
6700 	else
6701 		status = B_READ_ONLY_DEVICE;
6702 
6703 	put_vnode(vnode);
6704 
6705 	return status;
6706 }
6707 
6708 
6709 static int
6710 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6711 {
6712 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6713 		kernel));
6714 
6715 	struct vnode* vnode;
6716 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6717 		NULL, kernel);
6718 	if (status != B_OK)
6719 		return status;
6720 
6721 	status = open_attr_dir_vnode(vnode, kernel);
6722 	if (status < 0)
6723 		put_vnode(vnode);
6724 
6725 	return status;
6726 }
6727 
6728 
6729 static status_t
6730 attr_dir_close(struct file_descriptor* descriptor)
6731 {
6732 	struct vnode* vnode = descriptor->u.vnode;
6733 
6734 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6735 
6736 	if (HAS_FS_CALL(vnode, close_attr_dir))
6737 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6738 
6739 	return B_OK;
6740 }
6741 
6742 
6743 static void
6744 attr_dir_free_fd(struct file_descriptor* descriptor)
6745 {
6746 	struct vnode* vnode = descriptor->u.vnode;
6747 
6748 	if (vnode != NULL) {
6749 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6750 		put_vnode(vnode);
6751 	}
6752 }
6753 
6754 
6755 static status_t
6756 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6757 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6758 {
6759 	struct vnode* vnode = descriptor->u.vnode;
6760 
6761 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6762 
6763 	if (HAS_FS_CALL(vnode, read_attr_dir))
6764 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6765 			bufferSize, _count);
6766 
6767 	return B_UNSUPPORTED;
6768 }
6769 
6770 
6771 static status_t
6772 attr_dir_rewind(struct file_descriptor* descriptor)
6773 {
6774 	struct vnode* vnode = descriptor->u.vnode;
6775 
6776 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6777 
6778 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6779 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6780 
6781 	return B_UNSUPPORTED;
6782 }
6783 
6784 
6785 static int
6786 attr_create(int fd, char* path, const char* name, uint32 type,
6787 	int openMode, bool kernel)
6788 {
6789 	if (name == NULL || *name == '\0')
6790 		return B_BAD_VALUE;
6791 
6792 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6793 	struct vnode* vnode;
6794 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6795 		kernel);
6796 	if (status != B_OK)
6797 		return status;
6798 
6799 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6800 		status = B_LINK_LIMIT;
6801 		goto err;
6802 	}
6803 
6804 	if (!HAS_FS_CALL(vnode, create_attr)) {
6805 		status = B_READ_ONLY_DEVICE;
6806 		goto err;
6807 	}
6808 
6809 	void* cookie;
6810 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6811 	if (status != B_OK)
6812 		goto err;
6813 
6814 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6815 	if (fd >= 0)
6816 		return fd;
6817 
6818 	status = fd;
6819 
6820 	FS_CALL(vnode, close_attr, cookie);
6821 	FS_CALL(vnode, free_attr_cookie, cookie);
6822 
6823 	FS_CALL(vnode, remove_attr, name);
6824 
6825 err:
6826 	put_vnode(vnode);
6827 
6828 	return status;
6829 }
6830 
6831 
6832 static int
6833 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6834 {
6835 	if (name == NULL || *name == '\0')
6836 		return B_BAD_VALUE;
6837 
6838 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6839 	struct vnode* vnode;
6840 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6841 		kernel);
6842 	if (status != B_OK)
6843 		return status;
6844 
6845 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6846 		status = B_LINK_LIMIT;
6847 		goto err;
6848 	}
6849 
6850 	if (!HAS_FS_CALL(vnode, open_attr)) {
6851 		status = B_UNSUPPORTED;
6852 		goto err;
6853 	}
6854 
6855 	void* cookie;
6856 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6857 	if (status != B_OK)
6858 		goto err;
6859 
6860 	// now we only need a file descriptor for this attribute and we're done
6861 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6862 	if (fd >= 0)
6863 		return fd;
6864 
6865 	status = fd;
6866 
6867 	FS_CALL(vnode, close_attr, cookie);
6868 	FS_CALL(vnode, free_attr_cookie, cookie);
6869 
6870 err:
6871 	put_vnode(vnode);
6872 
6873 	return status;
6874 }
6875 
6876 
6877 static status_t
6878 attr_close(struct file_descriptor* descriptor)
6879 {
6880 	struct vnode* vnode = descriptor->u.vnode;
6881 
6882 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6883 
6884 	if (HAS_FS_CALL(vnode, close_attr))
6885 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6886 
6887 	return B_OK;
6888 }
6889 
6890 
6891 static void
6892 attr_free_fd(struct file_descriptor* descriptor)
6893 {
6894 	struct vnode* vnode = descriptor->u.vnode;
6895 
6896 	if (vnode != NULL) {
6897 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6898 		put_vnode(vnode);
6899 	}
6900 }
6901 
6902 
6903 static status_t
6904 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6905 	size_t* length)
6906 {
6907 	struct vnode* vnode = descriptor->u.vnode;
6908 
6909 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6910 		pos, length, *length));
6911 
6912 	if (!HAS_FS_CALL(vnode, read_attr))
6913 		return B_UNSUPPORTED;
6914 
6915 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6916 }
6917 
6918 
6919 static status_t
6920 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6921 	size_t* length)
6922 {
6923 	struct vnode* vnode = descriptor->u.vnode;
6924 
6925 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6926 		length));
6927 
6928 	if (!HAS_FS_CALL(vnode, write_attr))
6929 		return B_UNSUPPORTED;
6930 
6931 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6932 }
6933 
6934 
6935 static off_t
6936 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6937 {
6938 	off_t offset;
6939 
6940 	switch (seekType) {
6941 		case SEEK_SET:
6942 			offset = 0;
6943 			break;
6944 		case SEEK_CUR:
6945 			offset = descriptor->pos;
6946 			break;
6947 		case SEEK_END:
6948 		{
6949 			struct vnode* vnode = descriptor->u.vnode;
6950 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6951 				return B_UNSUPPORTED;
6952 
6953 			struct stat stat;
6954 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6955 				&stat);
6956 			if (status != B_OK)
6957 				return status;
6958 
6959 			offset = stat.st_size;
6960 			break;
6961 		}
6962 		default:
6963 			return B_BAD_VALUE;
6964 	}
6965 
6966 	// assumes off_t is 64 bits wide
6967 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6968 		return B_BUFFER_OVERFLOW;
6969 
6970 	pos += offset;
6971 	if (pos < 0)
6972 		return B_BAD_VALUE;
6973 
6974 	return descriptor->pos = pos;
6975 }
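
/*	Worked example for the overflow check above: with offset == LONGLONG_MAX
	(a pathological attribute size) and pos == 1, LONGLONG_MAX - offset == 0
	is smaller than pos, so the seek fails with B_BUFFER_OVERFLOW instead of
	wrapping around to a negative position.
*/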
6976 
6977 
6978 static status_t
6979 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6980 {
6981 	struct vnode* vnode = descriptor->u.vnode;
6982 
6983 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6984 
6985 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6986 		return B_UNSUPPORTED;
6987 
6988 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6989 }
6990 
6991 
6992 static status_t
6993 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6994 	int statMask)
6995 {
6996 	struct vnode* vnode = descriptor->u.vnode;
6997 
6998 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6999 
7000 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7001 		return B_READ_ONLY_DEVICE;
7002 
7003 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7004 }
7005 
7006 
7007 static status_t
7008 attr_remove(int fd, const char* name, bool kernel)
7009 {
7010 	struct file_descriptor* descriptor;
7011 	struct vnode* vnode;
7012 	status_t status;
7013 
7014 	if (name == NULL || *name == '\0')
7015 		return B_BAD_VALUE;
7016 
7017 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7018 		kernel));
7019 
7020 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7021 	if (descriptor == NULL)
7022 		return B_FILE_ERROR;
7023 
7024 	if (HAS_FS_CALL(vnode, remove_attr))
7025 		status = FS_CALL(vnode, remove_attr, name);
7026 	else
7027 		status = B_READ_ONLY_DEVICE;
7028 
7029 	put_fd(descriptor);
7030 
7031 	return status;
7032 }
7033 
7034 
7035 static status_t
7036 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7037 	bool kernel)
7038 {
7039 	struct file_descriptor* fromDescriptor;
7040 	struct file_descriptor* toDescriptor;
7041 	struct vnode* fromVnode;
7042 	struct vnode* toVnode;
7043 	status_t status;
7044 
7045 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7046 		|| *toName == '\0')
7047 		return B_BAD_VALUE;
7048 
7049 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7050 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7051 
7052 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7053 	if (fromDescriptor == NULL)
7054 		return B_FILE_ERROR;
7055 
7056 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7057 	if (toDescriptor == NULL) {
7058 		status = B_FILE_ERROR;
7059 		goto err;
7060 	}
7061 
7062 	// are the files on the same volume?
7063 	if (fromVnode->device != toVnode->device) {
7064 		status = B_CROSS_DEVICE_LINK;
7065 		goto err1;
7066 	}
7067 
7068 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7069 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7070 	} else
7071 		status = B_READ_ONLY_DEVICE;
7072 
7073 err1:
7074 	put_fd(toDescriptor);
7075 err:
7076 	put_fd(fromDescriptor);
7077 
7078 	return status;
7079 }
7080 
7081 
7082 static int
7083 index_dir_open(dev_t mountID, bool kernel)
7084 {
7085 	struct fs_mount* mount;
7086 	void* cookie;
7087 
7088 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7089 		kernel));
7090 
7091 	status_t status = get_mount(mountID, &mount);
7092 	if (status != B_OK)
7093 		return status;
7094 
7095 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7096 		status = B_UNSUPPORTED;
7097 		goto error;
7098 	}
7099 
7100 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7101 	if (status != B_OK)
7102 		goto error;
7103 
7104 	// get fd for the index directory
7105 	int fd;
7106 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7107 	if (fd >= 0)
7108 		return fd;
7109 
7110 	// something went wrong
7111 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7112 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7113 
7114 	status = fd;
7115 
7116 error:
7117 	put_mount(mount);
7118 	return status;
7119 }
7120 
7121 
7122 static status_t
7123 index_dir_close(struct file_descriptor* descriptor)
7124 {
7125 	struct fs_mount* mount = descriptor->u.mount;
7126 
7127 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7128 
7129 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7130 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7131 
7132 	return B_OK;
7133 }
7134 
7135 
7136 static void
7137 index_dir_free_fd(struct file_descriptor* descriptor)
7138 {
7139 	struct fs_mount* mount = descriptor->u.mount;
7140 
7141 	if (mount != NULL) {
7142 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7143 		put_mount(mount);
7144 	}
7145 }
7146 
7147 
7148 static status_t
7149 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7150 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7151 {
7152 	struct fs_mount* mount = descriptor->u.mount;
7153 
7154 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7155 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7156 			bufferSize, _count);
7157 	}
7158 
7159 	return B_UNSUPPORTED;
7160 }
7161 
7162 
7163 static status_t
7164 index_dir_rewind(struct file_descriptor* descriptor)
7165 {
7166 	struct fs_mount* mount = descriptor->u.mount;
7167 
7168 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7169 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7170 
7171 	return B_UNSUPPORTED;
7172 }
7173 
7174 
7175 static status_t
7176 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7177 	bool kernel)
7178 {
7179 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7180 		mountID, name, kernel));
7181 
7182 	struct fs_mount* mount;
7183 	status_t status = get_mount(mountID, &mount);
7184 	if (status != B_OK)
7185 		return status;
7186 
7187 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7188 		status = B_READ_ONLY_DEVICE;
7189 		goto out;
7190 	}
7191 
7192 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7193 
7194 out:
7195 	put_mount(mount);
7196 	return status;
7197 }
7198 
7199 
7200 #if 0
7201 static status_t
7202 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7203 {
7204 	struct vnode* vnode = descriptor->u.vnode;
7205 
7206 	// ToDo: currently unused!
7207 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7208 	if (!HAS_FS_CALL(vnode, read_index_stat))
7209 		return B_UNSUPPORTED;
7210 
7211 	return B_UNSUPPORTED;
7212 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7213 }
7214 
7215 
7216 static void
7217 index_free_fd(struct file_descriptor* descriptor)
7218 {
7219 	struct vnode* vnode = descriptor->u.vnode;
7220 
7221 	if (vnode != NULL) {
7222 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7223 		put_vnode(vnode);
7224 	}
7225 }
7226 #endif
7227 
7228 
7229 static status_t
7230 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7231 	bool kernel)
7232 {
7233 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7234 		mountID, name, kernel));
7235 
7236 	struct fs_mount* mount;
7237 	status_t status = get_mount(mountID, &mount);
7238 	if (status != B_OK)
7239 		return status;
7240 
7241 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7242 		status = B_UNSUPPORTED;
7243 		goto out;
7244 	}
7245 
7246 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7247 
7248 out:
7249 	put_mount(mount);
7250 	return status;
7251 }
7252 
7253 
7254 static status_t
7255 index_remove(dev_t mountID, const char* name, bool kernel)
7256 {
7257 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7258 		mountID, name, kernel));
7259 
7260 	struct fs_mount* mount;
7261 	status_t status = get_mount(mountID, &mount);
7262 	if (status != B_OK)
7263 		return status;
7264 
7265 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7266 		status = B_READ_ONLY_DEVICE;
7267 		goto out;
7268 	}
7269 
7270 	status = FS_MOUNT_CALL(mount, remove_index, name);
7271 
7272 out:
7273 	put_mount(mount);
7274 	return status;
7275 }
7276 
7277 
7278 /*!	TODO: the query FS API is still pretty much the same as in R5.
7279 		It would be nice if file systems got some more kernel support
7280 		for queries.
7281 		For example, query parsing should be moved into the kernel.
7282 */
7283 static int
7284 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7285 	int32 token, bool kernel)
7286 {
7287 	struct fs_mount* mount;
7288 	void* cookie;
7289 
7290 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7291 		device, query, kernel));
7292 
7293 	status_t status = get_mount(device, &mount);
7294 	if (status != B_OK)
7295 		return status;
7296 
7297 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7298 		status = B_UNSUPPORTED;
7299 		goto error;
7300 	}
7301 
7302 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7303 		&cookie);
7304 	if (status != B_OK)
7305 		goto error;
7306 
7307 	// get fd for the query
7308 	int fd;
7309 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7310 	if (fd >= 0)
7311 		return fd;
7312 
7313 	status = fd;
7314 
7315 	// something went wrong
7316 	FS_MOUNT_CALL(mount, close_query, cookie);
7317 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7318 
7319 error:
7320 	put_mount(mount);
7321 	return status;
7322 }
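
/*	Sketch of how a query FD is typically obtained and consumed (a
	hypothetical caller; the predicate syntax is defined by the file system,
	e.g. BFS):

		int fd = _kern_open_query(device, "name==\"*.cpp\"",
			strlen("name==\"*.cpp\""), 0, -1, -1);
		// matching entries are then read like a directory, and the FD is
		// closed normally

	The port/token pair is only used for live queries, which send update
	messages to that port when entries start or stop matching.
*/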
7323 
7324 
7325 static status_t
7326 query_close(struct file_descriptor* descriptor)
7327 {
7328 	struct fs_mount* mount = descriptor->u.mount;
7329 
7330 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7331 
7332 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7333 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7334 
7335 	return B_OK;
7336 }
7337 
7338 
7339 static void
7340 query_free_fd(struct file_descriptor* descriptor)
7341 {
7342 	struct fs_mount* mount = descriptor->u.mount;
7343 
7344 	if (mount != NULL) {
7345 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7346 		put_mount(mount);
7347 	}
7348 }
7349 
7350 
7351 static status_t
7352 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7353 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7354 {
7355 	struct fs_mount* mount = descriptor->u.mount;
7356 
7357 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7358 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7359 			bufferSize, _count);
7360 	}
7361 
7362 	return B_UNSUPPORTED;
7363 }
7364 
7365 
7366 static status_t
7367 query_rewind(struct file_descriptor* descriptor)
7368 {
7369 	struct fs_mount* mount = descriptor->u.mount;
7370 
7371 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7372 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7373 
7374 	return B_UNSUPPORTED;
7375 }
7376 
7377 
7378 //	#pragma mark - General File System functions
7379 
7380 
7381 static dev_t
7382 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7383 	const char* args, bool kernel)
7384 {
7385 	struct ::fs_mount* mount;
7386 	status_t status = B_OK;
7387 	fs_volume* volume = NULL;
7388 	int32 layer = 0;
7389 	Vnode* coveredNode = NULL;
7390 
7391 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7392 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7393 
7394 	// The path is always safe, we just have to do some basic validation of
7395 	// fsName - we can't make any assumptions about args, though.
7396 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7397 	// We'll get it from the DDM later.
7398 	if (fsName == NULL) {
7399 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7400 			return B_BAD_VALUE;
7401 	} else if (fsName[0] == '\0')
7402 		return B_BAD_VALUE;
7403 
7404 	RecursiveLocker mountOpLocker(sMountOpLock);
7405 
7406 	// Helper to delete a newly created file device on failure.
7407 	// Not exactly beautiful, but helps to keep the code below cleaner.
7408 	struct FileDeviceDeleter {
7409 		FileDeviceDeleter() : id(-1) {}
7410 		~FileDeviceDeleter()
7411 		{
7412 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7413 		}
7414 
7415 		partition_id id;
7416 	} fileDeviceDeleter;
7417 
7418 	// If the file system is not a "virtual" one, the device argument should
7419 	// point to a real file/device (if given at all).
7420 	// get the partition
7421 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7422 	KPartition* partition = NULL;
7423 	KPath normalizedDevice;
7424 	bool newlyCreatedFileDevice = false;
7425 
7426 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7427 		// normalize the device path
7428 		status = normalizedDevice.SetTo(device, true);
7429 		if (status != B_OK)
7430 			return status;
7431 
7432 		// get a corresponding partition from the DDM
7433 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7434 		if (partition == NULL) {
7435 			// Partition not found: This either means the user supplied
7436 			// an invalid path, or the path refers to an image file. We try
7437 			// to let the DDM create a file device for the path.
7438 			partition_id deviceID = ddm->CreateFileDevice(
7439 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7440 			if (deviceID >= 0) {
7441 				partition = ddm->RegisterPartition(deviceID);
7442 				if (newlyCreatedFileDevice)
7443 					fileDeviceDeleter.id = deviceID;
7444 			}
7445 		}
7446 
7447 		if (!partition) {
7448 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7449 				normalizedDevice.Path()));
7450 			return B_ENTRY_NOT_FOUND;
7451 		}
7452 
7453 		device = normalizedDevice.Path();
7454 			// correct path to file device
7455 	}
7456 	PartitionRegistrar partitionRegistrar(partition, true);
7457 
7458 	// Write lock the partition's device. For the time being, we keep the lock
7459 	// until we're done mounting -- not nice, but it ensures that no one
7460 	// interferes.
7461 	// TODO: Just mark the partition busy while mounting!
7462 	KDiskDevice* diskDevice = NULL;
7463 	if (partition) {
7464 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7465 		if (!diskDevice) {
7466 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7467 			return B_ERROR;
7468 		}
7469 	}
7470 
7471 	DeviceWriteLocker writeLocker(diskDevice, true);
7472 		// this takes over the write lock acquired before
7473 
7474 	if (partition != NULL) {
7475 		// make sure that the partition is not busy
7476 		if (partition->IsBusy()) {
7477 			TRACE(("fs_mount(): Partition is busy.\n"));
7478 			return B_BUSY;
7479 		}
7480 
7481 		// if no FS name had been supplied, we get it from the partition
7482 		if (fsName == NULL) {
7483 			KDiskSystem* diskSystem = partition->DiskSystem();
7484 			if (!diskSystem) {
7485 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7486 					"recognize it.\n"));
7487 				return B_BAD_VALUE;
7488 			}
7489 
7490 			if (!diskSystem->IsFileSystem()) {
7491 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7492 					"partitioning system.\n"));
7493 				return B_BAD_VALUE;
7494 			}
7495 
7496 			// The disk system name will not change, and the KDiskSystem
7497 			// object will not go away while the disk device is locked (and
7498 			// the partition has a reference to it), so this is safe.
7499 			fsName = diskSystem->Name();
7500 		}
7501 	}
7502 
7503 	mount = new(std::nothrow) (struct ::fs_mount);
7504 	if (mount == NULL)
7505 		return B_NO_MEMORY;
7506 
7507 	mount->device_name = strdup(device);
7508 		// "device" can be NULL
7509 
7510 	status = mount->entry_cache.Init();
7511 	if (status != B_OK)
7512 		goto err1;
7513 
7514 	// initialize structure
7515 	mount->id = sNextMountID++;
7516 	mount->partition = NULL;
7517 	mount->root_vnode = NULL;
7518 	mount->covers_vnode = NULL;
7519 	mount->unmounting = false;
7520 	mount->owns_file_device = false;
7521 	mount->volume = NULL;
7522 
7523 	// build up the volume(s)
7524 	while (true) {
7525 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7526 		if (layerFSName == NULL) {
7527 			if (layer == 0) {
7528 				status = B_NO_MEMORY;
7529 				goto err1;
7530 			}
7531 
7532 			break;
7533 		}
7534 		MemoryDeleter layerFSNameDeleter(layerFSName);
7535 
7536 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7537 		if (volume == NULL) {
7538 			status = B_NO_MEMORY;
7539 			goto err1;
7540 		}
7541 
7542 		volume->id = mount->id;
7543 		volume->partition = partition != NULL ? partition->ID() : -1;
7544 		volume->layer = layer++;
7545 		volume->private_volume = NULL;
7546 		volume->ops = NULL;
7547 		volume->sub_volume = NULL;
7548 		volume->super_volume = NULL;
7549 		volume->file_system = NULL;
7550 		volume->file_system_name = NULL;
7551 
7552 		volume->file_system_name = get_file_system_name(layerFSName);
7553 		if (volume->file_system_name == NULL) {
7554 			status = B_NO_MEMORY;
7555 			free(volume);
7556 			goto err1;
7557 		}
7558 
7559 		volume->file_system = get_file_system(layerFSName);
7560 		if (volume->file_system == NULL) {
7561 			status = B_DEVICE_NOT_FOUND;
7562 			free(volume->file_system_name);
7563 			free(volume);
7564 			goto err1;
7565 		}
7566 
7567 		if (mount->volume == NULL)
7568 			mount->volume = volume;
7569 		else {
7570 			volume->super_volume = mount->volume;
7571 			mount->volume->sub_volume = volume;
7572 			mount->volume = volume;
7573 		}
7574 	}
7575 
7576 	// insert mount struct into list before we call FS's mount() function
7577 	// so that vnodes can be created for this mount
7578 	rw_lock_write_lock(&sMountLock);
7579 	sMountsTable->Insert(mount);
7580 	rw_lock_write_unlock(&sMountLock);
7581 
7582 	ino_t rootID;
7583 
7584 	if (!sRoot) {
7585 		// we haven't mounted anything yet
7586 		if (strcmp(path, "/") != 0) {
7587 			status = B_ERROR;
7588 			goto err2;
7589 		}
7590 
7591 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7592 			args, &rootID);
7593 		if (status != B_OK || mount->volume->ops == NULL)
7594 			goto err2;
7595 	} else {
7596 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7597 		if (status != B_OK)
7598 			goto err2;
7599 
7600 		mount->covers_vnode = coveredNode;
7601 
7602 		// make sure coveredNode is a directory
7603 		if (!S_ISDIR(coveredNode->Type())) {
7604 			status = B_NOT_A_DIRECTORY;
7605 			goto err3;
7606 		}
7607 
7608 		if (coveredNode->IsCovered()) {
7609 			// this is already a covered vnode
7610 			status = B_BUSY;
7611 			goto err3;
7612 		}
7613 
7614 		// mount it/them
7615 		fs_volume* volume = mount->volume;
7616 		while (volume) {
7617 			status = volume->file_system->mount(volume, device, flags, args,
7618 				&rootID);
7619 			if (status != B_OK || volume->ops == NULL) {
7620 				if (status == B_OK && volume->ops == NULL)
7621 					panic("fs_mount: mount() succeeded but ops is NULL!");
7622 				if (volume->sub_volume)
7623 					goto err4;
7624 				goto err3;
7625 			}
7626 
7627 			volume = volume->super_volume;
7628 		}
7629 
7630 		volume = mount->volume;
7631 		while (volume) {
7632 			if (volume->ops->all_layers_mounted != NULL)
7633 				volume->ops->all_layers_mounted(volume);
7634 			volume = volume->super_volume;
7635 		}
7636 	}
7637 
7638 	// the root node is supposed to be owned by the file system - it must
7639 	// exist at this point
7640 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7641 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7642 		panic("fs_mount: file system does not own its root node!\n");
7643 		status = B_ERROR;
7644 		goto err4;
7645 	}
7646 
7647 	// set up the links between the root vnode and the vnode it covers
7648 	rw_lock_write_lock(&sVnodeLock);
7649 	if (coveredNode != NULL) {
7650 		if (coveredNode->IsCovered()) {
7651 			// the vnode is covered now
7652 			status = B_BUSY;
7653 			rw_lock_write_unlock(&sVnodeLock);
7654 			goto err4;
7655 		}
7656 
7657 		mount->root_vnode->covers = coveredNode;
7658 		mount->root_vnode->SetCovering(true);
7659 
7660 		coveredNode->covered_by = mount->root_vnode;
7661 		coveredNode->SetCovered(true);
7662 	}
7663 	rw_lock_write_unlock(&sVnodeLock);
7664 
7665 	if (!sRoot) {
7666 		sRoot = mount->root_vnode;
7667 		mutex_lock(&sIOContextRootLock);
7668 		get_current_io_context(true)->root = sRoot;
7669 		mutex_unlock(&sIOContextRootLock);
7670 		inc_vnode_ref_count(sRoot);
7671 	}
7672 
7673 	// supply the partition (if any) with the mount cookie and mark it mounted
7674 	if (partition) {
7675 		partition->SetMountCookie(mount->volume->private_volume);
7676 		partition->SetVolumeID(mount->id);
7677 
7678 		// keep a partition reference as long as the partition is mounted
7679 		partitionRegistrar.Detach();
7680 		mount->partition = partition;
7681 		mount->owns_file_device = newlyCreatedFileDevice;
7682 		fileDeviceDeleter.id = -1;
7683 	}
7684 
7685 	notify_mount(mount->id,
7686 		coveredNode != NULL ? coveredNode->device : -1,
7687 		coveredNode ? coveredNode->id : -1);
7688 
7689 	return mount->id;
7690 
7691 err4:
7692 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7693 err3:
7694 	if (coveredNode != NULL)
7695 		put_vnode(coveredNode);
7696 err2:
7697 	rw_lock_write_lock(&sMountLock);
7698 	sMountsTable->Remove(mount);
7699 	rw_lock_write_unlock(&sMountLock);
7700 err1:
7701 	delete mount;
7702 
7703 	return status;
7704 }
7705 
7706 
7707 static status_t
7708 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7709 {
7710 	struct fs_mount* mount;
7711 	status_t err;
7712 
7713 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7714 		mountID, kernel));
7715 
7716 	struct vnode* pathVnode = NULL;
7717 	if (path != NULL) {
7718 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7719 		if (err != B_OK)
7720 			return B_ENTRY_NOT_FOUND;
7721 	}
7722 
7723 	RecursiveLocker mountOpLocker(sMountOpLock);
7724 	ReadLocker mountLocker(sMountLock);
7725 
7726 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7727 	if (mount == NULL) {
7728 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7729 			pathVnode);
7730 	}
7731 
7732 	mountLocker.Unlock();
7733 
7734 	if (path != NULL) {
7735 		put_vnode(pathVnode);
7736 
7737 		if (mount->root_vnode != pathVnode) {
7738 			// not mountpoint
7739 			return B_BAD_VALUE;
7740 		}
7741 	}
7742 
7743 	// if the volume is associated with a partition, lock the device of the
7744 	// partition as long as we are unmounting
7745 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7746 	KPartition* partition = mount->partition;
7747 	KDiskDevice* diskDevice = NULL;
7748 	if (partition != NULL) {
7749 		if (partition->Device() == NULL) {
7750 			dprintf("fs_unmount(): There is no device!\n");
7751 			return B_ERROR;
7752 		}
7753 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7754 		if (!diskDevice) {
7755 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7756 			return B_ERROR;
7757 		}
7758 	}
7759 	DeviceWriteLocker writeLocker(diskDevice, true);
7760 
7761 	// make sure that the partition is not busy
7762 	if (partition != NULL) {
7763 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7764 			TRACE(("fs_unmount(): Partition is busy.\n"));
7765 			return B_BUSY;
7766 		}
7767 	}
7768 
7769 	// grab the vnode master mutex to keep someone from creating
7770 	// a vnode while we're figuring out if we can continue
7771 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7772 
7773 	bool disconnectedDescriptors = false;
7774 
7775 	while (true) {
7776 		bool busy = false;
7777 
7778 		// cycle through the list of vnodes associated with this mount and
7779 		// make sure none of them is busy or still referenced
7780 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7781 		while (struct vnode* vnode = iterator.Next()) {
7782 			if (vnode->IsBusy()) {
7783 				busy = true;
7784 				break;
7785 			}
7786 
7787 			// check the vnode's ref count -- subtract additional references for
7788 			// covering
7789 			int32 refCount = vnode->ref_count;
7790 			if (vnode->covers != NULL)
7791 				refCount--;
7792 			if (vnode->covered_by != NULL)
7793 				refCount--;
7794 
7795 			if (refCount != 0) {
7796 				// there are still vnodes in use on this mount, so we cannot
7797 				// unmount yet
7798 				busy = true;
7799 				break;
7800 			}
7801 		}
7802 
7803 		if (!busy)
7804 			break;
7805 
7806 		if ((flags & B_FORCE_UNMOUNT) == 0)
7807 			return B_BUSY;
7808 
7809 		if (disconnectedDescriptors) {
7810 			// wait a bit until the last access is finished, and then try again
7811 			vnodesWriteLocker.Unlock();
7812 			snooze(100000);
7813 			// TODO: if there is some kind of bug that prevents the ref counts
7814 			// from getting back to zero, this will fall into an endless loop...
7815 			vnodesWriteLocker.Lock();
7816 			continue;
7817 		}
7818 
7819 		// the file system is still busy - but we're forced to unmount it,
7820 		// so let's disconnect all open file descriptors
7821 
7822 		mount->unmounting = true;
7823 			// prevent new vnodes from being created
7824 
7825 		vnodesWriteLocker.Unlock();
7826 
7827 		disconnect_mount_or_vnode_fds(mount, NULL);
7828 		disconnectedDescriptors = true;
7829 
7830 		vnodesWriteLocker.Lock();
7831 	}
7832 
7833 	// We can safely continue. Mark all of the vnodes busy and put this mount
7834 	// structure into unmounting state. Also undo the vnode covers/covered_by
7835 	// links.
7836 	mount->unmounting = true;
7837 
7838 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7839 	while (struct vnode* vnode = iterator.Next()) {
7840 		// Remove all covers/covered_by links from other mounts' nodes to this
7841 		// vnode and adjust the node ref count accordingly. We will release the
7842 		// references to the external vnodes below.
7843 		if (Vnode* coveredNode = vnode->covers) {
7844 			if (Vnode* coveringNode = vnode->covered_by) {
7845 				// We have both covered and covering vnodes, so just remove us
7846 				// from the chain.
7847 				coveredNode->covered_by = coveringNode;
7848 				coveringNode->covers = coveredNode;
7849 				vnode->ref_count -= 2;
7850 
7851 				vnode->covered_by = NULL;
7852 				vnode->covers = NULL;
7853 				vnode->SetCovering(false);
7854 				vnode->SetCovered(false);
7855 			} else {
7856 				// We only have a covered vnode. Remove its link to us.
7857 				coveredNode->covered_by = NULL;
7858 				coveredNode->SetCovered(false);
7859 				vnode->ref_count--;
7860 
7861 				// If the other node is an external vnode, we keep its link
7862 				// around so we can put the reference later on. Otherwise
7863 				// we get rid of it right now.
7864 				if (coveredNode->mount == mount) {
7865 					vnode->covers = NULL;
7866 					coveredNode->ref_count--;
7867 				}
7868 			}
7869 		} else if (Vnode* coveringNode = vnode->covered_by) {
7870 			// We only have a covering vnode. Remove its link to us.
7871 			coveringNode->covers = NULL;
7872 			coveringNode->SetCovering(false);
7873 			vnode->ref_count--;
7874 
7875 			// If the other node is an external vnode, we keep its link
7876 			// around so we can put the reference later on. Otherwise
7877 			// we get rid of it right now.
7878 			if (coveringNode->mount == mount) {
7879 				vnode->covered_by = NULL;
7880 				coveringNode->ref_count--;
7881 			}
7882 		}
7883 
7884 		vnode->SetBusy(true);
7885 		vnode_to_be_freed(vnode);
7886 	}
7887 
7888 	vnodesWriteLocker.Unlock();
7889 
7890 	// Free all vnodes associated with this mount.
7891 	// They will be removed from the mount list by free_vnode(), so
7892 	// we don't have to do this.
7893 	while (struct vnode* vnode = mount->vnodes.Head()) {
7894 		// Put the references to external covered/covering vnodes we kept above.
7895 		if (Vnode* coveredNode = vnode->covers)
7896 			put_vnode(coveredNode);
7897 		if (Vnode* coveringNode = vnode->covered_by)
7898 			put_vnode(coveringNode);
7899 
7900 		free_vnode(vnode, false);
7901 	}
7902 
7903 	// remove the mount structure from the hash table
7904 	rw_lock_write_lock(&sMountLock);
7905 	sMountsTable->Remove(mount);
7906 	rw_lock_write_unlock(&sMountLock);
7907 
7908 	mountOpLocker.Unlock();
7909 
7910 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7911 	notify_unmount(mount->id);
7912 
7913 	// dereference the partition and mark it unmounted
7914 	if (partition) {
7915 		partition->SetVolumeID(-1);
7916 		partition->SetMountCookie(NULL);
7917 
7918 		if (mount->owns_file_device)
7919 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7920 		partition->Unregister();
7921 	}
7922 
7923 	delete mount;
7924 	return B_OK;
7925 }
7926 
7927 
7928 static status_t
7929 fs_sync(dev_t device)
7930 {
7931 	struct fs_mount* mount;
7932 	status_t status = get_mount(device, &mount);
7933 	if (status != B_OK)
7934 		return status;
7935 
7936 	struct vnode marker;
7937 	memset(&marker, 0, sizeof(marker));
7938 	marker.SetBusy(true);
7939 	marker.SetRemoved(true);
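
	// The marker is a pseudo vnode used to remember the iteration position
	// in the mount's vnode list across the unlock below. It is marked busy
	// so it is never treated as a real node; its "removed" flag doubles as
	// "the marker is currently not linked into the list".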
7940 
7941 	// First, synchronize all file caches
7942 
7943 	while (true) {
7944 		WriteLocker locker(sVnodeLock);
7945 			// Note: That's the easy way, which is probably OK for sync(),
7946 			// since it's a relatively rare call and doesn't need to allow for
7947 			// a lot of concurrency. Using a read lock would be possible, but
7948 			// also more involved, since we would have to lock the individual
7949 			// nodes and take care of the locking order, which we might not
7950 			// want to do while holding fs_mount::lock.
7951 
7952 		// synchronize access to vnode list
7953 		mutex_lock(&mount->lock);
7954 
7955 		struct vnode* vnode;
7956 		if (!marker.IsRemoved()) {
7957 			vnode = mount->vnodes.GetNext(&marker);
7958 			mount->vnodes.Remove(&marker);
7959 			marker.SetRemoved(true);
7960 		} else
7961 			vnode = mount->vnodes.First();
7962 
7963 		while (vnode != NULL && (vnode->cache == NULL
7964 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7965 			// TODO: we could track writes (and writable mapped vnodes)
7966 			//	and have a simple flag that we could test for here
7967 			vnode = mount->vnodes.GetNext(vnode);
7968 		}
7969 
7970 		if (vnode != NULL) {
7971 			// insert marker vnode again
7972 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7973 			marker.SetRemoved(false);
7974 		}
7975 
7976 		mutex_unlock(&mount->lock);
7977 
7978 		if (vnode == NULL)
7979 			break;
7980 
7981 		vnode = lookup_vnode(mount->id, vnode->id);
7982 		if (vnode == NULL || vnode->IsBusy())
7983 			continue;
7984 
7985 		if (vnode->ref_count == 0) {
7986 			// this vnode has been unused before
7987 			vnode_used(vnode);
7988 		}
7989 		inc_vnode_ref_count(vnode);
7990 
7991 		locker.Unlock();
7992 
7993 		if (vnode->cache != NULL && !vnode->IsRemoved())
7994 			vnode->cache->WriteModified();
7995 
7996 		put_vnode(vnode);
7997 	}
7998 
7999 	// Let the file systems do their synchronizing work
8000 	if (HAS_FS_MOUNT_CALL(mount, sync))
8001 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8002 
8003 	// Finally, flush the underlying device's write cache (if possible).
8004 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8005 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8006 
8007 	put_mount(mount);
8008 	return status;
8009 }
8010 
8011 
8012 static status_t
8013 fs_read_info(dev_t device, struct fs_info* info)
8014 {
8015 	struct fs_mount* mount;
8016 	status_t status = get_mount(device, &mount);
8017 	if (status != B_OK)
8018 		return status;
8019 
8020 	memset(info, 0, sizeof(struct fs_info));
8021 
8022 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8023 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8024 
8025 	// fill in info the file system doesn't (have to) know about
8026 	if (status == B_OK) {
8027 		info->dev = mount->id;
8028 		info->root = mount->root_vnode->id;
8029 
8030 		fs_volume* volume = mount->volume;
8031 		while (volume->super_volume != NULL)
8032 			volume = volume->super_volume;
8033 
8034 		strlcpy(info->fsh_name, volume->file_system_name,
8035 			sizeof(info->fsh_name));
8036 		if (mount->device_name != NULL) {
8037 			strlcpy(info->device_name, mount->device_name,
8038 				sizeof(info->device_name));
8039 		}
8040 	}
8041 
8042 	// if the call is not supported by the file system, there are still
8043 	// the parts that we filled out ourselves
8044 
8045 	put_mount(mount);
8046 	return status;
8047 }
8048 
8049 
8050 static status_t
8051 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8052 {
8053 	struct fs_mount* mount;
8054 	status_t status = get_mount(device, &mount);
8055 	if (status != B_OK)
8056 		return status;
8057 
8058 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8059 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8060 	else
8061 		status = B_READ_ONLY_DEVICE;
8062 
8063 	put_mount(mount);
8064 	return status;
8065 }
8066 
8067 
8068 static dev_t
8069 fs_next_device(int32* _cookie)
8070 {
8071 	struct fs_mount* mount = NULL;
8072 	dev_t device = *_cookie;
8073 
8074 	rw_lock_read_lock(&sMountLock);
8075 
8076 	// Since device IDs are assigned sequentially, this algorithm
8077 	// works well enough. It makes sure that the device list
8078 	// returned is sorted, and that no device is skipped when an
8079 	// already visited device gets unmounted.
8080 
8081 	while (device < sNextMountID) {
8082 		mount = find_mount(device++);
8083 		if (mount != NULL && mount->volume->private_volume != NULL)
8084 			break;
8085 	}
8086 
8087 	*_cookie = device;
8088 
8089 	if (mount != NULL)
8090 		device = mount->id;
8091 	else
8092 		device = B_BAD_VALUE;
8093 
8094 	rw_lock_read_unlock(&sMountLock);
8095 
8096 	return device;
8097 }
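
// Usage sketch: iterating all mounted volumes in ascending dev_t order,
// as callers of _kern_next_device()/next_dev() effectively do:
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0)
//		dprintf("mounted volume: %" B_PRIdDEV "\n", device);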
8098 
8099 
8100 ssize_t
8101 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8102 	void *buffer, size_t readBytes)
8103 {
8104 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8105 	if (attrFD < 0)
8106 		return attrFD;
8107 
8108 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8109 
8110 	_kern_close(attrFD);
8111 
8112 	return bytesRead;
8113 }
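
// Usage sketch (hypothetical attribute): reading a file's MIME type
// attribute; note that this implementation does not interpret \a type:
//
//	char buffer[256];
//	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
//		buffer, sizeof(buffer) - 1);
//	if (bytesRead >= 0)
//		buffer[bytesRead] = '\0';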
8114 
8115 
8116 static status_t
8117 get_cwd(char* buffer, size_t size, bool kernel)
8118 {
8119 	// Get current working directory from io context
8120 	struct io_context* context = get_current_io_context(kernel);
8121 	status_t status;
8122 
8123 	FUNCTION(("vfs_get_cwd: buf %p, size %" B_PRIuSIZE "\n", buffer, size));
8124 
8125 	mutex_lock(&context->io_mutex);
8126 
8127 	struct vnode* vnode = context->cwd;
8128 	if (vnode)
8129 		inc_vnode_ref_count(vnode);
8130 
8131 	mutex_unlock(&context->io_mutex);
8132 
8133 	if (vnode) {
8134 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8135 		put_vnode(vnode);
8136 	} else
8137 		status = B_ERROR;
8138 
8139 	return status;
8140 }
8141 
8142 
8143 static status_t
8144 set_cwd(int fd, char* path, bool kernel)
8145 {
8146 	struct io_context* context;
8147 	struct vnode* vnode = NULL;
8148 	struct vnode* oldDirectory;
8149 	status_t status;
8150 
8151 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8152 
8153 	// Get vnode for passed path, and bail if it failed
8154 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8155 	if (status < 0)
8156 		return status;
8157 
8158 	if (!S_ISDIR(vnode->Type())) {
8159 		// nope, can't cwd to here
8160 		status = B_NOT_A_DIRECTORY;
8161 		goto err;
8162 	}
8163 
8164 	// We need to have the permission to enter the directory, too
8165 	if (HAS_FS_CALL(vnode, access)) {
8166 		status = FS_CALL(vnode, access, X_OK);
8167 		if (status != B_OK)
8168 			goto err;
8169 	}
8170 
8171 	// Get current io context and lock
8172 	context = get_current_io_context(kernel);
8173 	mutex_lock(&context->io_mutex);
8174 
8175 	// save the old current working directory first
8176 	oldDirectory = context->cwd;
8177 	context->cwd = vnode;
8178 
8179 	mutex_unlock(&context->io_mutex);
8180 
8181 	if (oldDirectory)
8182 		put_vnode(oldDirectory);
8183 
8184 	return B_NO_ERROR;
8185 
8186 err:
8187 	put_vnode(vnode);
8188 	return status;
8189 }
8190 
8191 
8192 static status_t
8193 user_copy_name(char* to, const char* from, size_t length)
8194 {
8195 	ssize_t len = user_strlcpy(to, from, length);
8196 	if (len < 0)
8197 		return len;
8198 	if (len >= (ssize_t)length)
8199 		return B_NAME_TOO_LONG;
8200 	return B_OK;
8201 }
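
// Semantics sketch: unlike a bare user_strlcpy(), a name that does not fit
// the buffer is reported as an error rather than silently truncated:
//
//	char name[B_FILE_NAME_LENGTH];
//	status_t status = user_copy_name(name, userName, sizeof(name));
//		// status is B_OK, B_BAD_ADDRESS, or B_NAME_TOO_LONG --
//		// never a truncated copy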
8202 
8203 
8204 //	#pragma mark - kernel mirrored syscalls
8205 
8206 
8207 dev_t
8208 _kern_mount(const char* path, const char* device, const char* fsName,
8209 	uint32 flags, const char* args, size_t argsLength)
8210 {
8211 	KPath pathBuffer(path);
8212 	if (pathBuffer.InitCheck() != B_OK)
8213 		return B_NO_MEMORY;
8214 
8215 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8216 }
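
// Example (sketch, with a hypothetical device path): mounting a BFS volume
// read-only from kernel code:
//
//	dev_t volume = _kern_mount("/mnt", "/dev/disk/usb/0/0/raw", "bfs",
//		B_MOUNT_READ_ONLY, NULL, 0);
//	if (volume < 0)
//		dprintf("mount failed: %s\n", strerror(volume));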
8217 
8218 
8219 status_t
8220 _kern_unmount(const char* path, uint32 flags)
8221 {
8222 	KPath pathBuffer(path);
8223 	if (pathBuffer.InitCheck() != B_OK)
8224 		return B_NO_MEMORY;
8225 
8226 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8227 }
8228 
8229 
8230 status_t
8231 _kern_read_fs_info(dev_t device, struct fs_info* info)
8232 {
8233 	if (info == NULL)
8234 		return B_BAD_VALUE;
8235 
8236 	return fs_read_info(device, info);
8237 }
8238 
8239 
8240 status_t
8241 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8242 {
8243 	if (info == NULL)
8244 		return B_BAD_VALUE;
8245 
8246 	return fs_write_info(device, info, mask);
8247 }
8248 
8249 
8250 status_t
8251 _kern_sync(void)
8252 {
8253 	// Note: _kern_sync() is also called from _user_sync()
8254 	int32 cookie = 0;
8255 	dev_t device;
8256 	while ((device = next_dev(&cookie)) >= 0) {
8257 		status_t status = fs_sync(device);
8258 		if (status != B_OK && status != B_BAD_VALUE) {
8259 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8260 				strerror(status));
8261 		}
8262 	}
8263 
8264 	return B_OK;
8265 }
8266 
8267 
8268 dev_t
8269 _kern_next_device(int32* _cookie)
8270 {
8271 	return fs_next_device(_cookie);
8272 }
8273 
8274 
8275 status_t
8276 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8277 	size_t infoSize)
8278 {
8279 	if (infoSize != sizeof(fd_info))
8280 		return B_BAD_VALUE;
8281 
8282 	// get the team
8283 	Team* team = Team::Get(teamID);
8284 	if (team == NULL)
8285 		return B_BAD_TEAM_ID;
8286 	BReference<Team> teamReference(team, true);
8287 
8288 	// now that we have a team reference, its I/O context won't go away
8289 	io_context* context = team->io_context;
8290 	MutexLocker contextLocker(context->io_mutex);
8291 
8292 	uint32 slot = *_cookie;
8293 
8294 	struct file_descriptor* descriptor;
8295 	while (slot < context->table_size
8296 		&& (descriptor = context->fds[slot]) == NULL) {
8297 		slot++;
8298 	}
8299 
8300 	if (slot >= context->table_size)
8301 		return B_ENTRY_NOT_FOUND;
8302 
8303 	info->number = slot;
8304 	info->open_mode = descriptor->open_mode;
8305 
8306 	struct vnode* vnode = fd_vnode(descriptor);
8307 	if (vnode != NULL) {
8308 		info->device = vnode->device;
8309 		info->node = vnode->id;
8310 	} else if (descriptor->u.mount != NULL) {
8311 		info->device = descriptor->u.mount->id;
8312 		info->node = -1;
8313 	}
8314 
8315 	*_cookie = slot + 1;
8316 	return B_OK;
8317 }
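
// Usage sketch (assuming a valid teamID; error handling omitted):
// enumerating a team's open file descriptors:
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
//			== B_OK) {
//		dprintf("fd %" B_PRId32 " -> device %" B_PRIdDEV "\n", info.number,
//			info.device);
//	}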
8318 
8319 
8320 int
8321 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8322 	int perms)
8323 {
8324 	if ((openMode & O_CREAT) != 0) {
8325 		return file_create_entry_ref(device, inode, name, openMode, perms,
8326 			true);
8327 	}
8328 
8329 	return file_open_entry_ref(device, inode, name, openMode, true);
8330 }
8331 
8332 
8333 /*!	\brief Opens a node specified by a FD + path pair.
8334 
8335 	At least one of \a fd and \a path must be specified.
8336 	If only \a fd is given, the function opens the node identified by this
8337 	FD. If only a path is given, this path is opened. If both are given and
8338 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8339 	of the directory (!) identified by \a fd.
8340 
8341 	\param fd The FD. May be < 0.
8342 	\param path The absolute or relative path. May be \c NULL.
8343 	\param openMode The open mode.
8344 	\return A FD referring to the newly opened node, or an error code,
8345 			if an error occurs.
8346 */
8347 int
8348 _kern_open(int fd, const char* path, int openMode, int perms)
8349 {
8350 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8351 	if (pathBuffer.InitCheck() != B_OK)
8352 		return B_NO_MEMORY;
8353 
8354 	if ((openMode & O_CREAT) != 0)
8355 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8356 
8357 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8358 }
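
// Usage sketch (hypothetical path): with fd < 0 and an absolute path, the
// path alone determines the node to open:
//
//	int fd = _kern_open(-1, "/var/log/syslog", O_RDONLY, 0);
//	if (fd >= 0)
//		_kern_close(fd);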
8359 
8360 
8361 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8362 
8363 	The supplied name may be \c NULL, in which case directory identified
8364 	by \a device and \a inode will be opened. Otherwise \a device and
8365 	\a inode identify the parent directory of the directory to be opened
8366 	and \a name its entry name.
8367 
8368 	\param device If \a name is specified the ID of the device the parent
8369 		   directory of the directory to be opened resides on, otherwise
8370 		   the device of the directory itself.
8371 	\param inode If \a name is specified the node ID of the parent
8372 		   directory of the directory to be opened, otherwise node ID of the
8373 		   directory itself.
8374 	\param name The entry name of the directory to be opened. If \c NULL,
8375 		   the \a device + \a inode pair identify the node to be opened.
8376 	\return The FD of the newly opened directory or an error code, if
8377 			something went wrong.
8378 */
8379 int
8380 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8381 {
8382 	return dir_open_entry_ref(device, inode, name, true);
8383 }
8384 
8385 
8386 /*!	\brief Opens a directory specified by a FD + path pair.
8387 
8388 	At least one of \a fd and \a path must be specified.
8389 	If only \a fd is given, the function opens the directory identified by this
8390 	FD. If only a path is given, this path is opened. If both are given and
8391 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8392 	of the directory (!) identified by \a fd.
8393 
8394 	\param fd The FD. May be < 0.
8395 	\param path The absolute or relative path. May be \c NULL.
8396 	\return A FD referring to the newly opened directory, or an error code,
8397 			if an error occurs.
8398 */
8399 int
8400 _kern_open_dir(int fd, const char* path)
8401 {
8402 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8403 	if (pathBuffer.InitCheck() != B_OK)
8404 		return B_NO_MEMORY;
8405 
8406 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8407 }
8408 
8409 
8410 status_t
8411 _kern_fcntl(int fd, int op, size_t argument)
8412 {
8413 	return common_fcntl(fd, op, argument, true);
8414 }
8415 
8416 
8417 status_t
8418 _kern_fsync(int fd)
8419 {
8420 	return common_sync(fd, true);
8421 }
8422 
8423 
8424 status_t
8425 _kern_lock_node(int fd)
8426 {
8427 	return common_lock_node(fd, true);
8428 }
8429 
8430 
8431 status_t
8432 _kern_unlock_node(int fd)
8433 {
8434 	return common_unlock_node(fd, true);
8435 }
8436 
8437 
8438 status_t
8439 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8440 	int perms)
8441 {
8442 	return dir_create_entry_ref(device, inode, name, perms, true);
8443 }
8444 
8445 
8446 /*!	\brief Creates a directory specified by a FD + path pair.
8447 
8448 	\a path must always be specified (it contains the name of the new directory
8449 	at least). If only a path is given, this path identifies the location at
8450 	which the directory shall be created. If both \a fd and \a path are given
8451 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8452 	of the directory (!) identified by \a fd.
8453 
8454 	\param fd The FD. May be < 0.
8455 	\param path The absolute or relative path. Must not be \c NULL.
8456 	\param perms The access permissions the new directory shall have.
8457 	\return \c B_OK, if the directory has been created successfully, another
8458 			error code otherwise.
8459 */
8460 status_t
8461 _kern_create_dir(int fd, const char* path, int perms)
8462 {
8463 	KPath pathBuffer(path, KPath::DEFAULT);
8464 	if (pathBuffer.InitCheck() != B_OK)
8465 		return B_NO_MEMORY;
8466 
8467 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8468 }
8469 
8470 
8471 status_t
8472 _kern_remove_dir(int fd, const char* path)
8473 {
8474 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8475 	if (pathBuffer.InitCheck() != B_OK)
8476 		return B_NO_MEMORY;
8477 
8478 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8479 }
8480 
8481 
8482 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8483 
8484 	At least one of \a fd and \a path must be specified.
8485 	If only \a fd is given, the symlink to be read is the node
8486 	identified by this FD. If only a path is given, this path identifies the
8487 	symlink to be read. If both are given and the path is absolute, \a fd is
8488 	ignored; a relative path is reckoned off of the directory (!) identified
8489 	by \a fd.
8490 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8491 	will still be updated to reflect the required buffer size.
8492 
8493 	\param fd The FD. May be < 0.
8494 	\param path The absolute or relative path. May be \c NULL.
8495 	\param buffer The buffer into which the contents of the symlink shall be
8496 		   written.
8497 	\param _bufferSize A pointer to the size of the supplied buffer.
8498 	\return \c B_OK on success or an appropriate error code.
8499 */
8500 status_t
8501 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8502 {
8503 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8504 	if (pathBuffer.InitCheck() != B_OK)
8505 		return B_NO_MEMORY;
8506 
8507 	return common_read_link(fd, pathBuffer.LockBuffer(),
8508 		buffer, _bufferSize, true);
8509 }
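
// Usage sketch (hypothetical link): on B_BUFFER_OVERFLOW the required size
// is still stored in the size argument, so a caller can size a retry:
//
//	char buffer[B_PATH_NAME_LENGTH];
//	size_t size = sizeof(buffer);
//	status_t status = _kern_read_link(-1, "/bin", buffer, &size);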
8510 
8511 
8512 /*!	\brief Creates a symlink specified by a FD + path pair.
8513 
8514 	\a path must always be specified (it contains the name of the new symlink
8515 	at least). If only a path is given, this path identifies the location at
8516 	which the symlink shall be created. If both \a fd and \a path are given and
8517 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8518 	of the directory (!) identified by \a fd.
8519 
8520 	\param fd The FD. May be < 0.
8521 	\param toPath The path the new symlink shall point to. Must not be \c NULL.
8522 	\param mode The access permissions the new symlink shall have.
8523 	\return \c B_OK, if the symlink has been created successfully, another
8524 			error code otherwise.
8525 */
8526 status_t
8527 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8528 {
8529 	KPath pathBuffer(path);
8530 	if (pathBuffer.InitCheck() != B_OK)
8531 		return B_NO_MEMORY;
8532 
8533 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8534 		toPath, mode, true);
8535 }
8536 
8537 
8538 status_t
8539 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8540 	bool traverseLeafLink)
8541 {
8542 	KPath pathBuffer(path);
8543 	KPath toPathBuffer(toPath);
8544 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8545 		return B_NO_MEMORY;
8546 
8547 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8548 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8549 }
8550 
8551 
8552 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8553 
8554 	\a path must always be specified (it contains at least the name of the entry
8555 	to be deleted). If only a path is given, this path identifies the entry
8556 	directly. If both \a fd and \a path are given and the path is absolute,
8557 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8558 	identified by \a fd.
8559 
8560 	\param fd The FD. May be < 0.
8561 	\param path The absolute or relative path. Must not be \c NULL.
8562 	\return \c B_OK, if the entry has been removed successfully, another
8563 			error code otherwise.
8564 */
8565 status_t
8566 _kern_unlink(int fd, const char* path)
8567 {
8568 	KPath pathBuffer(path);
8569 	if (pathBuffer.InitCheck() != B_OK)
8570 		return B_NO_MEMORY;
8571 
8572 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8573 }
8574 
8575 
8576 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8577 		   by another FD + path pair.
8578 
8579 	\a oldPath and \a newPath must always be specified (they contain at least
8580 	the name of the entry). If only a path is given, this path identifies the
8581 	entry directly. If both a FD and a path are given and the path is absolute,
8582 	the FD is ignored; a relative path is reckoned off of the directory (!)
8583 	identified by the respective FD.
8584 
8585 	\param oldFD The FD of the old location. May be < 0.
8586 	\param oldPath The absolute or relative path of the old location. Must not
8587 		   be \c NULL.
8588 	\param newFD The FD of the new location. May be < 0.
8589 	\param newPath The absolute or relative path of the new location. Must not
8590 		   be \c NULL.
8591 	\return \c B_OK, if the entry has been moved successfully, another
8592 			error code otherwise.
8593 */
8594 status_t
8595 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8596 {
8597 	KPath oldPathBuffer(oldPath);
8598 	KPath newPathBuffer(newPath);
8599 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8600 		return B_NO_MEMORY;
8601 
8602 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8603 		newFD, newPathBuffer.LockBuffer(), true);
8604 }
8605 
8606 
8607 status_t
8608 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8609 {
8610 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8611 	if (pathBuffer.InitCheck() != B_OK)
8612 		return B_NO_MEMORY;
8613 
8614 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8615 		true);
8616 }
8617 
8618 
8619 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8620 
8621 	If only \a fd is given, the stat operation associated with the type
8622 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8623 	given, this path identifies the entry for whose node to retrieve the
8624 	stat data. If both \a fd and \a path are given and the path is absolute,
8625 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8626 	identified by \a fd and specifies the entry whose stat data shall be
8627 	retrieved.
8628 
8629 	\param fd The FD. May be < 0.
8630 	\param path The absolute or relative path. Must not be \c NULL.
8631 	\param traverseLeafLink If \a path is given, \c true specifies that the
8632 		   function shall not stick to symlinks, but traverse them.
8633 	\param stat The buffer the stat data shall be written into.
8634 	\param statSize The size of the supplied stat buffer.
8635 	\return \c B_OK, if the stat data have been read successfully, another
8636 			error code otherwise.
8637 */
8638 status_t
8639 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8640 	struct stat* stat, size_t statSize)
8641 {
8642 	struct stat completeStat;
8643 	struct stat* originalStat = NULL;
8644 	status_t status;
8645 
8646 	if (statSize > sizeof(struct stat))
8647 		return B_BAD_VALUE;
8648 
8649 	// this supports different stat extensions
8650 	if (statSize < sizeof(struct stat)) {
8651 		originalStat = stat;
8652 		stat = &completeStat;
8653 	}
8654 
8655 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8656 
8657 	if (status == B_OK && originalStat != NULL)
8658 		memcpy(originalStat, stat, statSize);
8659 
8660 	return status;
8661 }
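
// Compatibility note: a caller compiled against an older, smaller struct
// stat passes its statSize; the stat data is gathered into a complete
// structure first and only the leading statSize bytes are copied back.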
8662 
8663 
8664 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8665 
8666 	If only \a fd is given, the stat operation associated with the type
8667 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8668 	given, this path identifies the entry for whose node to write the
8669 	stat data. If both \a fd and \a path are given and the path is absolute,
8670 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8671 	identified by \a fd and specifies the entry whose stat data shall be
8672 	written.
8673 
8674 	\param fd The FD. May be < 0.
8675 	\param path The absolute or relative path. May be \c NULL.
8676 	\param traverseLeafLink If \a path is given, \c true specifies that the
8677 		   function shall not stick to symlinks, but traverse them.
8678 	\param stat The buffer containing the stat data to be written.
8679 	\param statSize The size of the supplied stat buffer.
8680 	\param statMask A mask specifying which parts of the stat data shall be
8681 		   written.
8682 	\return \c B_OK, if the stat data have been written successfully,
8683 			another error code otherwise.
8684 */
8685 status_t
8686 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8687 	const struct stat* stat, size_t statSize, int statMask)
8688 {
8689 	struct stat completeStat;
8690 
8691 	if (statSize > sizeof(struct stat))
8692 		return B_BAD_VALUE;
8693 
8694 	// this supports different stat extensions
8695 	if (statSize < sizeof(struct stat)) {
8696 		memset((uint8*)&completeStat + statSize, 0,
8697 			sizeof(struct stat) - statSize);
8698 		memcpy(&completeStat, stat, statSize);
8699 		stat = &completeStat;
8700 	}
8701 
8702 	status_t status;
8703 
8704 	if (path != NULL) {
8705 		// path given: write the stat of the node referred to by (fd, path)
8706 		KPath pathBuffer(path);
8707 		if (pathBuffer.InitCheck() != B_OK)
8708 			return B_NO_MEMORY;
8709 
8710 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8711 			traverseLeafLink, stat, statMask, true);
8712 	} else {
8713 		// no path given: get the FD and use the FD operation
8714 		struct file_descriptor* descriptor
8715 			= get_fd(get_current_io_context(true), fd);
8716 		if (descriptor == NULL)
8717 			return B_FILE_ERROR;
8718 
8719 		if (descriptor->ops->fd_write_stat)
8720 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8721 		else
8722 			status = B_UNSUPPORTED;
8723 
8724 		put_fd(descriptor);
8725 	}
8726 
8727 	return status;
8728 }
8729 
8730 
8731 int
8732 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8733 {
8734 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8735 	if (pathBuffer.InitCheck() != B_OK)
8736 		return B_NO_MEMORY;
8737 
8738 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8739 }
8740 
8741 
8742 int
8743 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8744 	int openMode)
8745 {
8746 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8747 	if (pathBuffer.InitCheck() != B_OK)
8748 		return B_NO_MEMORY;
8749 
8750 	if ((openMode & O_CREAT) != 0) {
8751 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8752 			true);
8753 	}
8754 
8755 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8756 }
8757 
8758 
8759 status_t
8760 _kern_remove_attr(int fd, const char* name)
8761 {
8762 	return attr_remove(fd, name, true);
8763 }
8764 
8765 
8766 status_t
8767 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8768 	const char* toName)
8769 {
8770 	return attr_rename(fromFile, fromName, toFile, toName, true);
8771 }
8772 
8773 
8774 int
8775 _kern_open_index_dir(dev_t device)
8776 {
8777 	return index_dir_open(device, true);
8778 }
8779 
8780 
8781 status_t
8782 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8783 {
8784 	return index_create(device, name, type, flags, true);
8785 }
8786 
8787 
8788 status_t
8789 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8790 {
8791 	return index_name_read_stat(device, name, stat, true);
8792 }
8793 
8794 
8795 status_t
8796 _kern_remove_index(dev_t device, const char* name)
8797 {
8798 	return index_remove(device, name, true);
8799 }
8800 
8801 
8802 status_t
8803 _kern_getcwd(char* buffer, size_t size)
8804 {
8805 	TRACE(("_kern_getcwd: buf %p, %" B_PRIuSIZE "\n", buffer, size));
8806 
8807 	// Call vfs to get current working directory
8808 	return get_cwd(buffer, size, true);
8809 }
8810 
8811 
8812 status_t
8813 _kern_setcwd(int fd, const char* path)
8814 {
8815 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8816 	if (pathBuffer.InitCheck() != B_OK)
8817 		return B_NO_MEMORY;
8818 
8819 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8820 }
8821 
8822 
8823 //	#pragma mark - userland syscalls
8824 
8825 
8826 dev_t
8827 _user_mount(const char* userPath, const char* userDevice,
8828 	const char* userFileSystem, uint32 flags, const char* userArgs,
8829 	size_t argsLength)
8830 {
8831 	char fileSystem[B_FILE_NAME_LENGTH];
8832 	KPath path, device;
8833 	char* args = NULL;
8834 	status_t status;
8835 
8836 	if (!IS_USER_ADDRESS(userPath))
8837 		return B_BAD_ADDRESS;
8838 
8839 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8840 		return B_NO_MEMORY;
8841 
8842 	status = user_copy_name(path.LockBuffer(), userPath,
8843 		B_PATH_NAME_LENGTH);
8844 	if (status != B_OK)
8845 		return status;
8846 	path.UnlockBuffer();
8847 
8848 	if (userFileSystem != NULL) {
8849 		if (!IS_USER_ADDRESS(userFileSystem))
8850 			return B_BAD_ADDRESS;
8851 
8852 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8853 		if (status != B_OK)
8854 			return status;
8855 	}
8856 
8857 	if (userDevice != NULL) {
8858 		if (!IS_USER_ADDRESS(userDevice))
8859 			return B_BAD_ADDRESS;
8860 
8861 		status = user_copy_name(device.LockBuffer(), userDevice,
8862 			B_PATH_NAME_LENGTH);
8863 		if (status != B_OK)
8864 			return status;
8865 		device.UnlockBuffer();
8866 	}
8867 
8868 	if (userArgs != NULL && argsLength > 0) {
8869 		if (!IS_USER_ADDRESS(userArgs))
8870 			return B_BAD_ADDRESS;
8871 
8872 		// this is a safety restriction
8873 		if (argsLength >= 65536)
8874 			return B_NAME_TOO_LONG;
8875 
8876 		args = (char*)malloc(argsLength + 1);
8877 		if (args == NULL)
8878 			return B_NO_MEMORY;
8879 
8880 		status = user_copy_name(args, userArgs, argsLength + 1);
8881 		if (status != B_OK) {
8882 			free(args);
8883 			return status;
8884 		}
8885 	}
8886 
8887 	status = fs_mount(path.LockBuffer(),
8888 		userDevice != NULL ? device.Path() : NULL,
8889 		userFileSystem ? fileSystem : NULL, flags, args, false);
8890 
8891 	free(args);
8892 	return status;
8893 }
8894 
8895 
8896 status_t
8897 _user_unmount(const char* userPath, uint32 flags)
8898 {
8899 	if (!IS_USER_ADDRESS(userPath))
8900 		return B_BAD_ADDRESS;
8901 
8902 	KPath pathBuffer;
8903 	if (pathBuffer.InitCheck() != B_OK)
8904 		return B_NO_MEMORY;
8905 
8906 	char* path = pathBuffer.LockBuffer();
8907 
8908 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8909 	if (status != B_OK)
8910 		return status;
8911 
8912 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8913 }
8914 
8915 
8916 status_t
8917 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8918 {
8919 	struct fs_info info;
8920 	status_t status;
8921 
8922 	if (userInfo == NULL)
8923 		return B_BAD_VALUE;
8924 
8925 	if (!IS_USER_ADDRESS(userInfo))
8926 		return B_BAD_ADDRESS;
8927 
8928 	status = fs_read_info(device, &info);
8929 	if (status != B_OK)
8930 		return status;
8931 
8932 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8933 		return B_BAD_ADDRESS;
8934 
8935 	return B_OK;
8936 }
8937 
8938 
8939 status_t
8940 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8941 {
8942 	struct fs_info info;
8943 
8944 	if (userInfo == NULL)
8945 		return B_BAD_VALUE;
8946 
8947 	if (!IS_USER_ADDRESS(userInfo)
8948 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8949 		return B_BAD_ADDRESS;
8950 
8951 	return fs_write_info(device, &info, mask);
8952 }
8953 
8954 
8955 dev_t
8956 _user_next_device(int32* _userCookie)
8957 {
8958 	int32 cookie;
8959 	dev_t device;
8960 
8961 	if (!IS_USER_ADDRESS(_userCookie)
8962 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8963 		return B_BAD_ADDRESS;
8964 
8965 	device = fs_next_device(&cookie);
8966 
8967 	if (device >= B_OK) {
8968 		// update user cookie
8969 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8970 			return B_BAD_ADDRESS;
8971 	}
8972 
8973 	return device;
8974 }
8975 
8976 
8977 status_t
8978 _user_sync(void)
8979 {
8980 	return _kern_sync();
8981 }
8982 
8983 
8984 status_t
8985 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8986 	size_t infoSize)
8987 {
8988 	struct fd_info info;
8989 	uint32 cookie;
8990 
8991 	// only root can do this
8992 	if (geteuid() != 0)
8993 		return B_NOT_ALLOWED;
8994 
8995 	if (infoSize != sizeof(fd_info))
8996 		return B_BAD_VALUE;
8997 
8998 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8999 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9000 		return B_BAD_ADDRESS;
9001 
9002 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9003 	if (status != B_OK)
9004 		return status;
9005 
9006 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9007 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9008 		return B_BAD_ADDRESS;
9009 
9010 	return status;
9011 }
9012 
9013 
9014 status_t
9015 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9016 	char* userPath, size_t pathLength)
9017 {
9018 	if (!IS_USER_ADDRESS(userPath))
9019 		return B_BAD_ADDRESS;
9020 
9021 	KPath path;
9022 	if (path.InitCheck() != B_OK)
9023 		return B_NO_MEMORY;
9024 
9025 	// copy the leaf name onto the stack
9026 	char stackLeaf[B_FILE_NAME_LENGTH];
9027 	if (leaf != NULL) {
9028 		if (!IS_USER_ADDRESS(leaf))
9029 			return B_BAD_ADDRESS;
9030 
9031 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9032 		if (status != B_OK)
9033 			return status;
9034 
9035 		leaf = stackLeaf;
9036 	}
9037 
9038 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9039 		false, path.LockBuffer(), path.BufferSize());
9040 	if (status != B_OK)
9041 		return status;
9042 
9043 	path.UnlockBuffer();
9044 
9045 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9046 	if (length < 0)
9047 		return length;
9048 	if (length >= (int)pathLength)
9049 		return B_BUFFER_OVERFLOW;
9050 
9051 	return B_OK;
9052 }
9053 
9054 
9055 status_t
9056 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9057 {
9058 	if (userPath == NULL || buffer == NULL)
9059 		return B_BAD_VALUE;
9060 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9061 		return B_BAD_ADDRESS;
9062 
9063 	// copy path from userland
9064 	KPath pathBuffer;
9065 	if (pathBuffer.InitCheck() != B_OK)
9066 		return B_NO_MEMORY;
9067 	char* path = pathBuffer.LockBuffer();
9068 
9069 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9070 	if (status != B_OK)
9071 		return status;
9072 
9073 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9074 		false);
9075 	if (error != B_OK)
9076 		return error;
9077 
9078 	// copy back to userland
9079 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9080 	if (len < 0)
9081 		return len;
9082 	if (len >= B_PATH_NAME_LENGTH)
9083 		return B_BUFFER_OVERFLOW;
9084 
9085 	return B_OK;
9086 }
9087 
9088 
9089 int
9090 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9091 	int openMode, int perms)
9092 {
9093 	char name[B_FILE_NAME_LENGTH];
9094 
9095 	if (userName == NULL || device < 0 || inode < 0)
9096 		return B_BAD_VALUE;
9097 	if (!IS_USER_ADDRESS(userName))
9098 		return B_BAD_ADDRESS;
9099 	status_t status = user_copy_name(name, userName, sizeof(name));
9100 	if (status != B_OK)
9101 		return status;
9102 
9103 	if ((openMode & O_CREAT) != 0) {
9104 		return file_create_entry_ref(device, inode, name, openMode, perms,
9105 			false);
9106 	}
9107 
9108 	return file_open_entry_ref(device, inode, name, openMode, false);
9109 }
9110 
9111 
9112 int
9113 _user_open(int fd, const char* userPath, int openMode, int perms)
9114 {
9115 	KPath path;
9116 	if (path.InitCheck() != B_OK)
9117 		return B_NO_MEMORY;
9118 
9119 	char* buffer = path.LockBuffer();
9120 
9121 	if (!IS_USER_ADDRESS(userPath))
9122 		return B_BAD_ADDRESS;
9123 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9124 	if (status != B_OK)
9125 		return status;
9126 
9127 	if ((openMode & O_CREAT) != 0)
9128 		return file_create(fd, buffer, openMode, perms, false);
9129 
9130 	return file_open(fd, buffer, openMode, false);
9131 }
9132 
9133 
9134 int
9135 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9136 {
9137 	if (userName != NULL) {
9138 		char name[B_FILE_NAME_LENGTH];
9139 
9140 		if (!IS_USER_ADDRESS(userName))
9141 			return B_BAD_ADDRESS;
9142 		status_t status = user_copy_name(name, userName, sizeof(name));
9143 		if (status != B_OK)
9144 			return status;
9145 
9146 		return dir_open_entry_ref(device, inode, name, false);
9147 	}
9148 	return dir_open_entry_ref(device, inode, NULL, false);
9149 }
9150 
9151 
9152 int
9153 _user_open_dir(int fd, const char* userPath)
9154 {
9155 	if (userPath == NULL)
9156 		return dir_open(fd, NULL, false);
9157 
9158 	KPath path;
9159 	if (path.InitCheck() != B_OK)
9160 		return B_NO_MEMORY;
9161 
9162 	char* buffer = path.LockBuffer();
9163 
9164 	if (!IS_USER_ADDRESS(userPath))
9165 		return B_BAD_ADDRESS;
9166 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9167 	if (status != B_OK)
9168 		return status;
9169 
9170 	return dir_open(fd, buffer, false);
9171 }
9172 
9173 
9174 /*!	\brief Opens a directory's parent directory and returns the entry name
9175 		   of the former.
9176 
9177 	Aside from the fact that it also returns the directory's entry name, this
9178 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It really
9179 	is equivalent if \a userName is \c NULL.
9180 
9181 	If a name buffer is supplied and the name does not fit the buffer, the
9182 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9183 
9184 	\param fd A FD referring to a directory.
9185 	\param userName Buffer the directory's entry name shall be written into.
9186 		   May be \c NULL.
9187 	\param nameLength Size of the name buffer.
9188 	\return The file descriptor of the opened parent directory, if everything
9189 			went fine, an error code otherwise.
9190 */
9191 int
9192 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9193 {
9194 	bool kernel = false;
9195 
9196 	if (userName && !IS_USER_ADDRESS(userName))
9197 		return B_BAD_ADDRESS;
9198 
9199 	// open the parent dir
9200 	int parentFD = dir_open(fd, (char*)"..", kernel);
9201 	if (parentFD < 0)
9202 		return parentFD;
9203 	FDCloser fdCloser(parentFD, kernel);
9204 
9205 	if (userName) {
9206 		// get the vnodes
9207 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9208 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9209 		VNodePutter parentVNodePutter(parentVNode);
9210 		VNodePutter dirVNodePutter(dirVNode);
9211 		if (!parentVNode || !dirVNode)
9212 			return B_FILE_ERROR;
9213 
9214 		// get the vnode name
9215 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9216 		struct dirent* buffer = (struct dirent*)_buffer;
9217 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9218 			sizeof(_buffer), get_current_io_context(false));
9219 		if (status != B_OK)
9220 			return status;
9221 
9222 		// copy the name to the userland buffer
9223 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9224 		if (len < 0)
9225 			return len;
9226 		if (len >= (int)nameLength)
9227 			return B_BUFFER_OVERFLOW;
9228 	}
9229 
9230 	return fdCloser.Detach();
9231 }
9232 
9233 
9234 status_t
9235 _user_fcntl(int fd, int op, size_t argument)
9236 {
9237 	status_t status = common_fcntl(fd, op, argument, false);
9238 	if (op == F_SETLKW)
9239 		syscall_restart_handle_post(status);
9240 
9241 	return status;
9242 }
9243 
9244 
9245 status_t
9246 _user_fsync(int fd)
9247 {
9248 	return common_sync(fd, false);
9249 }
9250 
9251 
9252 status_t
9253 _user_flock(int fd, int operation)
9254 {
9255 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9256 
9257 	// Check if the operation is valid
9258 	switch (operation & ~LOCK_NB) {
9259 		case LOCK_UN:
9260 		case LOCK_SH:
9261 		case LOCK_EX:
9262 			break;
9263 
9264 		default:
9265 			return B_BAD_VALUE;
9266 	}
9267 
9268 	struct file_descriptor* descriptor;
9269 	struct vnode* vnode;
9270 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9271 	if (descriptor == NULL)
9272 		return B_FILE_ERROR;
9273 
9274 	if (descriptor->type != FDTYPE_FILE) {
9275 		put_fd(descriptor);
9276 		return B_BAD_VALUE;
9277 	}
9278 
9279 	struct flock flock;
9280 	flock.l_start = 0;
9281 	flock.l_len = OFF_MAX;
9282 	flock.l_whence = 0;
9283 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9284 
9285 	status_t status;
9286 	if ((operation & LOCK_UN) != 0) {
9287 		if (HAS_FS_CALL(vnode, release_lock))
9288 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9289 		else
9290 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9291 	} else {
9292 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9293 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9294 				(operation & LOCK_NB) == 0);
9295 		} else {
9296 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9297 				(operation & LOCK_NB) == 0);
9298 		}
9299 	}
9300 
9301 	syscall_restart_handle_post(status);
9302 
9303 	put_fd(descriptor);
9304 	return status;
9305 }
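
// Userland view (sketch): a BSD-style call like
//
//	flock(fd, LOCK_EX | LOCK_NB);
//
// ends up here and is translated into a whole-file advisory lock (l_start
// 0, l_len OFF_MAX), non-blocking because LOCK_NB is set.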
9306 
9307 
9308 status_t
9309 _user_lock_node(int fd)
9310 {
9311 	return common_lock_node(fd, false);
9312 }
9313 
9314 
9315 status_t
9316 _user_unlock_node(int fd)
9317 {
9318 	return common_unlock_node(fd, false);
9319 }
9320 
9321 
9322 status_t
9323 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9324 	int perms)
9325 {
9326 	char name[B_FILE_NAME_LENGTH];
9327 	status_t status;
9328 
9329 	if (!IS_USER_ADDRESS(userName))
9330 		return B_BAD_ADDRESS;
9331 
9332 	status = user_copy_name(name, userName, sizeof(name));
9333 	if (status != B_OK)
9334 		return status;
9335 
9336 	return dir_create_entry_ref(device, inode, name, perms, false);
9337 }
9338 
9339 
9340 status_t
9341 _user_create_dir(int fd, const char* userPath, int perms)
9342 {
9343 	KPath pathBuffer;
9344 	if (pathBuffer.InitCheck() != B_OK)
9345 		return B_NO_MEMORY;
9346 
9347 	char* path = pathBuffer.LockBuffer();
9348 
9349 	if (!IS_USER_ADDRESS(userPath))
9350 		return B_BAD_ADDRESS;
9351 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9352 	if (status != B_OK)
9353 		return status;
9354 
9355 	return dir_create(fd, path, perms, false);
9356 }
9357 
9358 
9359 status_t
9360 _user_remove_dir(int fd, const char* userPath)
9361 {
9362 	KPath pathBuffer;
9363 	if (pathBuffer.InitCheck() != B_OK)
9364 		return B_NO_MEMORY;
9365 
9366 	char* path = pathBuffer.LockBuffer();
9367 
9368 	if (userPath != NULL) {
9369 		if (!IS_USER_ADDRESS(userPath))
9370 			return B_BAD_ADDRESS;
9371 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9372 		if (status != B_OK)
9373 			return status;
9374 	}
9375 
9376 	return dir_remove(fd, userPath ? path : NULL, false);
9377 }
9378 
9379 
9380 status_t
9381 _user_read_link(int fd, const char* userPath, char* userBuffer,
9382 	size_t* userBufferSize)
9383 {
9384 	KPath pathBuffer, linkBuffer;
9385 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9386 		return B_NO_MEMORY;
9387 
9388 	size_t bufferSize;
9389 
9390 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9391 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9392 		return B_BAD_ADDRESS;
9393 
9394 	char* path = pathBuffer.LockBuffer();
9395 	char* buffer = linkBuffer.LockBuffer();
9396 
9397 	if (userPath) {
9398 		if (!IS_USER_ADDRESS(userPath))
9399 			return B_BAD_ADDRESS;
9400 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9401 		if (status != B_OK)
9402 			return status;
9403 
9404 		if (bufferSize > B_PATH_NAME_LENGTH)
9405 			bufferSize = B_PATH_NAME_LENGTH;
9406 	}
9407 
9408 	size_t newBufferSize = bufferSize;
9409 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9410 		&newBufferSize, false);
9411 
9412 	// we also update the bufferSize in case of errors
9413 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9414 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9415 		return B_BAD_ADDRESS;
9416 
9417 	if (status != B_OK)
9418 		return status;
9419 
9420 	bufferSize = min_c(newBufferSize, bufferSize);
9421 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9422 		return B_BAD_ADDRESS;
9423 
9424 	return B_OK;
9425 }
9426 
9427 
9428 status_t
9429 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9430 	int mode)
9431 {
9432 	KPath pathBuffer;
9433 	KPath toPathBuffer;
9434 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9435 		return B_NO_MEMORY;
9436 
9437 	char* path = pathBuffer.LockBuffer();
9438 	char* toPath = toPathBuffer.LockBuffer();
9439 
9440 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9441 		return B_BAD_ADDRESS;
9442 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9443 	if (status != B_OK)
9444 		return status;
9445 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9446 	if (status != B_OK)
9447 		return status;
9448 
9449 	return common_create_symlink(fd, path, toPath, mode, false);
9450 }
9451 
9452 
9453 status_t
9454 _user_create_link(int pathFD, const char* userPath, int toFD,
9455 	const char* userToPath, bool traverseLeafLink)
9456 {
9457 	KPath pathBuffer;
9458 	KPath toPathBuffer;
9459 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9460 		return B_NO_MEMORY;
9461 
9462 	char* path = pathBuffer.LockBuffer();
9463 	char* toPath = toPathBuffer.LockBuffer();
9464 
9465 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9466 		return B_BAD_ADDRESS;
9467 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9468 	if (status != B_OK)
9469 		return status;
9470 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9471 	if (status != B_OK)
9472 		return status;
9473 
9474 	status = check_path(toPath);
9475 	if (status != B_OK)
9476 		return status;
9477 
9478 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9479 		false);
9480 }
9481 
9482 
9483 status_t
9484 _user_unlink(int fd, const char* userPath)
9485 {
9486 	KPath pathBuffer;
9487 	if (pathBuffer.InitCheck() != B_OK)
9488 		return B_NO_MEMORY;
9489 
9490 	char* path = pathBuffer.LockBuffer();
9491 
9492 	if (!IS_USER_ADDRESS(userPath))
9493 		return B_BAD_ADDRESS;
9494 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9495 	if (status != B_OK)
9496 		return status;
9497 
9498 	return common_unlink(fd, path, false);
9499 }
9500 
9501 
9502 status_t
9503 _user_rename(int oldFD, const char* userOldPath, int newFD,
9504 	const char* userNewPath)
9505 {
9506 	KPath oldPathBuffer;
9507 	KPath newPathBuffer;
9508 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9509 		return B_NO_MEMORY;
9510 
9511 	char* oldPath = oldPathBuffer.LockBuffer();
9512 	char* newPath = newPathBuffer.LockBuffer();
9513 
9514 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9515 		return B_BAD_ADDRESS;
9516 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9517 	if (status != B_OK)
9518 		return status;
9519 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9520 	if (status != B_OK)
9521 		return status;
9522 
9523 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9524 }
9525 
9526 
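/*!	Creates a FIFO (named pipe) at \a fd + \a userPath with the given
	permissions. The directory's file system must implement the
	create_special_node() hook; the FIFO sub-node is attached automatically.
*/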
9527 status_t
9528 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9529 {
9530 	KPath pathBuffer;
9531 	if (pathBuffer.InitCheck() != B_OK)
9532 		return B_NO_MEMORY;
9533 
9534 	char* path = pathBuffer.LockBuffer();
9535 
9536 	if (!IS_USER_ADDRESS(userPath))
9537 		return B_BAD_ADDRESS;
9538 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9539 	if (status != B_OK)
9540 		return status;
9541 
9542 	// split the path into directory vnode and filename
9543 	char filename[B_FILE_NAME_LENGTH];
9544 	struct vnode* dir;
9545 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9546 	if (status != B_OK)
9547 		return status;
9548 
9549 	VNodePutter _(dir);
9550 
9551 	// the underlying FS needs to support creating FIFOs
9552 	if (!HAS_FS_CALL(dir, create_special_node))
9553 		return B_UNSUPPORTED;
9554 
9555 	// create the entry -- the FIFO sub-node is set up automatically
9556 	fs_vnode superVnode;
9557 	ino_t nodeID;
9558 	status = FS_CALL(dir, create_special_node, filename, NULL,
9559 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9560 
9561 	// create_special_node() acquired a reference for us that we don't need.
9562 	if (status == B_OK)
9563 		put_vnode(dir->mount->volume, nodeID);
9564 
9565 	return status;
9566 }
9567 
9568 
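/*!	Creates an unnamed FIFO in the root file system and opens one FD for
	each end, copying both to \a userFDs: index 0 is the read end, index 1
	the write end. A userland pipe() wrapper would build on this syscall
	roughly like this (a sketch, not necessarily the actual libroot code):

		int fds[2];
		if (_kern_create_pipe(fds) == B_OK) {
			// fds[0]: read end, fds[1]: write end
		}
*/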
9569 status_t
9570 _user_create_pipe(int* userFDs)
9571 {
9572 	// rootfs should support creating FIFOs, but let's be sure
9573 	if (!HAS_FS_CALL(sRoot, create_special_node))
9574 		return B_UNSUPPORTED;
9575 
9576 	// create the node -- the FIFO sub-node is set up automatically
9577 	fs_vnode superVnode;
9578 	ino_t nodeID;
9579 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9580 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9581 	if (status != B_OK)
9582 		return status;
9583 
9584 	// We've got one reference to the node from create_special_node() and
	// need a second one for the two open_vnode() calls below.
9585 	struct vnode* vnode;
9586 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9587 	if (status != B_OK) {
9588 		// that should not happen
9589 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9590 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9591 		return status;
9592 	}
9593 
9594 	// Everything looks good so far. Open two FDs, one for reading and one
9595 	// for writing.
9596 	int fds[2];
9597 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9598 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9599 
9600 	FDCloser closer0(fds[0], false);
9601 	FDCloser closer1(fds[1], false);
9602 
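	// Adopt B_OK only if both opens succeeded; otherwise take over the
	// error code of the first failed open (fds[0] taking precedence).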
9603 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9604 
9605 	// copy FDs to userland
9606 	if (status == B_OK) {
9607 		if (!IS_USER_ADDRESS(userFDs)
9608 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9609 			status = B_BAD_ADDRESS;
9610 		}
9611 	}
9612 
9613 	// keep the FDs if everything went fine
9614 	if (status == B_OK) {
9615 		closer0.Detach();
9616 		closer1.Detach();
9617 	}
9618 
9619 	return status;
9620 }
9621 
9622 
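/*!	Checks accessibility (\a mode, the usual R_OK/W_OK/X_OK/F_OK
	combination) of the node at \a fd + \a userPath. \a effectiveUserGroup
	determines whether the effective instead of the real user and group IDs
	are checked against.
*/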
9623 status_t
9624 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9625 {
9626 	KPath pathBuffer;
9627 	if (pathBuffer.InitCheck() != B_OK)
9628 		return B_NO_MEMORY;
9629 
9630 	char* path = pathBuffer.LockBuffer();
9631 
9632 	if (!IS_USER_ADDRESS(userPath))
9633 		return B_BAD_ADDRESS;
9634 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9635 	if (status != B_OK)
9636 		return status;
9637 
9638 	return common_access(fd, path, mode, effectiveUserGroup, false);
9639 }
9640 
9641 
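/*!	Retrieves stat information for a node, either for \a fd + \a userPath,
	or -- when \a userPath is NULL -- via the FD's fd_read_stat() hook.
	\a statSize may be smaller than sizeof(struct stat), presumably to keep
	binaries built against an older, smaller struct stat working; only that
	many bytes are copied back to \a userStat.
*/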
9642 status_t
9643 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9644 	struct stat* userStat, size_t statSize)
9645 {
9646 	struct stat stat = {0};
9647 	status_t status;
9648 
9649 	if (statSize > sizeof(struct stat))
9650 		return B_BAD_VALUE;
9651 
9652 	if (!IS_USER_ADDRESS(userStat))
9653 		return B_BAD_ADDRESS;
9654 
9655 	if (userPath != NULL) {
9656 		// path given: get the stat of the node referred to by (fd, path)
9657 		if (!IS_USER_ADDRESS(userPath))
9658 			return B_BAD_ADDRESS;
9659 
9660 		KPath pathBuffer;
9661 		if (pathBuffer.InitCheck() != B_OK)
9662 			return B_NO_MEMORY;
9663 
9664 		char* path = pathBuffer.LockBuffer();
9665 
9666 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9667 		if (status != B_OK)
9668 			return status;
9669 
9670 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9671 	} else {
9672 		// no path given: get the FD and use the FD operation
9673 		struct file_descriptor* descriptor
9674 			= get_fd(get_current_io_context(false), fd);
9675 		if (descriptor == NULL)
9676 			return B_FILE_ERROR;
9677 
9678 		if (descriptor->ops->fd_read_stat)
9679 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9680 		else
9681 			status = B_UNSUPPORTED;
9682 
9683 		put_fd(descriptor);
9684 	}
9685 
9686 	if (status != B_OK)
9687 		return status;
9688 
9689 	return user_memcpy(userStat, &stat, statSize);
9690 }
9691 
9692 
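/*!	The counterpart of _user_read_stat(): writes stat information to a
	node, either path- or FD-based. \a statMask specifies which fields of
	\a userStat are to be applied; if \a statSize is smaller than
	sizeof(struct stat), the missing tail of the structure is zeroed first.
*/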
9693 status_t
9694 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9695 	const struct stat* userStat, size_t statSize, int statMask)
9696 {
9697 	if (statSize > sizeof(struct stat))
9698 		return B_BAD_VALUE;
9699 
9700 	struct stat stat;
9701 
9702 	if (!IS_USER_ADDRESS(userStat)
9703 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9704 		return B_BAD_ADDRESS;
9705 
9706 	// clear additional stat fields
9707 	if (statSize < sizeof(struct stat))
9708 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9709 
9710 	status_t status;
9711 
9712 	if (userPath != NULL) {
9713 		// path given: write the stat of the node referred to by (fd, path)
9714 		if (!IS_USER_ADDRESS(userPath))
9715 			return B_BAD_ADDRESS;
9716 
9717 		KPath pathBuffer;
9718 		if (pathBuffer.InitCheck() != B_OK)
9719 			return B_NO_MEMORY;
9720 
9721 		char* path = pathBuffer.LockBuffer();
9722 
9723 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9724 		if (status != B_OK)
9725 			return status;
9726 
9727 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9728 			statMask, false);
9729 	} else {
9730 		// no path given: get the FD and use the FD operation
9731 		struct file_descriptor* descriptor
9732 			= get_fd(get_current_io_context(false), fd);
9733 		if (descriptor == NULL)
9734 			return B_FILE_ERROR;
9735 
9736 		if (descriptor->ops->fd_write_stat) {
9737 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9738 				statMask);
9739 		} else
9740 			status = B_UNSUPPORTED;
9741 
9742 		put_fd(descriptor);
9743 	}
9744 
9745 	return status;
9746 }
9747 
9748 
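/*!	Opens the attribute directory of the node at \a fd + \a userPath -- or
	of the FD itself, if \a userPath is NULL -- and returns a new FD for it.
*/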
9749 int
9750 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9751 {
9752 	KPath pathBuffer;
9753 	if (pathBuffer.InitCheck() != B_OK)
9754 		return B_NO_MEMORY;
9755 
9756 	char* path = pathBuffer.LockBuffer();
9757 
9758 	if (userPath != NULL) {
9759 		if (!IS_USER_ADDRESS(userPath))
9760 			return B_BAD_ADDRESS;
9761 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9762 		if (status != B_OK)
9763 			return status;
9764 	}
9765 
9766 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9767 }
9768 
9769 
9770 ssize_t
9771 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9772 	size_t readBytes)
9773 {
9774 	char attribute[B_FILE_NAME_LENGTH];
9775 
9776 	if (userAttribute == NULL)
9777 		return B_BAD_VALUE;
9778 	if (!IS_USER_ADDRESS(userAttribute))
9779 		return B_BAD_ADDRESS;
9780 	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9781 	if (status != B_OK)
9782 		return status;
9783 
9784 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9785 	if (attr < 0)
9786 		return attr;
9787 
9788 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9789 	_user_close(attr);
9790 
9791 	return bytes;
9792 }
9793 
9794 
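/*!	Writes \a writeBytes bytes to the attribute \a userAttribute of the
	node \a fd refers to, creating the attribute if necessary. To mimic the
	classic BeOS write_attr() behavior, writing at position 0 truncates the
	attribute first, while a nonzero \a pos leaves existing contents in
	place.
*/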
9795 ssize_t
9796 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9797 	const void* buffer, size_t writeBytes)
9798 {
9799 	char attribute[B_FILE_NAME_LENGTH];
9800 
9801 	if (userAttribute == NULL)
9802 		return B_BAD_VALUE;
9803 	if (!IS_USER_ADDRESS(userAttribute))
9804 		return B_BAD_ADDRESS;
9805 	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9806 	if (status != B_OK)
9807 		return status;
9808 
9809 	// Try to support the typical BeOS truncation semantics as well as the
9810 	// position argument: writing at position 0 truncates the attribute.
9811 	int attr = attr_create(fd, NULL, attribute, type,
9812 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9813 	if (attr < 0)
9814 		return attr;
9815 
9816 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9817 	_user_close(attr);
9818 
9819 	return bytes;
9820 }
9821 
9822 
9823 status_t
9824 _user_stat_attr(int fd, const char* userAttribute,
9825 	struct attr_info* userAttrInfo)
9826 {
9827 	char attribute[B_FILE_NAME_LENGTH];
9828 
9829 	if (userAttribute == NULL || userAttrInfo == NULL)
9830 		return B_BAD_VALUE;
9831 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9832 		return B_BAD_ADDRESS;
9833 	status_t status = user_copy_name(attribute, userAttribute,
9834 		sizeof(attribute));
9835 	if (status != B_OK)
9836 		return status;
9837 
9838 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9839 	if (attr < 0)
9840 		return attr;
9841 
9842 	struct file_descriptor* descriptor
9843 		= get_fd(get_current_io_context(false), attr);
9844 	if (descriptor == NULL) {
9845 		_user_close(attr);
9846 		return B_FILE_ERROR;
9847 	}
9848 
9849 	struct stat stat;
9850 	if (descriptor->ops->fd_read_stat)
9851 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9852 	else
9853 		status = B_UNSUPPORTED;
9854 
9855 	put_fd(descriptor);
9856 	_user_close(attr);
9857 
9858 	if (status == B_OK) {
9859 		attr_info info;
9860 		info.type = stat.st_type;
9861 		info.size = stat.st_size;
9862 
9863 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9864 			return B_BAD_ADDRESS;
9865 	}
9866 
9867 	return status;
9868 }
9869 
9870 
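/*!	Opens the attribute \a userName of the node at \a fd + \a userPath and
	returns a new FD for it. If \a openMode contains O_CREAT, a missing
	attribute is created with the given \a type; otherwise \a type is
	ignored.
*/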
9871 int
9872 _user_open_attr(int fd, const char* userPath, const char* userName,
9873 	uint32 type, int openMode)
9874 {
9875 	char name[B_FILE_NAME_LENGTH];
9876 
9877 	if (!IS_USER_ADDRESS(userName))
9878 		return B_BAD_ADDRESS;
9879 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9880 	if (status != B_OK)
9881 		return status;
9882 
9883 	KPath pathBuffer;
9884 	if (pathBuffer.InitCheck() != B_OK)
9885 		return B_NO_MEMORY;
9886 
9887 	char* path = pathBuffer.LockBuffer();
9888 
9889 	if (userPath != NULL) {
9890 		if (!IS_USER_ADDRESS(userPath))
9891 			return B_BAD_ADDRESS;
9892 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9893 		if (status != B_OK)
9894 			return status;
9895 	}
9896 
9897 	if ((openMode & O_CREAT) != 0) {
9898 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9899 			false);
9900 	}
9901 
9902 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9903 }
9904 
9905 
9906 status_t
9907 _user_remove_attr(int fd, const char* userName)
9908 {
9909 	char name[B_FILE_NAME_LENGTH];
9910 
9911 	if (!IS_USER_ADDRESS(userName))
9912 		return B_BAD_ADDRESS;
9913 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9914 	if (status != B_OK)
9915 		return status;
9916 
9917 	return attr_remove(fd, name, false);
9918 }
9919 
9920 
9921 status_t
9922 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9923 	const char* userToName)
9924 {
9925 	if (!IS_USER_ADDRESS(userFromName)
9926 		|| !IS_USER_ADDRESS(userToName))
9927 		return B_BAD_ADDRESS;
9928 
9929 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9930 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9931 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9932 		return B_NO_MEMORY;
9933 
9934 	char* fromName = fromNameBuffer.LockBuffer();
9935 	char* toName = toNameBuffer.LockBuffer();
9936 
9937 	status_t status = user_copy_name(fromName, userFromName,
		B_FILE_NAME_LENGTH);
9938 	if (status != B_OK)
9939 		return status;
9940 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9941 	if (status != B_OK)
9942 		return status;
9943 
9944 	return attr_rename(fromFile, fromName, toFile, toName, false);
9945 }
9946 
9947 
9948 int
9949 _user_open_index_dir(dev_t device)
9950 {
9951 	return index_dir_open(device, false);
9952 }
9953 
9954 
9955 status_t
9956 _user_create_index(dev_t device, const char* userName, uint32 type,
9957 	uint32 flags)
9958 {
9959 	char name[B_FILE_NAME_LENGTH];
9960 
9961 	if (!IS_USER_ADDRESS(userName))
9962 		return B_BAD_ADDRESS;
9963 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9964 	if (status != B_OK)
9965 		return status;
9966 
9967 	return index_create(device, name, type, flags, false);
9968 }
9969 
9970 
9971 status_t
9972 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9973 {
9974 	char name[B_FILE_NAME_LENGTH];
9975 	struct stat stat = {0};
9976 	status_t status;
9977 
9978 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9979 		return B_BAD_ADDRESS;
9980 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9981 	if (status != B_OK)
9982 		return status;
9983 
9984 	status = index_name_read_stat(device, name, &stat, false);
9985 	if (status == B_OK) {
9986 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9987 			return B_BAD_ADDRESS;
9988 	}
9989 
9990 	return status;
9991 }
9992 
9993 
9994 status_t
9995 _user_remove_index(dev_t device, const char* userName)
9996 {
9997 	char name[B_FILE_NAME_LENGTH];
9998 
9999 	if (!IS_USER_ADDRESS(userName))
10000 		return B_BAD_ADDRESS;
10001 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10002 	if (status != B_OK)
10003 		return status;
10004 
10005 	return index_remove(device, name, false);
10006 }
10007 
10008 
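/*!	Copies the absolute path of the current working directory into
	\a userBuffer. \a size is clamped to kMaxPathLength before a kernel
	buffer of matching size is allocated.
*/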
10009 status_t
10010 _user_getcwd(char* userBuffer, size_t size)
10011 {
10012 	if (size == 0)
10013 		return B_BAD_VALUE;
10014 	if (!IS_USER_ADDRESS(userBuffer))
10015 		return B_BAD_ADDRESS;
10016 
10017 	if (size > kMaxPathLength)
10018 		size = kMaxPathLength;
10019 
10020 	KPath pathBuffer(size);
10021 	if (pathBuffer.InitCheck() != B_OK)
10022 		return B_NO_MEMORY;
10023 
10024 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
10025 
10026 	char* path = pathBuffer.LockBuffer();
10027 
10028 	status_t status = get_cwd(path, size, false);
10029 	if (status != B_OK)
10030 		return status;
10031 
10032 	// Copy back the result
10033 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10034 		return B_BAD_ADDRESS;
10035 
10036 	return status;
10037 }
10038 
10039 
10040 status_t
10041 _user_setcwd(int fd, const char* userPath)
10042 {
10043 	TRACE(("user_setcwd: path = %p\n", userPath));
10044 
10045 	KPath pathBuffer;
10046 	if (pathBuffer.InitCheck() != B_OK)
10047 		return B_NO_MEMORY;
10048 
10049 	char* path = pathBuffer.LockBuffer();
10050 
10051 	if (userPath != NULL) {
10052 		if (!IS_USER_ADDRESS(userPath))
10053 			return B_BAD_ADDRESS;
10054 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10055 		if (status != B_OK)
10056 			return status;
10057 	}
10058 
10059 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10060 }
10061 
10062 
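/*!	The chroot() backend: makes the node at \a userPath the calling team's
	new I/O context root. Only a team with effective user ID 0 may do this.
	The swap happens under sIOContextRootLock; the old root's vnode
	reference is released afterwards.
*/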
10063 status_t
10064 _user_change_root(const char* userPath)
10065 {
10066 	// only root is allowed to chroot()
10067 	if (geteuid() != 0)
10068 		return B_NOT_ALLOWED;
10069 
10070 	// alloc path buffer
10071 	KPath pathBuffer;
10072 	if (pathBuffer.InitCheck() != B_OK)
10073 		return B_NO_MEMORY;
10074 
10075 	// copy userland path to kernel
10076 	char* path = pathBuffer.LockBuffer();
10077 	if (userPath != NULL) {
10078 		if (!IS_USER_ADDRESS(userPath))
10079 			return B_BAD_ADDRESS;
10080 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10081 		if (status != B_OK)
10082 			return status;
10083 	}
10084 
10085 	// get the vnode
10086 	struct vnode* vnode;
10087 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
10088 	if (status != B_OK)
10089 		return status;
10090 
10091 	// set the new root
10092 	struct io_context* context = get_current_io_context(false);
10093 	mutex_lock(&sIOContextRootLock);
10094 	struct vnode* oldRoot = context->root;
10095 	context->root = vnode;
10096 	mutex_unlock(&sIOContextRootLock);
10097 
10098 	put_vnode(oldRoot);
10099 
10100 	return B_OK;
10101 }
10102 
10103 
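/*!	Opens a query on \a device. The query string is copied in from userland
	(its length capped at 64 KiB as a safety restriction) and passed on to
	query_open(). \a port and \a token describe where live query updates are
	sent if the corresponding flag (B_LIVE_QUERY) is set in \a flags.
*/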
10104 int
10105 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10106 	uint32 flags, port_id port, int32 token)
10107 {
10108 	if (device < 0 || userQuery == NULL || queryLength == 0)
10109 		return B_BAD_VALUE;
10110 
10111 	if (!IS_USER_ADDRESS(userQuery))
10112 		return B_BAD_ADDRESS;
10113 
10114 	// this is a safety restriction
10115 	if (queryLength >= 65536)
10116 		return B_NAME_TOO_LONG;
10117 
10118 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10119 	if (!query.IsValid())
10120 		return B_NO_MEMORY;
10121 
10122 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10123 		return B_BAD_ADDRESS;
10124 
10125 	return query_open(device, query, flags, port, token, false);
10126 }
10127 
10128 
10129 #include "vfs_request_io.cpp"
10130