/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
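
// Illustrative note: in a KDEBUG build,
//
//	FS_CALL(vnode, read_stat, &stat)
//
// expands to
//
//	HAS_FS_CALL(vnode, read_stat)
//		? vnode->ops->read_stat(vnode->mount->volume, vnode, &stat)
//		: (panic(...), 0)
//
// i.e. a missing hook panics. Code paths that must tolerate file systems
// without a given hook therefore check HAS_FS_CALL() themselves and fall
// back (typically to B_UNSUPPORTED), as normalize_flock() below does for
// read_stat.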


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() - this does not depend
	// on PATH_MAX)


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};
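
// A note on the semaphore protocol (derived from the code below): the lock
// semaphore is used as a deletable mutex. It is created with a count of 0,
// i.e. already "locked" by its creator (create_advisory_locking()), acquired
// via acquire_sem() in get_advisory_locking(), and released again in
// put_advisory_locking(). Deleting the semaphore in ~advisory_locking() wakes
// any waiters with an error, which get_advisory_locking() interprets as "the
// locking object is gone".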

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, holding the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write-locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;
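
// Example: VHASH above folds the 64-bit vnode ID and the mount ID into a
// single 32-bit hash; for device 5 and vnode ID 0x0000000200000001 it yields
// ((2 + 1) ^ 5) == 6.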


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s: BUSY_VNODE_RETRIES retries with a
// BUSY_VNODE_DELAY µs snooze between them)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);

static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

} // namespace


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}
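
// A minimal usage sketch (compiled out; example_with_mount is a hypothetical
// helper): get_mount() returns the mount with a reference to its root vnode,
// which keeps the whole fs_mount alive until the matching put_mount().
#if 0
static status_t
example_with_mount(dev_t device)
{
	struct fs_mount* mount;
	status_t status = get_mount(device, &mount);
	if (status != B_OK)
		return status;

	// mount->volume and mount->root_vnode can be used safely here

	put_mount(mount);
	return B_OK;
}
#endif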


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
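
// Example: get_file_system_name("file_systems/bfs/v1") and
// get_file_system_name("bfs") both return a newly allocated "bfs"; the
// trailing version component ("/v1") is cut off.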


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
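
// Example: for fsNames "ext2:bindfs", layer 0 yields "ext2", layer 1 yields
// "bindfs", and any higher layer yields NULL.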


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}

/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function shall wait for a busy vnode to
		   become unbusy instead of failing with \c B_BUSY.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			// someone else created the node in the meantime -- downgrade the
			// write lock to a read lock (the writer can nest read locks)
			// and retry the lookup
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}
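
// A minimal usage sketch (compiled out; example_fsync_node is a hypothetical
// helper): get_vnode() returns a referenced vnode, and VNodePutter guarantees
// the matching put_vnode() on every exit path.
#if 0
static status_t
example_fsync_node(dev_t device, ino_t node)
{
	struct vnode* vnode;
	status_t status = get_vnode(device, node, &vnode, true, false);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
		// from here on, every return path implicitly calls put_vnode()

	if (!HAS_FS_CALL(vnode, fsync))
		return B_UNSUPPORTED;

	return FS_CALL_NO_PARAMS(vnode, fsync);
}
#endif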


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the meantime; you'll still get that
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
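
// Worked example: a held lock covering bytes [100, 199] intersects a flock
// with l_start 150 and l_len 100 (bytes [150, 249]), since 100 <= 249 and
// 199 >= 150. Both ranges are inclusive here, the flock having been
// normalized by normalize_flock() below.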


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	// Remember the requested lock type before overwriting it below; a shared
	// request only collides with exclusive locks.
	const bool wantsShared = flock->l_type == F_RDLCK;
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (!wantsShared || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// allocated with malloc() like all other advisory_locks,
					// since removed locks are released with free()
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// take over the original end before it is truncated below
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}
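
// Worked example of the split case: unlocking bytes [400, 599] out of a held
// lock covering [0, 999] leaves two locks, [0, 399] (the truncated original)
// and [600, 999] (the newly added secondLock).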


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1830 
1831 
1832 /*!	Normalizes the \a flock structure to make it easier to compare the
1833 	structure with others. The l_start and l_len fields are set to absolute
1834 	values according to the l_whence field.
1835 */
1836 static status_t
1837 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1838 {
1839 	switch (flock->l_whence) {
1840 		case SEEK_SET:
1841 			break;
1842 		case SEEK_CUR:
1843 			flock->l_start += descriptor->pos;
1844 			break;
1845 		case SEEK_END:
1846 		{
1847 			struct vnode* vnode = descriptor->u.vnode;
1848 			struct stat stat;
1849 			status_t status;
1850 
1851 			if (!HAS_FS_CALL(vnode, read_stat))
1852 				return B_UNSUPPORTED;
1853 
1854 			status = FS_CALL(vnode, read_stat, &stat);
1855 			if (status != B_OK)
1856 				return status;
1857 
1858 			flock->l_start += stat.st_size;
1859 			break;
1860 		}
1861 		default:
1862 			return B_BAD_VALUE;
1863 	}
1864 
1865 	if (flock->l_start < 0)
1866 		flock->l_start = 0;
1867 	if (flock->l_len == 0)
1868 		flock->l_len = OFF_MAX;
1869 
1870 	// don't let the offset and length overflow
1871 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1872 		flock->l_len = OFF_MAX - flock->l_start;
1873 
1874 	if (flock->l_len < 0) {
1875 		// a negative length reverses the region
1876 		flock->l_start += flock->l_len;
1877 		flock->l_len = -flock->l_len;
1878 	}
1879 
1880 	return B_OK;
1881 }
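

/*	Worked example for normalize_flock(), with hypothetical values: a request
	of { l_whence = SEEK_CUR, l_start = 100, l_len = 0 } on a descriptor at
	position 400 becomes l_start = 500 and, since a zero length means "to the
	end of the file", l_len = OFF_MAX - 500. A negative length like
	{ SEEK_SET, 500, -100 } reverses the region into l_start = 400,
	l_len = 100.
*/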
1882 
1883 
1884 static void
1885 replace_vnode_if_disconnected(struct fs_mount* mount,
1886 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1887 	struct vnode* fallBack, bool lockRootLock)
1888 {
1889 	struct vnode* givenVnode = vnode;
1890 	bool vnodeReplaced = false;
1891 
1892 	ReadLocker vnodeReadLocker(sVnodeLock);
1893 
1894 	if (lockRootLock)
1895 		mutex_lock(&sIOContextRootLock);
1896 
1897 	while (vnode != NULL && vnode->mount == mount
1898 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1899 		if (vnode->covers != NULL) {
1900 			// redirect the vnode to the covered vnode
1901 			vnode = vnode->covers;
1902 		} else
1903 			vnode = fallBack;
1904 
1905 		vnodeReplaced = true;
1906 	}
1907 
1908 	// If we've replaced the node, grab a reference for the new one.
1909 	if (vnodeReplaced && vnode != NULL)
1910 		inc_vnode_ref_count(vnode);
1911 
1912 	if (lockRootLock)
1913 		mutex_unlock(&sIOContextRootLock);
1914 
1915 	vnodeReadLocker.Unlock();
1916 
1917 	if (vnodeReplaced)
1918 		put_vnode(givenVnode);
1919 }
1920 
1921 
1922 /*!	Disconnects all file descriptors that are associated with the
1923 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1924 	\a mount object.
1925 
1926 	Note, after you've called this function, there might still be ongoing
1927 	accesses - those that were already in progress won't be interrupted.
1928 	However, any subsequent access will fail.
1929 
1930 	This is not a cheap function and should be used with care and rarely.
1931 	TODO: there is currently no means to stop a blocking read/write!
1932 */
1933 static void
1934 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1935 	struct vnode* vnodeToDisconnect)
1936 {
1937 	// iterate over all teams and peek into their file descriptors
1938 	TeamListIterator teamIterator;
1939 	while (Team* team = teamIterator.Next()) {
1940 		BReference<Team> teamReference(team, true);
1941 		TeamLocker teamLocker(team);
1942 
1943 		// lock the I/O context
1944 		io_context* context = team->io_context;
1945 		if (context == NULL)
1946 			continue;
1947 		MutexLocker contextLocker(context->io_mutex);
1948 
1949 		teamLocker.Unlock();
1950 
1951 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1952 			sRoot, true);
1953 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1954 			sRoot, false);
1955 
1956 		for (uint32 i = 0; i < context->table_size; i++) {
1957 			struct file_descriptor* descriptor = context->fds[i];
1958 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1959 				continue;
1960 
1961 			inc_fd_ref_count(descriptor);
1962 
1963 			// if this descriptor points at this mount, we
1964 			// need to disconnect it to be able to unmount
1965 			struct vnode* vnode = fd_vnode(descriptor);
1966 			if (vnodeToDisconnect != NULL) {
1967 				if (vnode == vnodeToDisconnect)
1968 					disconnect_fd(descriptor);
1969 			} else if ((vnode != NULL && vnode->mount == mount)
1970 				|| (vnode == NULL && descriptor->u.mount == mount))
1971 				disconnect_fd(descriptor);
1972 
1973 			put_fd(descriptor);
1974 		}
1975 	}
1976 }
1977 
1978 
1979 /*!	\brief Gets the root node of the current IO context.
1980 	If \a kernel is \c true, the kernel IO context will be used.
1981 	The caller obtains a reference to the returned node.
1982 */
1983 struct vnode*
1984 get_root_vnode(bool kernel)
1985 {
1986 	if (!kernel) {
1987 		// Get the root vnode from the team's IO context
1988 		struct io_context* context = get_current_io_context(kernel);
1989 
1990 		mutex_lock(&sIOContextRootLock);
1991 
1992 		struct vnode* root = context->root;
1993 		if (root != NULL)
1994 			inc_vnode_ref_count(root);
1995 
1996 		mutex_unlock(&sIOContextRootLock);
1997 
1998 		if (root != NULL)
1999 			return root;
2000 
2001 		// That should never happen.
2002 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2003 			"have a root\n", team_get_current_team_id());
2004 	}
2005 
2006 	inc_vnode_ref_count(sRoot);
2007 	return sRoot;
2008 }
2009 
2010 
2011 /*!	\brief Gets the directory path and leaf name for a given path.
2012 
2013 	The supplied \a path is transformed to refer to the directory part of
2014 	the entry identified by the original path, and the leaf name of the
2015 	original entry is written into the buffer \a filename.
2016 	Neither the returned path nor the leaf name can be expected to be
2017 	canonical.
2018 
2019 	\param path The path to be analyzed. Must be able to store at least one
2020 		   additional character.
2021 	\param filename The buffer into which the leaf name will be written.
2022 		   Must be of size B_FILE_NAME_LENGTH at least.
2023 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2024 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2025 		   if the given path name is empty.
2026 */
2027 static status_t
2028 get_dir_path_and_leaf(char* path, char* filename)
2029 {
2030 	if (*path == '\0')
2031 		return B_ENTRY_NOT_FOUND;
2032 
2033 	char* last = strrchr(path, '/');
2034 		// '/' is not allowed in file names!
2035 
2036 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2037 
2038 	if (last == NULL) {
2039 		// this path is a single segment with no '/' in it,
2040 		// e.g. "foo"
2041 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2042 			return B_NAME_TOO_LONG;
2043 
2044 		strcpy(path, ".");
2045 	} else {
2046 		last++;
2047 		if (last[0] == '\0') {
2048 			// special case: the path ends in one or more '/' - remove them
2049 			while (*--last == '/' && last != path);
2050 			last[1] = '\0';
2051 
2052 			if (last == path && last[0] == '/') {
2053 				// This path points to the root of the file system
2054 				strcpy(filename, ".");
2055 				return B_OK;
2056 			}
2057 			for (; last != path && *(last - 1) != '/'; last--);
2058 				// rewind to the start of the leaf before the '/'
2059 		}
2060 
2061 		// normal leaf: replace the leaf portion of the path with a '.'
2062 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2063 			return B_NAME_TOO_LONG;
2064 
2065 		last[0] = '.';
2066 		last[1] = '\0';
2067 	}
2068 	return B_OK;
2069 }
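

/*	Examples of the in-place transformation performed by
	get_dir_path_and_leaf() (hypothetical inputs):
		"/a/b/c"  -> path = "/a/b/.", filename = "c"
		"/a/b///" -> path = "/a/.",   filename = "b"
		"foo"     -> path = ".",      filename = "foo"
		"/"       -> path = "/",      filename = "."
*/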
2070 
2071 
2072 static status_t
2073 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2074 	bool traverse, bool kernel, struct vnode** _vnode)
2075 {
2076 	char clonedName[B_FILE_NAME_LENGTH + 1];
2077 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2078 		return B_NAME_TOO_LONG;
2079 
2080 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2081 	struct vnode* directory;
2082 
2083 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2084 	if (status < 0)
2085 		return status;
2086 
2087 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2088 		_vnode, NULL);
2089 }
2090 
2091 
2092 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2093 	and returns the respective vnode.
2094 	On success a reference to the vnode is acquired for the caller.
2095 */
2096 static status_t
2097 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2098 {
2099 	ino_t id;
2100 	bool missing;
2101 
2102 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2103 		return missing ? B_ENTRY_NOT_FOUND
2104 			: get_vnode(dir->device, id, _vnode, true, false);
2105 	}
2106 
2107 	status_t status = FS_CALL(dir, lookup, name, &id);
2108 	if (status != B_OK)
2109 		return status;
2110 
2111 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2112 	// have a reference and just need to look the node up.
2113 	rw_lock_read_lock(&sVnodeLock);
2114 	*_vnode = lookup_vnode(dir->device, id);
2115 	rw_lock_read_unlock(&sVnodeLock);
2116 
2117 	if (*_vnode == NULL) {
2118 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2119 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2120 		return B_ENTRY_NOT_FOUND;
2121 	}
2122 
2123 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2124 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2125 //		(*_vnode)->mount->id, (*_vnode)->id);
2126 
2127 	return B_OK;
2128 }
2129 
2130 
2131 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2132 	\a path must not be NULL.
2133 	If it returns successfully, \a path contains the name of the last path
2134 	component. This function clobbers the buffer pointed to by \a path only
2135 	if it contains more than one component.
2136 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2137 	it is successful or not!
2138 */
2139 static status_t
2140 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2141 	int count, struct io_context* ioContext, struct vnode** _vnode,
2142 	ino_t* _parentID)
2143 {
2144 	status_t status = B_OK;
2145 	ino_t lastParentID = vnode->id;
2146 
2147 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2148 
2149 	if (path == NULL) {
2150 		put_vnode(vnode);
2151 		return B_BAD_VALUE;
2152 	}
2153 
2154 	if (*path == '\0') {
2155 		put_vnode(vnode);
2156 		return B_ENTRY_NOT_FOUND;
2157 	}
2158 
2159 	while (true) {
2160 		struct vnode* nextVnode;
2161 		char* nextPath;
2162 
2163 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2164 			path));
2165 
2166 		// done?
2167 		if (path[0] == '\0')
2168 			break;
2169 
2170 		// walk to find the next path component ("path" will point to a single
2171 		// path component), and filter out multiple slashes
2172 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2173 				nextPath++);
2174 
2175 		if (*nextPath == '/') {
2176 			*nextPath = '\0';
2177 			do
2178 				nextPath++;
2179 			while (*nextPath == '/');
2180 		}
2181 
2182 		// If we have a '..' at a covering vnode, move to the covered
2183 		// vnode, so that we pass the '..' to the underlying file system.
2184 		// Also prevent breaking the root of the IO context.
2185 		if (strcmp("..", path) == 0) {
2186 			if (vnode == ioContext->root) {
2187 				// Attempted prison break! Keep it contained.
2188 				path = nextPath;
2189 				continue;
2190 			}
2191 
2192 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2193 				nextVnode = coveredVnode;
2194 				put_vnode(vnode);
2195 				vnode = nextVnode;
2196 			}
2197 		}
2198 
2199 		// check if vnode is really a directory
2200 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2201 			status = B_NOT_A_DIRECTORY;
2202 
2203 		// Check if we have the right to search the current directory vnode.
2204 		// If a file system doesn't have the access() function, we assume that
2205 		// searching a directory is always allowed
2206 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2207 			status = FS_CALL(vnode, access, X_OK);
2208 
2209 		// Tell the filesystem to get the vnode of this path component (if we
2210 		// got the permission from the call above)
2211 		if (status == B_OK)
2212 			status = lookup_dir_entry(vnode, path, &nextVnode);
2213 
2214 		if (status != B_OK) {
2215 			put_vnode(vnode);
2216 			return status;
2217 		}
2218 
2219 		// If the new node is a symbolic link, resolve it (if we've been told
2220 		// to do it)
2221 		if (S_ISLNK(nextVnode->Type())
2222 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2223 			size_t bufferSize;
2224 			char* buffer;
2225 
2226 			TRACE(("traverse link\n"));
2227 
2228 			// it's not exactly nice style using goto in this way, but hey,
2229 			// it works :-/
2230 			if (count + 1 > B_MAX_SYMLINKS) {
2231 				status = B_LINK_LIMIT;
2232 				goto resolve_link_error;
2233 			}
2234 
2235 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2236 			if (buffer == NULL) {
2237 				status = B_NO_MEMORY;
2238 				goto resolve_link_error;
2239 			}
2240 
2241 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2242 				bufferSize--;
2243 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2244 				// null-terminate
2245 				if (status >= 0)
2246 					buffer[bufferSize] = '\0';
2247 			} else
2248 				status = B_BAD_VALUE;
2249 
2250 			if (status != B_OK) {
2251 				free(buffer);
2252 
2253 		resolve_link_error:
2254 				put_vnode(vnode);
2255 				put_vnode(nextVnode);
2256 
2257 				return status;
2258 			}
2259 			put_vnode(nextVnode);
2260 
2261 			// Check if we start from the root directory or the current
2262 			// directory ("vnode" still points to that one).
2263 			// Cut off all leading slashes if it's the root directory
2264 			path = buffer;
2265 			bool absoluteSymlink = false;
2266 			if (path[0] == '/') {
2267 				// we don't need the old directory anymore
2268 				put_vnode(vnode);
2269 
2270 				while (*++path == '/')
2271 					;
2272 
2273 				mutex_lock(&sIOContextRootLock);
2274 				vnode = ioContext->root;
2275 				inc_vnode_ref_count(vnode);
2276 				mutex_unlock(&sIOContextRootLock);
2277 
2278 				absoluteSymlink = true;
2279 			}
2280 
2281 			inc_vnode_ref_count(vnode);
2282 				// balance the next recursion - we will decrement the
2283 				// ref_count of the vnode, no matter if we succeeded or not
2284 
2285 			if (absoluteSymlink && *path == '\0') {
2286 				// symlink was just "/"
2287 				nextVnode = vnode;
2288 			} else {
2289 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2290 					ioContext, &nextVnode, &lastParentID);
2291 			}
2292 
2293 			free(buffer);
2294 
2295 			if (status != B_OK) {
2296 				put_vnode(vnode);
2297 				return status;
2298 			}
2299 		} else
2300 			lastParentID = vnode->id;
2301 
2302 		// decrease the ref count on the old dir we just looked up into
2303 		put_vnode(vnode);
2304 
2305 		path = nextPath;
2306 		vnode = nextVnode;
2307 
2308 		// see if we hit a covered node
2309 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2310 			put_vnode(vnode);
2311 			vnode = coveringNode;
2312 		}
2313 	}
2314 
2315 	*_vnode = vnode;
2316 	if (_parentID)
2317 		*_parentID = lastParentID;
2318 
2319 	return B_OK;
2320 }
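

/*	Minimal caller sketch (illustration only; "dir" and "kernel" are assumed
	to be provided by the caller): since vnode_path_to_vnode() consumes one
	reference to the starting vnode in any case, callers that want to keep
	their own reference must acquire an extra one first:

		char path[] = "a/b/c";
			// contains "c" on success
		struct vnode* found;
		inc_vnode_ref_count(dir);
		status_t error = vnode_path_to_vnode(dir, path, true, 0, kernel,
			&found, NULL);
		if (error == B_OK)
			put_vnode(found);
*/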
2321 
2322 
2323 static status_t
2324 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2325 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2326 {
2327 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2328 		get_current_io_context(kernel), _vnode, _parentID);
2329 }
2330 
2331 
2332 static status_t
2333 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2334 	ino_t* _parentID, bool kernel)
2335 {
2336 	struct vnode* start = NULL;
2337 
2338 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2339 
2340 	if (!path)
2341 		return B_BAD_VALUE;
2342 
2343 	if (*path == '\0')
2344 		return B_ENTRY_NOT_FOUND;
2345 
2346 	// figure out if we need to start at root or at cwd
2347 	if (*path == '/') {
2348 		if (sRoot == NULL) {
2349 			// we're a bit early, aren't we?
2350 			return B_ERROR;
2351 		}
2352 
2353 		while (*++path == '/')
2354 			;
2355 		start = get_root_vnode(kernel);
2356 
2357 		if (*path == '\0') {
2358 			*_vnode = start;
2359 			return B_OK;
2360 		}
2361 
2362 	} else {
2363 		struct io_context* context = get_current_io_context(kernel);
2364 
2365 		mutex_lock(&context->io_mutex);
2366 		start = context->cwd;
2367 		if (start != NULL)
2368 			inc_vnode_ref_count(start);
2369 		mutex_unlock(&context->io_mutex);
2370 
2371 		if (start == NULL)
2372 			return B_ERROR;
2373 	}
2374 
2375 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2376 		_parentID);
2377 }
2378 
2379 
2380 /*! Returns the vnode of the next-to-last segment of the path, and writes
2381 	the last path component into \a filename.
2382 	The path buffer must be able to store at least one additional character.
2383 */
2384 static status_t
2385 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2386 	bool kernel)
2387 {
2388 	status_t status = get_dir_path_and_leaf(path, filename);
2389 	if (status != B_OK)
2390 		return status;
2391 
2392 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2393 }
2394 
2395 
2396 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2397 		   to by a FD + path pair.
2398 
2399 	\a path must be given in either case. \a fd might be omitted, in which
2400 	case \a path is either an absolute path or one relative to the current
2401 	directory. If both are supplied and \a path is relative, it is reckoned off
2402 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2403 	ignored.
2404 
2405 	The caller has the responsibility to call put_vnode() on the returned
2406 	directory vnode.
2407 
2408 	\param fd The FD. May be < 0.
2409 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2410 	       is modified by this function. It must have at least room for a
2411 	       string one character longer than the path it contains.
2412 	\param _vnode A pointer to a variable the directory vnode shall be written
2413 		   into.
2414 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2415 		   the leaf name of the specified entry will be written.
2416 	\param kernel \c true, if invoked from inside the kernel, \c false if
2417 		   invoked from userland.
2418 	\return \c B_OK, if everything went fine, another error code otherwise.
2419 */
2420 static status_t
2421 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2422 	char* filename, bool kernel)
2423 {
2424 	if (!path)
2425 		return B_BAD_VALUE;
2426 	if (*path == '\0')
2427 		return B_ENTRY_NOT_FOUND;
2428 	if (fd < 0)
2429 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2430 
2431 	status_t status = get_dir_path_and_leaf(path, filename);
2432 	if (status != B_OK)
2433 		return status;
2434 
2435 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2436 }
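

/*	Example (hypothetical values): with \a fd referring to a directory and
	\a path being "sub/dir/file", the path is rewritten in place to
	"sub/dir/.", "file" is copied into \a filename, and *\a _vnode is set to
	the vnode of "sub/dir", resolved relative to the FD's directory.
*/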
2437 
2438 
2439 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2440 		   to by a vnode + path pair.
2441 
2442 	\a path must be given in either case. \a vnode might be omitted, in which
2443 	case \a path is either an absolute path or one relative to the current
2444 	directory. If both are supplied and \a path is relative, it is reckoned off
2445 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2446 	ignored.
2447 
2448 	The caller has the responsibility to call put_vnode() on the returned
2449 	directory vnode.
2450 
2451 	\param vnode The vnode. May be \c NULL.
2452 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2453 	       is modified by this function. It must have at least room for a
2454 	       string one character longer than the path it contains.
2455 	\param _vnode A pointer to a variable the directory vnode shall be written
2456 		   into.
2457 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2458 		   the leaf name of the specified entry will be written.
2459 	\param kernel \c true, if invoked from inside the kernel, \c false if
2460 		   invoked from userland.
2461 	\return \c B_OK, if everything went fine, another error code otherwise.
2462 */
2463 static status_t
2464 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2465 	struct vnode** _vnode, char* filename, bool kernel)
2466 {
2467 	if (!path)
2468 		return B_BAD_VALUE;
2469 	if (*path == '\0')
2470 		return B_ENTRY_NOT_FOUND;
2471 	if (vnode == NULL || path[0] == '/')
2472 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2473 
2474 	status_t status = get_dir_path_and_leaf(path, filename);
2475 	if (status != B_OK)
2476 		return status;
2477 
2478 	inc_vnode_ref_count(vnode);
2479 		// vnode_path_to_vnode() always decrements the ref count
2480 
2481 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2482 }
2483 
2484 
2485 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2486 */
2487 static status_t
2488 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2489 	size_t bufferSize, struct io_context* ioContext)
2490 {
2491 	if (bufferSize < sizeof(struct dirent))
2492 		return B_BAD_VALUE;
2493 
2494 	// See if the vnode is covering another vnode and move to the covered
2495 	// vnode so we get the underlying file system
2496 	VNodePutter vnodePutter;
2497 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2498 		vnode = coveredVnode;
2499 		vnodePutter.SetTo(vnode);
2500 	}
2501 
2502 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2503 		// The FS supports getting the name of a vnode.
2504 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2505 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2506 			return B_OK;
2507 	}
2508 
2509 	// The FS doesn't support getting the name of a vnode. So we search the
2510 	// parent directory for the vnode, if the caller let us.
2511 
2512 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2513 		return B_UNSUPPORTED;
2514 
2515 	void* cookie;
2516 
2517 	status_t status = FS_CALL(parent, open_dir, &cookie);
2518 	if (status >= B_OK) {
2519 		while (true) {
2520 			uint32 num = 1;
2521 			// We use the FS hook directly instead of dir_read(), since we don't
2522 			// want the entries to be fixed. We have already resolved vnode to
2523 			// the covered node.
2524 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2525 				&num);
2526 			if (status != B_OK)
2527 				break;
2528 			if (num == 0) {
2529 				status = B_ENTRY_NOT_FOUND;
2530 				break;
2531 			}
2532 
2533 			if (vnode->id == buffer->d_ino) {
2534 				// found correct entry!
2535 				break;
2536 			}
2537 		}
2538 
2539 		FS_CALL(parent, close_dir, cookie);
2540 		FS_CALL(parent, free_dir_cookie, cookie);
2541 	}
2542 	return status;
2543 }
2544 
2545 
2546 static status_t
2547 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2548 	size_t nameSize, bool kernel)
2549 {
2550 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2551 	struct dirent* dirent = (struct dirent*)buffer;
2552 
2553 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2554 		get_current_io_context(kernel));
2555 	if (status != B_OK)
2556 		return status;
2557 
2558 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2559 		return B_BUFFER_OVERFLOW;
2560 
2561 	return B_OK;
2562 }
2563 
2564 
2565 /*!	Gets the full path to a given directory vnode.
2566 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2567 	file system doesn't support this call, it will fall back to iterating
2568 	through the parent directory to get the name of the child.
2569 
2570 	To protect against circular loops, it supports a maximum tree depth
2571 	of 256 levels.
2572 
2573 	Note that the path may no longer be correct by the time this function
2574 	returns! No locking is used to keep the returned path valid, as paths
2575 	aren't safe anyway: the path to a file can change at any time.
2576 
2577 	It might be a good idea, though, to check if the returned path exists
2578 	in the calling function (it's not done here for efficiency reasons).
2579 */
2580 static status_t
2581 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2582 	bool kernel)
2583 {
2584 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2585 
2586 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2587 		return B_BAD_VALUE;
2588 
2589 	if (!S_ISDIR(vnode->Type()))
2590 		return B_NOT_A_DIRECTORY;
2591 
2592 	char* path = buffer;
2593 	int32 insert = bufferSize;
2594 	int32 maxLevel = 256;
2595 	int32 length;
2596 	status_t status = B_OK;
2597 	struct io_context* ioContext = get_current_io_context(kernel);
2598 
2599 	// we don't use get_vnode() here because this call is more
2600 	// efficient and does all we need from get_vnode()
2601 	inc_vnode_ref_count(vnode);
2602 
2603 	path[--insert] = '\0';
2604 		// the path is filled right to left
2605 
2606 	while (true) {
2607 		// If the node is the context's root, bail out. Otherwise resolve mount
2608 		// points.
2609 		if (vnode == ioContext->root)
2610 			break;
2611 
2612 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2613 			put_vnode(vnode);
2614 			vnode = coveredVnode;
2615 		}
2616 
2617 		// lookup the parent vnode
2618 		struct vnode* parentVnode;
2619 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2620 		if (status != B_OK)
2621 			goto out;
2622 
2623 		if (parentVnode == vnode) {
2624 			// The caller apparently got their hands on a node outside of their
2625 			// context's root. Now we've hit the global root.
2626 			put_vnode(parentVnode);
2627 			break;
2628 		}
2629 
2630 		// get the node's name
2631 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2632 			// also used for fs_read_dir()
2633 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2634 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2635 			sizeof(nameBuffer), ioContext);
2636 
2637 		// release the current vnode, we only need its parent from now on
2638 		put_vnode(vnode);
2639 		vnode = parentVnode;
2640 
2641 		if (status != B_OK)
2642 			goto out;
2643 
2644 		// TODO: add an explicit check for loops in about 10 levels to do
2645 		// real loop detection
2646 
2647 		// don't go deeper than 'maxLevel' to prevent circular loops
2648 		if (maxLevel-- < 0) {
2649 			status = B_LINK_LIMIT;
2650 			goto out;
2651 		}
2652 
2653 		// add the name in front of the current path
2654 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2655 		length = strlen(name);
2656 		insert -= length;
2657 		if (insert <= 0) {
2658 			status = B_RESULT_NOT_REPRESENTABLE;
2659 			goto out;
2660 		}
2661 		memcpy(path + insert, name, length);
2662 		path[--insert] = '/';
2663 	}
2664 
2665 	// the root dir will result in an empty path: fix it
2666 	if (path[insert] == '\0')
2667 		path[--insert] = '/';
2668 
2669 	TRACE(("  path is: %s\n", path + insert));
2670 
2671 	// move the path to the start of the buffer
2672 	length = bufferSize - insert;
2673 	memmove(buffer, path + insert, length);
2674 
2675 out:
2676 	put_vnode(vnode);
2677 	return status;
2678 }
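

/*	Illustration (hypothetical run): for the directory "/boot/home" the path
	is assembled right to left while walking up through "..":
		"/home"        after the first iteration
		"/boot/home"   after the second, when the context's root is reached
	The result is then moved to the start of the buffer.
*/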
2679 
2680 
2681 /*!	Checks the length of every path component, and adds a '.'
2682 	if the path ends in a slash.
2683 	The given path buffer must be able to store at least one
2684 	additional character.
2685 */
2686 static status_t
2687 check_path(char* to)
2688 {
2689 	int32 length = 0;
2690 
2691 	// check length of every path component
2692 
2693 	while (*to) {
2694 		char* begin;
2695 		if (*to == '/')
2696 			to++, length++;
2697 
2698 		begin = to;
2699 		while (*to != '/' && *to)
2700 			to++, length++;
2701 
2702 		if (to - begin > B_FILE_NAME_LENGTH)
2703 			return B_NAME_TOO_LONG;
2704 	}
2705 
2706 	if (length == 0)
2707 		return B_ENTRY_NOT_FOUND;
2708 
2709 	// complete path if there is a slash at the end
2710 
2711 	if (*(to - 1) == '/') {
2712 		if (length > B_PATH_NAME_LENGTH - 2)
2713 			return B_NAME_TOO_LONG;
2714 
2715 		to[0] = '.';
2716 		to[1] = '\0';
2717 	}
2718 
2719 	return B_OK;
2720 }
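

/*	Examples for check_path() (hypothetical inputs): "/foo/bar/" is completed
	in place to "/foo/bar/.", "/" becomes "/.", an empty path yields
	B_ENTRY_NOT_FOUND, and a component longer than B_FILE_NAME_LENGTH yields
	B_NAME_TOO_LONG.
*/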
2721 
2722 
2723 static struct file_descriptor*
2724 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2725 {
2726 	struct file_descriptor* descriptor
2727 		= get_fd(get_current_io_context(kernel), fd);
2728 	if (descriptor == NULL)
2729 		return NULL;
2730 
2731 	struct vnode* vnode = fd_vnode(descriptor);
2732 	if (vnode == NULL) {
2733 		put_fd(descriptor);
2734 		return NULL;
2735 	}
2736 
2737 	// ToDo: when we can close a file descriptor at any point, investigate
2738 	//	if this is still valid to do (accessing the vnode without ref_count
2739 	//	or locking)
2740 	*_vnode = vnode;
2741 	return descriptor;
2742 }
2743 
2744 
2745 static struct vnode*
2746 get_vnode_from_fd(int fd, bool kernel)
2747 {
2748 	struct file_descriptor* descriptor;
2749 	struct vnode* vnode;
2750 
2751 	descriptor = get_fd(get_current_io_context(kernel), fd);
2752 	if (descriptor == NULL)
2753 		return NULL;
2754 
2755 	vnode = fd_vnode(descriptor);
2756 	if (vnode != NULL)
2757 		inc_vnode_ref_count(vnode);
2758 
2759 	put_fd(descriptor);
2760 	return vnode;
2761 }
2762 
2763 
2764 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2765 	only the path will be considered. In this case, the \a path must not be
2766 	NULL.
2767 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2768 	and should be NULL for files.
2769 */
2770 static status_t
2771 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2772 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2773 {
2774 	if (fd < 0 && !path)
2775 		return B_BAD_VALUE;
2776 
2777 	if (path != NULL && *path == '\0')
2778 		return B_ENTRY_NOT_FOUND;
2779 
2780 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2781 		// no FD or absolute path
2782 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2783 	}
2784 
2785 	// FD only, or FD + relative path
2786 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2787 	if (vnode == NULL)
2788 		return B_FILE_ERROR;
2789 
2790 	if (path != NULL) {
2791 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2792 			_vnode, _parentID);
2793 	}
2794 
2795 	// there is no relative path to take into account
2796 
2797 	*_vnode = vnode;
2798 	if (_parentID)
2799 		*_parentID = -1;
2800 
2801 	return B_OK;
2802 }
2803 
2804 
2805 static int
2806 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2807 	void* cookie, int openMode, bool kernel)
2808 {
2809 	struct file_descriptor* descriptor;
2810 	int fd;
2811 
2812 	// If the vnode is locked, we don't allow creating a new file/directory
2813 	// file_descriptor for it
2814 	if (vnode && vnode->mandatory_locked_by != NULL
2815 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2816 		return B_BUSY;
2817 
2818 	descriptor = alloc_fd();
2819 	if (!descriptor)
2820 		return B_NO_MEMORY;
2821 
2822 	if (vnode)
2823 		descriptor->u.vnode = vnode;
2824 	else
2825 		descriptor->u.mount = mount;
2826 	descriptor->cookie = cookie;
2827 
2828 	switch (type) {
2829 		// vnode types
2830 		case FDTYPE_FILE:
2831 			descriptor->ops = &sFileOps;
2832 			break;
2833 		case FDTYPE_DIR:
2834 			descriptor->ops = &sDirectoryOps;
2835 			break;
2836 		case FDTYPE_ATTR:
2837 			descriptor->ops = &sAttributeOps;
2838 			break;
2839 		case FDTYPE_ATTR_DIR:
2840 			descriptor->ops = &sAttributeDirectoryOps;
2841 			break;
2842 
2843 		// mount types
2844 		case FDTYPE_INDEX_DIR:
2845 			descriptor->ops = &sIndexDirectoryOps;
2846 			break;
2847 		case FDTYPE_QUERY:
2848 			descriptor->ops = &sQueryOps;
2849 			break;
2850 
2851 		default:
2852 			panic("get_new_fd() called with unknown type %d\n", type);
2853 			break;
2854 	}
2855 	descriptor->type = type;
2856 	descriptor->open_mode = openMode;
2857 
2858 	io_context* context = get_current_io_context(kernel);
2859 	fd = new_fd(context, descriptor);
2860 	if (fd < 0) {
2861 		free(descriptor);
2862 		return B_NO_MORE_FDS;
2863 	}
2864 
2865 	mutex_lock(&context->io_mutex);
2866 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2867 	mutex_unlock(&context->io_mutex);
2868 
2869 	return fd;
2870 }
2871 
2872 
2873 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2874 	vfs_normalize_path(). See there for more documentation.
2875 */
2876 static status_t
2877 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2878 {
2879 	VNodePutter dirPutter;
2880 	struct vnode* dir = NULL;
2881 	status_t error;
2882 
2883 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2884 		// get dir vnode + leaf name
2885 		struct vnode* nextDir;
2886 		char leaf[B_FILE_NAME_LENGTH];
2887 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2888 		if (error != B_OK)
2889 			return error;
2890 
2891 		dir = nextDir;
2892 		strcpy(path, leaf);
2893 		dirPutter.SetTo(dir);
2894 
2895 		// get file vnode, if we shall resolve links
2896 		bool fileExists = false;
2897 		struct vnode* fileVnode;
2898 		VNodePutter fileVnodePutter;
2899 		if (traverseLink) {
2900 			inc_vnode_ref_count(dir);
2901 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2902 					NULL) == B_OK) {
2903 				fileVnodePutter.SetTo(fileVnode);
2904 				fileExists = true;
2905 			}
2906 		}
2907 
2908 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2909 			// we're done -- construct the path
2910 			bool hasLeaf = true;
2911 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2912 				// special cases "." and ".." -- get the dir, forget the leaf
2913 				inc_vnode_ref_count(dir);
2914 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2915 					&nextDir, NULL);
2916 				if (error != B_OK)
2917 					return error;
2918 				dir = nextDir;
2919 				dirPutter.SetTo(dir);
2920 				hasLeaf = false;
2921 			}
2922 
2923 			// get the directory path
2924 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2925 			if (error != B_OK)
2926 				return error;
2927 
2928 			// append the leaf name
2929 			if (hasLeaf) {
2930 				// insert a directory separator if this is not the file system
2931 				// root
2932 				if ((strcmp(path, "/") != 0
2933 					&& strlcat(path, "/", pathSize) >= pathSize)
2934 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2935 					return B_NAME_TOO_LONG;
2936 				}
2937 			}
2938 
2939 			return B_OK;
2940 		}
2941 
2942 		// read link
2943 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2944 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2945 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2946 			if (error != B_OK)
2947 				return error;
2948 			path[bufferSize] = '\0';
2949 		} else
2950 			return B_BAD_VALUE;
2951 	}
2952 
2953 	return B_LINK_LIMIT;
2954 }
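

/*	Sketch of a normalize_path() run (hypothetical input, traverseLink set):
	for "/boot/home/link" the path is split into dir + leaf per round; if
	"link" resolves to a symlink, its contents replace the path and another
	round starts, up to B_MAX_SYMLINKS rounds (then B_LINK_LIMIT). Otherwise
	the canonical directory path is rebuilt via dir_vnode_to_path() and the
	leaf is appended.
*/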
2955 
2956 
2957 static status_t
2958 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2959 	struct io_context* ioContext)
2960 {
2961 	// Make sure the IO context root is not bypassed.
2962 	if (parent == ioContext->root) {
2963 		*_device = parent->device;
2964 		*_node = parent->id;
2965 		return B_OK;
2966 	}
2967 
2968 	inc_vnode_ref_count(parent);
2969 		// vnode_path_to_vnode() puts the node
2970 
2971 	// ".." is guaranteed not to be clobbered by this call
2972 	struct vnode* vnode;
2973 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2974 		ioContext, &vnode, NULL);
2975 	if (status == B_OK) {
2976 		*_device = vnode->device;
2977 		*_node = vnode->id;
2978 		put_vnode(vnode);
2979 	}
2980 
2981 	return status;
2982 }
2983 
2984 
2985 #ifdef ADD_DEBUGGER_COMMANDS
2986 
2987 
2988 static void
2989 _dump_advisory_locking(advisory_locking* locking)
2990 {
2991 	if (locking == NULL)
2992 		return;
2993 
2994 	kprintf("   lock:        %" B_PRId32, locking->lock);
2995 	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
2996 
2997 	int32 index = 0;
2998 	LockList::Iterator iterator = locking->locks.GetIterator();
2999 	while (iterator.HasNext()) {
3000 		struct advisory_lock* lock = iterator.Next();
3001 
3002 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3003 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3004 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3005 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3006 	}
3007 }
3008 
3009 
3010 static void
3011 _dump_mount(struct fs_mount* mount)
3012 {
3013 	kprintf("MOUNT: %p\n", mount);
3014 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3015 	kprintf(" device_name:   %s\n", mount->device_name);
3016 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3017 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3018 	kprintf(" partition:     %p\n", mount->partition);
3019 	kprintf(" lock:          %p\n", &mount->lock);
3020 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3021 		mount->owns_file_device ? " owns_file_device" : "");
3022 
3023 	fs_volume* volume = mount->volume;
3024 	while (volume != NULL) {
3025 		kprintf(" volume %p:\n", volume);
3026 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3027 		kprintf("  private_volume:   %p\n", volume->private_volume);
3028 		kprintf("  ops:              %p\n", volume->ops);
3029 		kprintf("  file_system:      %p\n", volume->file_system);
3030 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3031 		volume = volume->super_volume;
3032 	}
3033 
3034 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3035 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3036 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3037 	set_debug_variable("_partition", (addr_t)mount->partition);
3038 }
3039 
3040 
3041 static bool
3042 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3043 	const char* name)
3044 {
3045 	bool insertSlash = buffer[bufferSize] != '\0';
3046 	size_t nameLength = strlen(name);
3047 
3048 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3049 		return false;
3050 
3051 	if (insertSlash)
3052 		buffer[--bufferSize] = '/';
3053 
3054 	bufferSize -= nameLength;
3055 	memcpy(buffer + bufferSize, name, nameLength);
3056 
3057 	return true;
3058 }
3059 
3060 
3061 static bool
3062 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3063 	ino_t nodeID)
3064 {
3065 	if (bufferSize == 0)
3066 		return false;
3067 
3068 	bool insertSlash = buffer[bufferSize] != '\0';
3069 	if (insertSlash)
3070 		buffer[--bufferSize] = '/';
3071 
3072 	size_t size = snprintf(buffer, bufferSize,
3073 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3074 	if (size > bufferSize) {
3075 		if (insertSlash)
3076 			bufferSize++;
3077 		return false;
3078 	}
3079 
3080 	if (size < bufferSize)
3081 		memmove(buffer + bufferSize - size, buffer, size);
3082 
3083 	bufferSize -= size;
3084 	return true;
3085 }
3086 
3087 
3088 static char*
3089 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3090 	bool& _truncated)
3091 {
3092 	// null-terminate the path
3093 	buffer[--bufferSize] = '\0';
3094 
3095 	while (true) {
3096 		while (vnode->covers != NULL)
3097 			vnode = vnode->covers;
3098 
3099 		if (vnode == sRoot) {
3100 			_truncated = bufferSize == 0;
3101 			if (!_truncated)
3102 				buffer[--bufferSize] = '/';
3103 			return buffer + bufferSize;
3104 		}
3105 
3106 		// resolve the name
3107 		ino_t dirID;
3108 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3109 			vnode->id, dirID);
3110 		if (name == NULL) {
3111 			// Failed to resolve the name -- prepend "<dev,node>/".
3112 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3113 				vnode->mount->id, vnode->id);
3114 			return buffer + bufferSize;
3115 		}
3116 
3117 		// prepend the name
3118 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3119 			_truncated = true;
3120 			return buffer + bufferSize;
3121 		}
3122 
3123 		// resolve the directory node
3124 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3125 		if (nextVnode == NULL) {
3126 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3127 				vnode->mount->id, dirID);
3128 			return buffer + bufferSize;
3129 		}
3130 
3131 		vnode = nextVnode;
3132 	}
3133 }
3134 
3135 
3136 static void
3137 _dump_vnode(struct vnode* vnode, bool printPath)
3138 {
3139 	kprintf("VNODE: %p\n", vnode);
3140 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3141 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3142 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3143 	kprintf(" private_node:  %p\n", vnode->private_node);
3144 	kprintf(" mount:         %p\n", vnode->mount);
3145 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3146 	kprintf(" covers:        %p\n", vnode->covers);
3147 	kprintf(" cache:         %p\n", vnode->cache);
3148 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3149 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3150 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3151 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3152 
3153 	_dump_advisory_locking(vnode->advisory_locking);
3154 
3155 	if (printPath) {
3156 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3157 		if (buffer != NULL) {
3158 			bool truncated;
3159 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3160 				B_PATH_NAME_LENGTH, truncated);
3161 			if (path != NULL) {
3162 				kprintf(" path:          ");
3163 				if (truncated)
3164 					kputs("<truncated>/");
3165 				kputs(path);
3166 				kputs("\n");
3167 			} else
3168 				kprintf("Failed to resolve vnode path.\n");
3169 
3170 			debug_free(buffer);
3171 		} else
3172 			kprintf("Failed to allocate memory for constructing the path.\n");
3173 	}
3174 
3175 	set_debug_variable("_node", (addr_t)vnode->private_node);
3176 	set_debug_variable("_mount", (addr_t)vnode->mount);
3177 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3178 	set_debug_variable("_covers", (addr_t)vnode->covers);
3179 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3180 }
3181 
3182 
3183 static int
3184 dump_mount(int argc, char** argv)
3185 {
3186 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3187 		kprintf("usage: %s [id|address]\n", argv[0]);
3188 		return 0;
3189 	}
3190 
3191 	ulong val = parse_expression(argv[1]);
3192 	uint32 id = val;
3193 
3194 	struct fs_mount* mount = sMountsTable->Lookup(id);
3195 	if (mount == NULL) {
3196 		if (IS_USER_ADDRESS(id)) {
3197 			kprintf("fs_mount not found\n");
3198 			return 0;
3199 		}
3200 		mount = (fs_mount*)val;
3201 	}
3202 
3203 	_dump_mount(mount);
3204 	return 0;
3205 }
3206 
3207 
3208 static int
3209 dump_mounts(int argc, char** argv)
3210 {
3211 	if (argc != 1) {
3212 		kprintf("usage: %s\n", argv[0]);
3213 		return 0;
3214 	}
3215 
3216 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3217 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3218 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3219 
3220 	struct fs_mount* mount;
3221 
3222 	MountTable::Iterator iterator(sMountsTable);
3223 	while (iterator.HasNext()) {
3224 		mount = iterator.Next();
3225 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3226 			mount->root_vnode->covers, mount->volume->private_volume,
3227 			mount->volume->file_system_name);
3228 
3229 		fs_volume* volume = mount->volume;
3230 		while (volume->super_volume != NULL) {
3231 			volume = volume->super_volume;
3232 			kprintf("                                     %p %s\n",
3233 				volume->private_volume, volume->file_system_name);
3234 		}
3235 	}
3236 
3237 	return 0;
3238 }
3239 
3240 
3241 static int
3242 dump_vnode(int argc, char** argv)
3243 {
3244 	bool printPath = false;
3245 	int argi = 1;
3246 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3247 		printPath = true;
3248 		argi++;
3249 	}
3250 
3251 	if (argi >= argc || argi + 2 < argc) {
3252 		print_debugger_command_usage(argv[0]);
3253 		return 0;
3254 	}
3255 
3256 	struct vnode* vnode = NULL;
3257 
3258 	if (argi + 1 == argc) {
3259 		vnode = (struct vnode*)parse_expression(argv[argi]);
3260 		if (IS_USER_ADDRESS(vnode)) {
3261 			kprintf("invalid vnode address\n");
3262 			return 0;
3263 		}
3264 		_dump_vnode(vnode, printPath);
3265 		return 0;
3266 	}
3267 
3268 	dev_t device = parse_expression(argv[argi]);
3269 	ino_t id = parse_expression(argv[argi + 1]);
3270 
3271 	VnodeTable::Iterator iterator(sVnodeTable);
3272 	while (iterator.HasNext()) {
3273 		vnode = iterator.Next();
3274 		if (vnode->id != id || vnode->device != device)
3275 			continue;
3276 
3277 		_dump_vnode(vnode, printPath);
3278 	}
3279 
3280 	return 0;
3281 }
3282 
3283 
3284 static int
3285 dump_vnodes(int argc, char** argv)
3286 {
3287 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3288 		kprintf("usage: %s [device]\n", argv[0]);
3289 		return 0;
3290 	}
3291 
3292 	// restrict dumped nodes to a certain device if requested
3293 	dev_t device = parse_expression(argv[1]);
3294 
3295 	struct vnode* vnode;
3296 
3297 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3298 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3299 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3300 
3301 	VnodeTable::Iterator iterator(sVnodeTable);
3302 	while (iterator.HasNext()) {
3303 		vnode = iterator.Next();
3304 		if (vnode->device != device)
3305 			continue;
3306 
3307 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3308 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3309 			vnode->private_node, vnode->advisory_locking,
3310 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3311 			vnode->IsUnpublished() ? "u" : "-");
3312 	}
3313 
3314 	return 0;
3315 }
3316 
3317 
3318 static int
3319 dump_vnode_caches(int argc, char** argv)
3320 {
3321 	struct vnode* vnode;
3322 
3323 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3324 		kprintf("usage: %s [device]\n", argv[0]);
3325 		return 0;
3326 	}
3327 
3328 	// restrict dumped nodes to a certain device if requested
3329 	dev_t device = -1;
3330 	if (argc > 1)
3331 		device = parse_expression(argv[1]);
3332 
3333 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3334 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3335 
3336 	VnodeTable::Iterator iterator(sVnodeTable);
3337 	while (iterator.HasNext()) {
3338 		vnode = iterator.Next();
3339 		if (vnode->cache == NULL)
3340 			continue;
3341 		if (device != -1 && vnode->device != device)
3342 			continue;
3343 
3344 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3345 			vnode, vnode->device, vnode->id, vnode->cache,
3346 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3347 			vnode->cache->page_count);
3348 	}
3349 
3350 	return 0;
3351 }
3352 
3353 
3354 int
3355 dump_io_context(int argc, char** argv)
3356 {
3357 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3358 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3359 		return 0;
3360 	}
3361 
3362 	struct io_context* context = NULL;
3363 
3364 	if (argc > 1) {
3365 		ulong num = parse_expression(argv[1]);
3366 		if (IS_KERNEL_ADDRESS(num))
3367 			context = (struct io_context*)num;
3368 		else {
3369 			Team* team = team_get_team_struct_locked(num);
3370 			if (team == NULL) {
3371 				kprintf("could not find team with ID %lu\n", num);
3372 				return 0;
3373 			}
3374 			context = (struct io_context*)team->io_context;
3375 		}
3376 	} else
3377 		context = get_current_io_context(true);
3378 
3379 	kprintf("I/O CONTEXT: %p\n", context);
3380 	kprintf(" root vnode:\t%p\n", context->root);
3381 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3382 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3383 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3384 
3385 	if (context->num_used_fds) {
3386 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3387 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3388 	}
3389 
3390 	for (uint32 i = 0; i < context->table_size; i++) {
3391 		struct file_descriptor* fd = context->fds[i];
3392 		if (fd == NULL)
3393 			continue;
3394 
3395 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3396 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3397 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3398 			fd->pos, fd->cookie,
3399 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3400 				? "mount" : "vnode",
3401 			fd->u.vnode);
3402 	}
3403 
3404 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3405 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3406 
3407 	set_debug_variable("_cwd", (addr_t)context->cwd);
3408 
3409 	return 0;
3410 }
3411 
3412 
3413 int
3414 dump_vnode_usage(int argc, char** argv)
3415 {
3416 	if (argc != 1) {
3417 		kprintf("usage: %s\n", argv[0]);
3418 		return 0;
3419 	}
3420 
3421 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3422 		sUnusedVnodes, kMaxUnusedVnodes);
3423 
3424 	uint32 count = sVnodeTable->CountElements();
3425 
3426 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3427 		count - sUnusedVnodes);
3428 	return 0;
3429 }
3430 
3431 #endif	// ADD_DEBUGGER_COMMANDS
3432 
3433 
3434 /*!	Clears memory specified by an iovec array.
3435 */
3436 static void
3437 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3438 {
3439 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3440 		size_t length = std::min(vecs[i].iov_len, bytes);
3441 		memset(vecs[i].iov_base, 0, length);
3442 		bytes -= length;
3443 	}
3444 }
3445 
3446 
3447 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3448 	and calls the file system hooks to read/write the request to disk.
3449 */
3450 static status_t
3451 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3452 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3453 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3454 	bool doWrite)
3455 {
3456 	if (fileVecCount == 0) {
3457 		// There are no file vecs at this offset, so we're obviously trying
3458 		// to access the file outside of its bounds
3459 		return B_BAD_VALUE;
3460 	}
3461 
3462 	size_t numBytes = *_numBytes;
3463 	uint32 fileVecIndex;
3464 	size_t vecOffset = *_vecOffset;
3465 	uint32 vecIndex = *_vecIndex;
3466 	status_t status;
3467 	size_t size;
3468 
3469 	if (!doWrite && vecOffset == 0) {
3470 		// now directly read the data from the device
3471 		// the first file_io_vec can be read directly
3472 
3473 		if (fileVecs[0].length < (off_t)numBytes)
3474 			size = fileVecs[0].length;
3475 		else
3476 			size = numBytes;
3477 
3478 		if (fileVecs[0].offset >= 0) {
3479 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3480 				&vecs[vecIndex], vecCount - vecIndex, &size);
3481 		} else {
3482 			// sparse read
3483 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3484 			status = B_OK;
3485 		}
3486 		if (status != B_OK)
3487 			return status;
3488 
3489 		// TODO: this is a work-around for buggy device drivers!
3490 		//	When our own drivers honour the length, we can:
3491 		//	a) also use this direct I/O for writes (otherwise, it would
3492 		//	   overwrite precious data)
3493 		//	b) panic if the condition below is true (at least for writes)
3494 		if ((off_t)size > fileVecs[0].length) {
3495 			//dprintf("warning: device driver %p doesn't respect total length "
3496 			//	"in read_pages() call!\n", ref->device);
3497 			size = fileVecs[0].length;
3498 		}
3499 
3500 		ASSERT((off_t)size <= fileVecs[0].length);
3501 
3502 		// If the file portion was contiguous, we're already done now
3503 		if (size == numBytes)
3504 			return B_OK;
3505 
3506 		// if we reached the end of the file, we can return as well
3507 		if ((off_t)size != fileVecs[0].length) {
3508 			*_numBytes = size;
3509 			return B_OK;
3510 		}
3511 
3512 		fileVecIndex = 1;
3513 
3514 		// first, find out where we have to continue in our iovecs
3515 		for (; vecIndex < vecCount; vecIndex++) {
3516 			if (size < vecs[vecIndex].iov_len)
3517 				break;
3518 
3519 			size -= vecs[vecIndex].iov_len;
3520 		}
3521 
3522 		vecOffset = size;
3523 	} else {
3524 		fileVecIndex = 0;
3525 		size = 0;
3526 	}
3527 
3528 	// Too bad, let's process the rest of the file_io_vecs
3529 
3530 	size_t totalSize = size;
3531 	size_t bytesLeft = numBytes - size;
3532 
3533 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3534 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3535 		off_t fileOffset = fileVec.offset;
3536 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3537 
3538 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3539 			fileLeft));
3540 
3541 		// process the complete fileVec
3542 		while (fileLeft > 0) {
3543 			iovec tempVecs[MAX_TEMP_IO_VECS];
3544 			uint32 tempCount = 0;
3545 
3546 			// size tracks how much of what is left of the current fileVec
3547 			// (fileLeft) has been assigned to tempVecs
3548 			size = 0;
3549 
3550 			// assign what is left of the current fileVec to the tempVecs
3551 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3552 					&& tempCount < MAX_TEMP_IO_VECS;) {
3553 				// try to satisfy one iovec per iteration (or as much as
3554 				// possible)
3555 
3556 				// bytes left of the current iovec
3557 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3558 				if (vecLeft == 0) {
3559 					vecOffset = 0;
3560 					vecIndex++;
3561 					continue;
3562 				}
3563 
3564 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3565 					vecIndex, vecOffset, size));
3566 
3567 				// actually available bytes
3568 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3569 
3570 				tempVecs[tempCount].iov_base
3571 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3572 				tempVecs[tempCount].iov_len = tempVecSize;
3573 				tempCount++;
3574 
3575 				size += tempVecSize;
3576 				vecOffset += tempVecSize;
3577 			}
3578 
3579 			size_t bytes = size;
3580 
3581 			if (fileOffset == -1) {
3582 				if (doWrite) {
3583 					panic("sparse write attempt: vnode %p", vnode);
3584 					status = B_IO_ERROR;
3585 				} else {
3586 					// sparse read
3587 					zero_iovecs(tempVecs, tempCount, bytes);
3588 					status = B_OK;
3589 				}
3590 			} else if (doWrite) {
3591 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3592 					tempVecs, tempCount, &bytes);
3593 			} else {
3594 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3595 					tempVecs, tempCount, &bytes);
3596 			}
3597 			if (status != B_OK)
3598 				return status;
3599 
3600 			totalSize += bytes;
3601 			bytesLeft -= size;
3602 			if (fileOffset >= 0)
3603 				fileOffset += size;
3604 			fileLeft -= size;
3605 			//dprintf("-> file left = %Lu\n", fileLeft);
3606 
3607 			if (size != bytes || vecIndex >= vecCount) {
3608 				// there are no more bytes or iovecs, let's bail out
3609 				*_numBytes = totalSize;
3610 				return B_OK;
3611 			}
3612 		}
3613 	}
3614 
3615 	*_vecIndex = vecIndex;
3616 	*_vecOffset = vecOffset;
3617 	*_numBytes = totalSize;
3618 	return B_OK;
3619 }
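

/*	Worked example (hypothetical values): given fileVecs describing two disk
	extents { offset 1000, length 512 } and { offset 8192, length 512 }, and
	a single 1024 byte iovec, a read first transfers 512 bytes directly from
	offset 1000, then builds a tempVec for the remaining 512 bytes of the
	iovec and reads them from offset 8192. A file vec offset of -1 marks a
	sparse extent: reads are zero-filled via zero_iovecs(), writes panic.
*/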
3620 
3621 
3622 static bool
3623 is_user_in_group(gid_t gid)
3624 {
3625 	if (gid == getegid())
3626 		return true;
3627 
3628 	gid_t groups[NGROUPS_MAX];
3629 	int groupCount = getgroups(NGROUPS_MAX, groups);
3630 	for (int i = 0; i < groupCount; i++) {
3631 		if (gid == groups[i])
3632 			return true;
3633 	}
3634 
3635 	return false;
3636 }
3637 
3638 
3639 static status_t
3640 free_io_context(io_context* context)
3641 {
3642 	uint32 i;
3643 
3644 	TIOC(FreeIOContext(context));
3645 
3646 	if (context->root)
3647 		put_vnode(context->root);
3648 
3649 	if (context->cwd)
3650 		put_vnode(context->cwd);
3651 
3652 	mutex_lock(&context->io_mutex);
3653 
3654 	for (i = 0; i < context->table_size; i++) {
3655 		if (struct file_descriptor* descriptor = context->fds[i]) {
3656 			close_fd(context, descriptor);
3657 			put_fd(descriptor);
3658 		}
3659 	}
3660 
3661 	mutex_destroy(&context->io_mutex);
3662 
3663 	remove_node_monitors(context);
3664 	free(context->fds);
3665 	free(context);
3666 
3667 	return B_OK;
3668 }
3669 
3670 
3671 static status_t
3672 resize_monitor_table(struct io_context* context, const int newSize)
3673 {
3674 	status_t status = B_OK;
3675 
3676 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3677 		return B_BAD_VALUE;
3678 
3679 	mutex_lock(&context->io_mutex);
3680 
3681 	if ((size_t)newSize < context->num_monitors) {
3682 		status = B_BUSY;
3683 		goto out;
3684 	}
3685 	context->max_monitors = newSize;
3686 
3687 out:
3688 	mutex_unlock(&context->io_mutex);
3689 	return status;
3690 }
3691 
3692 
3693 //	#pragma mark - public API for file systems
3694 
3695 
3696 extern "C" status_t
3697 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3698 	fs_vnode_ops* ops)
3699 {
3700 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3701 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3702 
3703 	if (privateNode == NULL)
3704 		return B_BAD_VALUE;
3705 
3706 	int32 tries = BUSY_VNODE_RETRIES;
3707 restart:
3708 	// create the node
3709 	bool nodeCreated;
3710 	struct vnode* vnode;
3711 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3712 		nodeCreated);
3713 	if (status != B_OK)
3714 		return status;
3715 
3716 	WriteLocker nodeLocker(sVnodeLock, true);
3717 		// create_new_vnode_and_lock() has locked for us
3718 
3719 	if (!nodeCreated && vnode->IsBusy()) {
3720 		nodeLocker.Unlock();
3721 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3722 			return B_BUSY;
3723 		goto restart;
3724 	}
3725 
3726 	// file system integrity check:
3727 	// test if the vnode already exists and bail out if this is the case!
3728 	if (!nodeCreated) {
3729 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3730 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3731 			vnode->private_node);
3732 		return B_ERROR;
3733 	}
3734 
3735 	vnode->private_node = privateNode;
3736 	vnode->ops = ops;
3737 	vnode->SetUnpublished(true);
3738 
3739 	TRACE(("returns: %s\n", strerror(status)));
3740 
3741 	return status;
3742 }
3743 
3744 
3745 extern "C" status_t
3746 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3747 	fs_vnode_ops* ops, int type, uint32 flags)
3748 {
3749 	FUNCTION(("publish_vnode()\n"));
3750 
3751 	int32 tries = BUSY_VNODE_RETRIES;
3752 restart:
3753 	WriteLocker locker(sVnodeLock);
3754 
3755 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3756 
3757 	bool nodeCreated = false;
3758 	if (vnode == NULL) {
3759 		if (privateNode == NULL)
3760 			return B_BAD_VALUE;
3761 
3762 		// create the node
3763 		locker.Unlock();
3764 			// create_new_vnode_and_lock() will re-lock for us on success
3765 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3766 			nodeCreated);
3767 		if (status != B_OK)
3768 			return status;
3769 
3770 		locker.SetTo(sVnodeLock, true);
3771 	}
3772 
3773 	if (nodeCreated) {
3774 		vnode->private_node = privateNode;
3775 		vnode->ops = ops;
3776 		vnode->SetUnpublished(true);
3777 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3778 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3779 		// already known, but not published
3780 	} else if (vnode->IsBusy()) {
3781 		locker.Unlock();
3782 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3783 			return B_BUSY;
3784 		goto restart;
3785 	} else
3786 		return B_BAD_VALUE;
3787 
3788 	bool publishSpecialSubNode = false;
3789 
3790 	vnode->SetType(type);
3791 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3792 	publishSpecialSubNode = is_special_node_type(type)
3793 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3794 
3795 	status_t status = B_OK;
3796 
3797 	// create sub vnodes, if necessary
3798 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3799 		locker.Unlock();
3800 
3801 		fs_volume* subVolume = volume;
3802 		if (volume->sub_volume != NULL) {
3803 			while (status == B_OK && subVolume->sub_volume != NULL) {
3804 				subVolume = subVolume->sub_volume;
3805 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3806 					vnode);
3807 			}
3808 		}
3809 
3810 		if (status == B_OK && publishSpecialSubNode)
3811 			status = create_special_sub_node(vnode, flags);
3812 
3813 		if (status != B_OK) {
3814 			// error -- clean up the created sub vnodes
3815 			while (subVolume->super_volume != volume) {
3816 				subVolume = subVolume->super_volume;
3817 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3818 			}
3819 		}
3820 
3821 		if (status == B_OK) {
3822 			ReadLocker vnodesReadLocker(sVnodeLock);
3823 			AutoLocker<Vnode> nodeLocker(vnode);
3824 			vnode->SetBusy(false);
3825 			vnode->SetUnpublished(false);
3826 		} else {
3827 			locker.Lock();
3828 			sVnodeTable->Remove(vnode);
3829 			remove_vnode_from_mount_list(vnode, vnode->mount);
3830 			free(vnode);
3831 		}
3832 	} else {
3833 		// we still hold the write lock -- mark the node unbusy and published
3834 		vnode->SetBusy(false);
3835 		vnode->SetUnpublished(false);
3836 	}
3837 
3838 	TRACE(("returns: %s\n", strerror(status)));
3839 
3840 	return status;
3841 }
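
/*! Illustrative sketch (not part of the source): a file system typically
	either creates and publishes a node in two steps -- new_vnode() followed
	by publish_vnode() -- or calls publish_vnode() directly. The names
	myVolume, myInode, myNode, and gMyVnodeOps are hypothetical:

		status_t status = new_vnode(myVolume, myInode, myNode, &gMyVnodeOps);
		if (status == B_OK) {
			// finish any setup that must happen before the node is visible
			status = publish_vnode(myVolume, myInode, myNode, &gMyVnodeOps,
				S_IFREG, 0);
		}
*/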
3842 
3843 
3844 extern "C" status_t
3845 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3846 {
3847 	struct vnode* vnode;
3848 
3849 	if (volume == NULL)
3850 		return B_BAD_VALUE;
3851 
3852 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3853 	if (status != B_OK)
3854 		return status;
3855 
3856 	// If this is a layered FS, we need to get the node cookie for the requested
3857 	// layer.
3858 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3859 		fs_vnode resolvedNode;
3860 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3861 			&resolvedNode);
3862 		if (status != B_OK) {
3863 			panic("get_vnode(): Failed to get super node for vnode %p, "
3864 				"volume: %p", vnode, volume);
3865 			put_vnode(vnode);
3866 			return status;
3867 		}
3868 
3869 		if (_privateNode != NULL)
3870 			*_privateNode = resolvedNode.private_node;
3871 	} else if (_privateNode != NULL)
3872 		*_privateNode = vnode->private_node;
3873 
3874 	return B_OK;
3875 }
3876 
3877 
3878 extern "C" status_t
3879 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3880 {
3881 	struct vnode* vnode;
3882 
3883 	rw_lock_read_lock(&sVnodeLock);
3884 	vnode = lookup_vnode(volume->id, vnodeID);
3885 	rw_lock_read_unlock(&sVnodeLock);
3886 
3887 	if (vnode == NULL)
3888 		return B_BAD_VALUE;
3889 
3890 	inc_vnode_ref_count(vnode);
3891 	return B_OK;
3892 }
3893 
3894 
3895 extern "C" status_t
3896 put_vnode(fs_volume* volume, ino_t vnodeID)
3897 {
3898 	struct vnode* vnode;
3899 
3900 	rw_lock_read_lock(&sVnodeLock);
3901 	vnode = lookup_vnode(volume->id, vnodeID);
3902 	rw_lock_read_unlock(&sVnodeLock);
3903 
3904 	if (vnode == NULL)
3905 		return B_BAD_VALUE;
3906 
3907 	dec_vnode_ref_count(vnode, false, true);
3908 	return B_OK;
3909 }
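
/*! Illustrative note: every successful get_vnode() must be balanced by a
	put_vnode() on the same volume/ID pair. A minimal sketch with
	hypothetical names:

		void* privateNode;
		if (get_vnode(volume, id, &privateNode) == B_OK) {
			// ... use privateNode ...
			put_vnode(volume, id);
		}
*/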
3910 
3911 
3912 extern "C" status_t
3913 remove_vnode(fs_volume* volume, ino_t vnodeID)
3914 {
3915 	ReadLocker locker(sVnodeLock);
3916 
3917 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3918 	if (vnode == NULL)
3919 		return B_ENTRY_NOT_FOUND;
3920 
3921 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3922 		// this vnode is in use
3923 		return B_BUSY;
3924 	}
3925 
3926 	vnode->Lock();
3927 
3928 	vnode->SetRemoved(true);
3929 	bool removeUnpublished = false;
3930 
3931 	if (vnode->IsUnpublished()) {
3932 		// prepare the vnode for deletion
3933 		removeUnpublished = true;
3934 		vnode->SetBusy(true);
3935 	}
3936 
3937 	vnode->Unlock();
3938 	locker.Unlock();
3939 
3940 	if (removeUnpublished) {
3941 		// If the vnode hasn't been published yet, we delete it here
3942 		atomic_add(&vnode->ref_count, -1);
3943 		free_vnode(vnode, true);
3944 	}
3945 
3946 	return B_OK;
3947 }
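
/*! Illustrative sketch: a file system usually calls remove_vnode() from its
	unlink hook once an entry's link count has dropped to zero; the node is
	actually destroyed only when its last reference is put. Hypothetical
	names:

		if (--inode->link_count == 0)
			remove_vnode(volume, inode->id);
*/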
3948 
3949 
3950 extern "C" status_t
3951 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3952 {
3953 	struct vnode* vnode;
3954 
3955 	rw_lock_read_lock(&sVnodeLock);
3956 
3957 	vnode = lookup_vnode(volume->id, vnodeID);
3958 	if (vnode) {
3959 		AutoLocker<Vnode> nodeLocker(vnode);
3960 		vnode->SetRemoved(false);
3961 	}
3962 
3963 	rw_lock_read_unlock(&sVnodeLock);
3964 	return B_OK;
3965 }
3966 
3967 
3968 extern "C" status_t
3969 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3970 {
3971 	ReadLocker _(sVnodeLock);
3972 
3973 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3974 		if (_removed != NULL)
3975 			*_removed = vnode->IsRemoved();
3976 		return B_OK;
3977 	}
3978 
3979 	return B_BAD_VALUE;
3980 }
3981 
3982 
3983 extern "C" status_t
3984 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3985 {
3986 	ReadLocker locker(sVnodeLock);
3987 
3988 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3989 	if (vnode == NULL)
3990 		return B_ENTRY_NOT_FOUND;
3991 
3992 	// are we trying to mark an already busy node busy again?
3993 	if (busy && vnode->IsBusy())
3994 		return B_BUSY;
3995 
3996 	vnode->Lock();
3997 	vnode->SetBusy(busy);
3998 	vnode->Unlock();
3999 
4000 	return B_OK;
4001 }
4002 
4003 
4004 extern "C" status_t
4005 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4006 {
4007 	WriteLocker locker(sVnodeLock);
4008 
4009 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4010 	if (vnode == NULL)
4011 		return B_ENTRY_NOT_FOUND;
4012 
4013 	sVnodeTable->Remove(vnode);
4014 	vnode->id = newID;
4015 	sVnodeTable->Insert(vnode);
4016 
4017 	if (vnode->cache != NULL)
4018 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4019 
4020 	return B_OK;
4021 }
4022 
4023 
4024 extern "C" fs_volume*
4025 volume_for_vnode(fs_vnode* _vnode)
4026 {
4027 	if (_vnode == NULL)
4028 		return NULL;
4029 
4030 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4031 	return vnode->mount->volume;
4032 }
4033 
4034 
4035 extern "C" status_t
4036 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4037 	uid_t nodeUserID)
4038 {
4039 	// get node permissions
4040 	int userPermissions = (mode & S_IRWXU) >> 6;
4041 	int groupPermissions = (mode & S_IRWXG) >> 3;
4042 	int otherPermissions = mode & S_IRWXO;
4043 
4044 	// get the node permissions for this uid/gid
4045 	int permissions = 0;
4046 	uid_t uid = geteuid();
4047 
4048 	if (uid == 0) {
4049 		// user is root
4050 		// root always has read/write permission, but at least one of the
4051 		// X bits must be set for execute permission
4052 		permissions = userPermissions | groupPermissions | otherPermissions
4053 			| S_IROTH | S_IWOTH;
4054 		if (S_ISDIR(mode))
4055 			permissions |= S_IXOTH;
4056 	} else if (uid == nodeUserID) {
4057 		// user is node owner
4058 		permissions = userPermissions;
4059 	} else if (is_user_in_group(nodeGroupID)) {
4060 		// user is in owning group
4061 		permissions = groupPermissions;
4062 	} else {
4063 		// user is one of the others
4064 		permissions = otherPermissions;
4065 	}
4066 
4067 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4068 }
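
/*! Illustrative example: to test whether the effective user may write to a
	node whose stat data is in the hypothetical variable \c st:

		status_t status = check_access_permissions(W_OK, st.st_mode,
			st.st_gid, st.st_uid);
			// B_OK or B_PERMISSION_DENIED

	This works because R_OK/W_OK/X_OK have the same values as the "other"
	permission bits against which \c accessMode is matched.
*/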
4069 
4070 
4071 #if 0
4072 extern "C" status_t
4073 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4074 	size_t* _numBytes)
4075 {
4076 	struct file_descriptor* descriptor;
4077 	struct vnode* vnode;
4078 
4079 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4080 	if (descriptor == NULL)
4081 		return B_FILE_ERROR;
4082 
4083 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4084 		count, 0, _numBytes);
4085 
4086 	put_fd(descriptor);
4087 	return status;
4088 }
4089 
4090 
4091 extern "C" status_t
4092 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4093 	size_t* _numBytes)
4094 {
4095 	struct file_descriptor* descriptor;
4096 	struct vnode* vnode;
4097 
4098 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4099 	if (descriptor == NULL)
4100 		return B_FILE_ERROR;
4101 
4102 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4103 		count, 0, _numBytes);
4104 
4105 	put_fd(descriptor);
4106 	return status;
4107 }
4108 #endif
4109 
4110 
4111 extern "C" status_t
4112 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4113 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4114 	size_t* _bytes)
4115 {
4116 	struct file_descriptor* descriptor;
4117 	struct vnode* vnode;
4118 
4119 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4120 	if (descriptor == NULL)
4121 		return B_FILE_ERROR;
4122 
4123 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4124 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4125 		false);
4126 
4127 	put_fd(descriptor);
4128 	return status;
4129 }
4130 
4131 
4132 extern "C" status_t
4133 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4134 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4135 	size_t* _bytes)
4136 {
4137 	struct file_descriptor* descriptor;
4138 	struct vnode* vnode;
4139 
4140 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4141 	if (descriptor == NULL)
4142 		return B_FILE_ERROR;
4143 
4144 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4145 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4146 		true);
4147 
4148 	put_fd(descriptor);
4149 	return status;
4150 }
4151 
4152 
4153 extern "C" status_t
4154 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4155 {
4156 	// lookup mount -- the caller is required to make sure that the mount
4157 	// won't go away
4158 	MutexLocker locker(sMountMutex);
4159 	struct fs_mount* mount = find_mount(mountID);
4160 	if (mount == NULL)
4161 		return B_BAD_VALUE;
4162 	locker.Unlock();
4163 
4164 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4165 }
4166 
4167 
4168 extern "C" status_t
4169 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4170 {
4171 	// lookup mount -- the caller is required to make sure that the mount
4172 	// won't go away
4173 	MutexLocker locker(sMountMutex);
4174 	struct fs_mount* mount = find_mount(mountID);
4175 	if (mount == NULL)
4176 		return B_BAD_VALUE;
4177 	locker.Unlock();
4178 
4179 	return mount->entry_cache.Add(dirID, name, -1, true);
4180 }
4181 
4182 
4183 extern "C" status_t
4184 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4185 {
4186 	// lookup mount -- the caller is required to make sure that the mount
4187 	// won't go away
4188 	MutexLocker locker(sMountMutex);
4189 	struct fs_mount* mount = find_mount(mountID);
4190 	if (mount == NULL)
4191 		return B_BAD_VALUE;
4192 	locker.Unlock();
4193 
4194 	return mount->entry_cache.Remove(dirID, name);
4195 }
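
/*! Illustrative note: a file system can use these three hooks to keep the
	VFS entry cache coherent with its own directory contents, e.g.
	(hypothetical IDs and name):

		entry_cache_add(volume->id, dirID, "foo", nodeID);
		// ... and once "foo" has been unlinked:
		entry_cache_remove(volume->id, dirID, "foo");
*/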
4196 
4197 
4198 //	#pragma mark - private VFS API
4199 //	Functions the VFS exports for other parts of the kernel
4200 
4201 
4202 /*! Acquires another reference to the vnode; it has to be released
4203 	by calling vfs_put_vnode().
4204 */
4205 void
4206 vfs_acquire_vnode(struct vnode* vnode)
4207 {
4208 	inc_vnode_ref_count(vnode);
4209 }
4210 
4211 
4212 /*! This is currently called from file_cache_create() only.
4213 	It's probably a temporary solution as long as devfs requires that
4214 	fs_read_pages()/fs_write_pages() are called with the standard
4215 	open cookie and not with a device cookie.
4216 	If that's done differently, remove this call; it has no other
4217 	purpose.
4218 */
4219 extern "C" status_t
4220 vfs_get_cookie_from_fd(int fd, void** _cookie)
4221 {
4222 	struct file_descriptor* descriptor;
4223 
4224 	descriptor = get_fd(get_current_io_context(true), fd);
4225 	if (descriptor == NULL)
4226 		return B_FILE_ERROR;
4227 
4228 	*_cookie = descriptor->cookie;
4229 	return B_OK;
4230 }
4231 
4232 
4233 extern "C" status_t
4234 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4235 {
4236 	*vnode = get_vnode_from_fd(fd, kernel);
4237 
4238 	if (*vnode == NULL)
4239 		return B_FILE_ERROR;
4240 
4241 	return B_NO_ERROR;
4242 }
4243 
4244 
4245 extern "C" status_t
4246 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4247 {
4248 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4249 		path, kernel));
4250 
4251 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4252 	if (pathBuffer.InitCheck() != B_OK)
4253 		return B_NO_MEMORY;
4254 
4255 	char* buffer = pathBuffer.LockBuffer();
4256 	strlcpy(buffer, path, pathBuffer.BufferSize());
4257 
4258 	struct vnode* vnode;
4259 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4260 	if (status != B_OK)
4261 		return status;
4262 
4263 	*_vnode = vnode;
4264 	return B_OK;
4265 }
4266 
4267 
4268 extern "C" status_t
4269 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4270 {
4271 	struct vnode* vnode = NULL;
4272 
4273 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4274 	if (status != B_OK)
4275 		return status;
4276 
4277 	*_vnode = vnode;
4278 	return B_OK;
4279 }
4280 
4281 
4282 extern "C" status_t
4283 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4284 	const char* name, struct vnode** _vnode)
4285 {
4286 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4287 }
4288 
4289 
4290 extern "C" void
4291 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4292 {
4293 	*_mountID = vnode->device;
4294 	*_vnodeID = vnode->id;
4295 }
4296 
4297 
4298 /*!
4299 	Helper function abstracting the process of "converting" a given
4300 	vnode-pointer to a fs_vnode-pointer.
4301 	Currently only used in bindfs.
4302 */
4303 extern "C" fs_vnode*
4304 vfs_fsnode_for_vnode(struct vnode* vnode)
4305 {
4306 	return vnode;
4307 }
4308 
4309 
4310 /*!
4311 	Calls fs_open() on the given vnode and returns a new
4312 	file descriptor for it
4313 */
4314 int
4315 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4316 {
4317 	return open_vnode(vnode, openMode, kernel);
4318 }
4319 
4320 
4321 /*!	Looks up a vnode with the given mount and vnode ID.
4322 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4323 	to the node.
4324 	It's currently only used by file_cache_create().
4325 */
4326 extern "C" status_t
4327 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4328 {
4329 	rw_lock_read_lock(&sVnodeLock);
4330 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4331 	rw_lock_read_unlock(&sVnodeLock);
4332 
4333 	if (vnode == NULL)
4334 		return B_ERROR;
4335 
4336 	*_vnode = vnode;
4337 	return B_OK;
4338 }
4339 
4340 
4341 extern "C" status_t
4342 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4343 	bool traverseLeafLink, bool kernel, void** _node)
4344 {
4345 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4346 		volume, path, kernel));
4347 
4348 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4349 	if (pathBuffer.InitCheck() != B_OK)
4350 		return B_NO_MEMORY;
4351 
4352 	fs_mount* mount;
4353 	status_t status = get_mount(volume->id, &mount);
4354 	if (status != B_OK)
4355 		return status;
4356 
4357 	char* buffer = pathBuffer.LockBuffer();
4358 	strlcpy(buffer, path, pathBuffer.BufferSize());
4359 
4360 	struct vnode* vnode = mount->root_vnode;
4361 
4362 	if (buffer[0] == '/')
4363 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4364 	else {
4365 		inc_vnode_ref_count(vnode);
4366 			// vnode_path_to_vnode() releases a reference to the starting vnode
4367 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4368 			kernel, &vnode, NULL);
4369 	}
4370 
4371 	put_mount(mount);
4372 
4373 	if (status != B_OK)
4374 		return status;
4375 
4376 	if (vnode->device != volume->id) {
4377 		// wrong mount ID - must not gain access on foreign file system nodes
4378 		put_vnode(vnode);
4379 		return B_BAD_VALUE;
4380 	}
4381 
4382 	// Use get_vnode() to resolve the cookie for the right layer.
4383 	status = get_vnode(volume, vnode->id, _node);
4384 	put_vnode(vnode);
4385 
4386 	return status;
4387 }
4388 
4389 
4390 status_t
4391 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4392 	struct stat* stat, bool kernel)
4393 {
4394 	status_t status;
4395 
4396 	if (path != NULL) {
4397 		// path given: get the stat of the node referred to by (fd, path)
4398 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4399 		if (pathBuffer.InitCheck() != B_OK)
4400 			return B_NO_MEMORY;
4401 
4402 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4403 			traverseLeafLink, stat, kernel);
4404 	} else {
4405 		// no path given: get the FD and use the FD operation
4406 		struct file_descriptor* descriptor
4407 			= get_fd(get_current_io_context(kernel), fd);
4408 		if (descriptor == NULL)
4409 			return B_FILE_ERROR;
4410 
4411 		if (descriptor->ops->fd_read_stat)
4412 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4413 		else
4414 			status = B_UNSUPPORTED;
4415 
4416 		put_fd(descriptor);
4417 	}
4418 
4419 	return status;
4420 }
4421 
4422 
4423 /*!	Finds the full path to the file that contains the module \a moduleName,
4424 	puts it into \a pathBuffer, and returns B_OK for success.
4425 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4426 	\c B_ENTRY_NOT_FOUND if no file could be found.
4427 	\a pathBuffer is clobbered in any case and must not be relied on if this
4428 	function returns unsuccessfully.
4429 	\a basePath and \a pathBuffer must not point to the same space.
4430 */
4431 status_t
4432 vfs_get_module_path(const char* basePath, const char* moduleName,
4433 	char* pathBuffer, size_t bufferSize)
4434 {
4435 	struct vnode* dir;
4436 	struct vnode* file;
4437 	status_t status;
4438 	size_t length;
4439 	char* path;
4440 
4441 	if (bufferSize == 0
4442 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4443 		return B_BUFFER_OVERFLOW;
4444 
4445 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4446 	if (status != B_OK)
4447 		return status;
4448 
4449 	// the path buffer had been clobbered by the above call
4450 	length = strlcpy(pathBuffer, basePath, bufferSize);
4451 	if (pathBuffer[length - 1] != '/')
4452 		pathBuffer[length++] = '/';
4453 
4454 	path = pathBuffer + length;
4455 	bufferSize -= length;
4456 
4457 	while (moduleName) {
4458 		char* nextPath = strchr(moduleName, '/');
4459 		if (nextPath == NULL)
4460 			length = strlen(moduleName);
4461 		else {
4462 			length = nextPath - moduleName;
4463 			nextPath++;
4464 		}
4465 
4466 		if (length + 1 >= bufferSize) {
4467 			status = B_BUFFER_OVERFLOW;
4468 			goto err;
4469 		}
4470 
4471 		memcpy(path, moduleName, length);
4472 		path[length] = '\0';
4473 		moduleName = nextPath;
4474 
4475 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4476 		if (status != B_OK) {
4477 			// vnode_path_to_vnode() has already released the reference to dir
4478 			return status;
4479 		}
4480 
4481 		if (S_ISDIR(file->Type())) {
4482 			// go to the next directory
4483 			path[length] = '/';
4484 			path[length + 1] = '\0';
4485 			path += length + 1;
4486 			bufferSize -= length + 1;
4487 
4488 			dir = file;
4489 		} else if (S_ISREG(file->Type())) {
4490 			// it's a file so it should be what we've searched for
4491 			put_vnode(file);
4492 
4493 			return B_OK;
4494 		} else {
4495 			TRACE(("vfs_get_module_path(): something is strange here: "
4496 				"0x%08" B_PRIx32 "...\n", file->Type()));
4497 			status = B_ERROR;
4498 			dir = file;
4499 			goto err;
4500 		}
4501 	}
4502 
4503 	// if we got here, the moduleName just pointed to a directory, not to
4504 	// a real module - what should we do in this case?
4505 	status = B_ENTRY_NOT_FOUND;
4506 
4507 err:
4508 	put_vnode(dir);
4509 	return status;
4510 }
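
/*! Illustrative example (hypothetical paths): given basePath
	"/boot/system/add-ons/kernel" and moduleName "bus_managers/usb/v3", the
	loop above descends one path component at a time until it reaches a
	regular file, so \a pathBuffer could end up as
	"/boot/system/add-ons/kernel/bus_managers/usb" -- the binary expected to
	export the module "v3".
*/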
4511 
4512 
4513 /*!	\brief Normalizes a given path.
4514 
4515 	The path must refer to an existing or non-existing entry in an existing
4516 	directory; that is, after chopping off the leaf component, the remaining
4517 	path must refer to an existing directory.
4518 
4519 	The returned path will be canonical in that it will be absolute, will
4520 	not contain any "." or ".." components or duplicate occurrences of '/'s,
4521 	and none of the directory components will be symbolic links.
4522 
4523 	Any two paths referring to the same entry will result in the same
4524 	normalized path (well, that is pretty much the definition of `normalized',
4525 	isn't it :-).
4526 
4527 	\param path The path to be normalized.
4528 	\param buffer The buffer into which the normalized path will be written.
4529 		   May be the same one as \a path.
4530 	\param bufferSize The size of \a buffer.
4531 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4532 	\param kernel \c true, if the IO context of the kernel shall be used,
4533 		   otherwise that of the team this thread belongs to. Only relevant,
4534 		   if the path is relative (to get the CWD).
4535 	\return \c B_OK if everything went fine, another error code otherwise.
4536 */
4537 status_t
4538 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4539 	bool traverseLink, bool kernel)
4540 {
4541 	if (!path || !buffer || bufferSize < 1)
4542 		return B_BAD_VALUE;
4543 
4544 	if (path != buffer) {
4545 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4546 			return B_BUFFER_OVERFLOW;
4547 	}
4548 
4549 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4550 }
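
/*! Illustrative example: assuming none of the components are symlinks,
	normalizing "/boot/home/../home//Desktop" would yield
	"/boot/home/Desktop" -- absolute, without "." or ".." components or
	duplicate slashes.
*/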
4551 
4552 
4553 /*!	\brief Gets the parent of the passed in node.
4554 
4555 	Gets the parent of the passed in node, and correctly resolves covered
4556 	nodes.
4557 */
4558 extern "C" status_t
4559 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4560 {
4561 	return resolve_covered_parent(parent, device, node,
4562 		get_current_io_context(true));
4563 }
4564 
4565 
4566 /*!	\brief Creates a special node in the file system.
4567 
4568 	The caller gets a reference to the newly created node (which is passed
4569 	back through \a _createdVnode) and is responsible for releasing it.
4570 
4571 	\param path The path where to create the entry for the node. Can be \c NULL,
4572 		in which case the node is created without an entry in the root FS -- it
4573 		will automatically be deleted when the last reference has been released.
4574 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4575 		the target file system will just create the node with its standard
4576 		operations. Depending on the type of the node a subnode might be created
4577 		automatically, though.
4578 	\param mode The type and permissions for the node to be created.
4579 	\param flags Flags to be passed to the creating FS.
4580 	\param kernel \c true, if called in the kernel context (relevant only if
4581 		\a path is not \c NULL and not absolute).
4582 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4583 		file system creating the node, with the private data pointer and
4584 		operations for the super node. Can be \c NULL.
4585 	\param _createdVnode Pointer to pre-allocated storage where to store the
4586 		pointer to the newly created node.
4587 	\return \c B_OK, if everything went fine, another error code otherwise.
4588 */
4589 status_t
4590 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4591 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4592 	struct vnode** _createdVnode)
4593 {
4594 	struct vnode* dirNode;
4595 	char _leaf[B_FILE_NAME_LENGTH];
4596 	char* leaf = NULL;
4597 
4598 	if (path) {
4599 		// We've got a path. Get the dir vnode and the leaf name.
4600 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4601 		if (tmpPathBuffer.InitCheck() != B_OK)
4602 			return B_NO_MEMORY;
4603 
4604 		char* tmpPath = tmpPathBuffer.LockBuffer();
4605 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4606 			return B_NAME_TOO_LONG;
4607 
4608 		// get the dir vnode and the leaf name
4609 		leaf = _leaf;
4610 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4611 		if (error != B_OK)
4612 			return error;
4613 	} else {
4614 		// No path. Create the node in the root FS.
4615 		dirNode = sRoot;
4616 		inc_vnode_ref_count(dirNode);
4617 	}
4618 
4619 	VNodePutter _(dirNode);
4620 
4621 	// check support for creating special nodes
4622 	if (!HAS_FS_CALL(dirNode, create_special_node))
4623 		return B_UNSUPPORTED;
4624 
4625 	// create the node
4626 	fs_vnode superVnode;
4627 	ino_t nodeID;
4628 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4629 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4630 	if (status != B_OK)
4631 		return status;
4632 
4633 	// lookup the node
4634 	rw_lock_read_lock(&sVnodeLock);
4635 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4636 	rw_lock_read_unlock(&sVnodeLock);
4637 
4638 	if (*_createdVnode == NULL) {
4639 		panic("vfs_create_special_node(): lookup of node failed");
4640 		return B_ERROR;
4641 	}
4642 
4643 	return B_OK;
4644 }
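
/*! Illustrative sketch: an anonymous special node (here a FIFO) could be
	created without an entry, so that it goes away with its last reference.
	Hypothetical usage:

		struct vnode* vnode;
		status_t status = vfs_create_special_node(NULL, NULL,
			S_IFIFO | 0666, 0, true, NULL, &vnode);
		if (status == B_OK)
			put_vnode(vnode);	// release the reference when done
*/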
4645 
4646 
4647 extern "C" void
4648 vfs_put_vnode(struct vnode* vnode)
4649 {
4650 	put_vnode(vnode);
4651 }
4652 
4653 
4654 extern "C" status_t
4655 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4656 {
4657 	// Get current working directory from io context
4658 	struct io_context* context = get_current_io_context(false);
4659 	status_t status = B_OK;
4660 
4661 	mutex_lock(&context->io_mutex);
4662 
4663 	if (context->cwd != NULL) {
4664 		*_mountID = context->cwd->device;
4665 		*_vnodeID = context->cwd->id;
4666 	} else
4667 		status = B_ERROR;
4668 
4669 	mutex_unlock(&context->io_mutex);
4670 	return status;
4671 }
4672 
4673 
4674 status_t
4675 vfs_unmount(dev_t mountID, uint32 flags)
4676 {
4677 	return fs_unmount(NULL, mountID, flags, true);
4678 }
4679 
4680 
4681 extern "C" status_t
4682 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4683 {
4684 	struct vnode* vnode;
4685 
4686 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4687 	if (status != B_OK)
4688 		return status;
4689 
4690 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4691 	put_vnode(vnode);
4692 	return B_OK;
4693 }
4694 
4695 
4696 extern "C" void
4697 vfs_free_unused_vnodes(int32 level)
4698 {
4699 	vnode_low_resource_handler(NULL,
4700 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4701 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4702 		level);
4703 }
4704 
4705 
4706 extern "C" bool
4707 vfs_can_page(struct vnode* vnode, void* cookie)
4708 {
4709 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4710 
4711 	if (HAS_FS_CALL(vnode, can_page))
4712 		return FS_CALL(vnode, can_page, cookie);
4713 	return false;
4714 }
4715 
4716 
4717 extern "C" status_t
4718 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4719 	const generic_io_vec* vecs, size_t count, uint32 flags,
4720 	generic_size_t* _numBytes)
4721 {
4722 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4723 		vecs, pos));
4724 
4725 #if VFS_PAGES_IO_TRACING
4726 	generic_size_t bytesRequested = *_numBytes;
4727 #endif
4728 
4729 	IORequest request;
4730 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4731 	if (status == B_OK) {
4732 		status = vfs_vnode_io(vnode, cookie, &request);
4733 		if (status == B_OK)
4734 			status = request.Wait();
4735 		*_numBytes = request.TransferredBytes();
4736 	}
4737 
4738 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4739 		status, *_numBytes));
4740 
4741 	return status;
4742 }
4743 
4744 
4745 extern "C" status_t
4746 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4747 	const generic_io_vec* vecs, size_t count, uint32 flags,
4748 	generic_size_t* _numBytes)
4749 {
4750 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4751 		vecs, pos));
4752 
4753 #if VFS_PAGES_IO_TRACING
4754 	generic_size_t bytesRequested = *_numBytes;
4755 #endif
4756 
4757 	IORequest request;
4758 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4759 	if (status == B_OK) {
4760 		status = vfs_vnode_io(vnode, cookie, &request);
4761 		if (status == B_OK)
4762 			status = request.Wait();
4763 		*_numBytes = request.TransferredBytes();
4764 	}
4765 
4766 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4767 		status, *_numBytes));
4768 
4769 	return status;
4770 }
4771 
4772 
4773 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will
4774 	be created if \a allocate is \c true.
4775 	In case it's successful, it will also grab a reference to the cache
4776 	it returns.
4777 */
4778 extern "C" status_t
4779 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4780 {
4781 	if (vnode->cache != NULL) {
4782 		vnode->cache->AcquireRef();
4783 		*_cache = vnode->cache;
4784 		return B_OK;
4785 	}
4786 
4787 	rw_lock_read_lock(&sVnodeLock);
4788 	vnode->Lock();
4789 
4790 	status_t status = B_OK;
4791 
4792 	// The cache could have been created in the meantime
4793 	if (vnode->cache == NULL) {
4794 		if (allocate) {
4795 			// TODO: actually the vnode needs to be busy already here, or
4796 			//	else this won't work...
4797 			bool wasBusy = vnode->IsBusy();
4798 			vnode->SetBusy(true);
4799 
4800 			vnode->Unlock();
4801 			rw_lock_read_unlock(&sVnodeLock);
4802 
4803 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4804 
4805 			rw_lock_read_lock(&sVnodeLock);
4806 			vnode->Lock();
4807 			vnode->SetBusy(wasBusy);
4808 		} else
4809 			status = B_BAD_VALUE;
4810 	}
4811 
4812 	vnode->Unlock();
4813 	rw_lock_read_unlock(&sVnodeLock);
4814 
4815 	if (status == B_OK) {
4816 		vnode->cache->AcquireRef();
4817 		*_cache = vnode->cache;
4818 	}
4819 
4820 	return status;
4821 }
4822 
4823 
4824 status_t
4825 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4826 	file_io_vec* vecs, size_t* _count)
4827 {
4828 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4829 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4830 
4831 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4832 }
4833 
4834 
4835 status_t
4836 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4837 {
4838 	status_t status = FS_CALL(vnode, read_stat, stat);
4839 
4840 	// fill in the st_dev and st_ino fields
4841 	if (status == B_OK) {
4842 		stat->st_dev = vnode->device;
4843 		stat->st_ino = vnode->id;
4844 		// the rdev field must stay unset for non-special files
4845 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4846 			stat->st_rdev = -1;
4847 	}
4848 
4849 	return status;
4850 }
4851 
4852 
4853 status_t
4854 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4855 {
4856 	struct vnode* vnode;
4857 	status_t status = get_vnode(device, inode, &vnode, true, false);
4858 	if (status != B_OK)
4859 		return status;
4860 
4861 	status = vfs_stat_vnode(vnode, stat);
4862 
4863 	put_vnode(vnode);
4864 	return status;
4865 }
4866 
4867 
4868 status_t
4869 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4870 {
4871 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4872 }
4873 
4874 
4875 status_t
4876 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4877 	bool kernel, char* path, size_t pathLength)
4878 {
4879 	struct vnode* vnode;
4880 	status_t status;
4881 
4882 	// filter invalid leaf names
4883 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4884 		return B_BAD_VALUE;
4885 
4886 	// get the vnode matching the dir's node_ref
4887 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4888 		// special cases "." and "..": we can directly get the vnode of the
4889 		// referenced directory
4890 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4891 		leaf = NULL;
4892 	} else
4893 		status = get_vnode(device, inode, &vnode, true, false);
4894 	if (status != B_OK)
4895 		return status;
4896 
4897 	// get the directory path
4898 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4899 	put_vnode(vnode);
4900 		// we don't need the vnode anymore
4901 	if (status != B_OK)
4902 		return status;
4903 
4904 	// append the leaf name
4905 	if (leaf) {
4906 		// insert a directory separator if this is not the file system root
4907 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4908 				>= pathLength)
4909 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4910 			return B_NAME_TOO_LONG;
4911 		}
4912 	}
4913 
4914 	return B_OK;
4915 }
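
/*! Illustrative example (hypothetical values): for a node_ref naming the
	directory "/boot/home" and the leaf "mail", this yields "/boot/home/mail"
	in \a path, provided \a pathLength is large enough.
*/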
4916 
4917 
4918 /*!	If the given descriptor locked its vnode, that lock will be released. */
4919 void
4920 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4921 {
4922 	struct vnode* vnode = fd_vnode(descriptor);
4923 
4924 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4925 		vnode->mandatory_locked_by = NULL;
4926 }
4927 
4928 
4929 /*!	Releases any POSIX locks on the file descriptor. */
4930 status_t
4931 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4932 {
4933 	struct vnode* vnode = descriptor->u.vnode;
4934 	if (vnode == NULL)
4935 		return B_OK;
4936 
4937 	if (HAS_FS_CALL(vnode, release_lock))
4938 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4939 
4940 	return release_advisory_lock(vnode, context, NULL, NULL);
4941 }
4942 
4943 
4944 /*!	Closes all file descriptors of the specified I/O context that
4945 	have the O_CLOEXEC flag set.
4946 */
4947 void
4948 vfs_exec_io_context(io_context* context)
4949 {
4950 	uint32 i;
4951 
4952 	for (i = 0; i < context->table_size; i++) {
4953 		mutex_lock(&context->io_mutex);
4954 
4955 		struct file_descriptor* descriptor = context->fds[i];
4956 		bool remove = false;
4957 
4958 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4959 			context->fds[i] = NULL;
4960 			context->num_used_fds--;
4961 
4962 			remove = true;
4963 		}
4964 
4965 		mutex_unlock(&context->io_mutex);
4966 
4967 		if (remove) {
4968 			close_fd(context, descriptor);
4969 			put_fd(descriptor);
4970 		}
4971 	}
4972 }
4973 
4974 
4975 /*! Sets up a new io_context structure, and inherits the properties
4976 	of the parent io_context if it is given.
4977 */
4978 io_context*
4979 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4980 {
4981 	io_context* context = (io_context*)malloc(sizeof(io_context));
4982 	if (context == NULL)
4983 		return NULL;
4984 
4985 	TIOC(NewIOContext(context, parentContext));
4986 
4987 	memset(context, 0, sizeof(io_context));
4988 	context->ref_count = 1;
4989 
4990 	MutexLocker parentLocker;
4991 
4992 	size_t tableSize;
4993 	if (parentContext != NULL) {
4994 		parentLocker.SetTo(parentContext->io_mutex, false);
4995 		tableSize = parentContext->table_size;
4996 	} else
4997 		tableSize = DEFAULT_FD_TABLE_SIZE;
4998 
4999 	// allocate space for FDs and their close-on-exec flag
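	// The three tables share this one allocation, laid out back to back:
	// [file_descriptor* x tableSize][select_info* x tableSize]
	// [close-on-exec bitmap: one bit per FD, rounded up to whole bytes]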
5000 	context->fds = (file_descriptor**)malloc(
5001 		sizeof(struct file_descriptor*) * tableSize
5002 		+ sizeof(struct select_sync*) * tableSize
5003 		+ (tableSize + 7) / 8);
5004 	if (context->fds == NULL) {
5005 		free(context);
5006 		return NULL;
5007 	}
5008 
5009 	context->select_infos = (select_info**)(context->fds + tableSize);
5010 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5011 
5012 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5013 		+ sizeof(struct select_sync*) * tableSize
5014 		+ (tableSize + 7) / 8);
5015 
5016 	mutex_init(&context->io_mutex, "I/O context");
5017 
5018 	// Copy all parent file descriptors
5019 
5020 	if (parentContext != NULL) {
5021 		size_t i;
5022 
5023 		mutex_lock(&sIOContextRootLock);
5024 		context->root = parentContext->root;
5025 		if (context->root)
5026 			inc_vnode_ref_count(context->root);
5027 		mutex_unlock(&sIOContextRootLock);
5028 
5029 		context->cwd = parentContext->cwd;
5030 		if (context->cwd)
5031 			inc_vnode_ref_count(context->cwd);
5032 
5033 		if (parentContext->inherit_fds) {
5034 			for (i = 0; i < tableSize; i++) {
5035 				struct file_descriptor* descriptor = parentContext->fds[i];
5036 
5037 				if (descriptor != NULL
5038 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5039 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5040 					if (closeOnExec && purgeCloseOnExec)
5041 						continue;
5042 
5043 					TFD(InheritFD(context, i, descriptor, parentContext));
5044 
5045 					context->fds[i] = descriptor;
5046 					context->num_used_fds++;
5047 					atomic_add(&descriptor->ref_count, 1);
5048 					atomic_add(&descriptor->open_count, 1);
5049 
5050 					if (closeOnExec)
5051 						fd_set_close_on_exec(context, i, true);
5052 				}
5053 			}
5054 		}
5055 
5056 		parentLocker.Unlock();
5057 	} else {
5058 		context->root = sRoot;
5059 		context->cwd = sRoot;
5060 
5061 		if (context->root)
5062 			inc_vnode_ref_count(context->root);
5063 
5064 		if (context->cwd)
5065 			inc_vnode_ref_count(context->cwd);
5066 	}
5067 
5068 	context->table_size = tableSize;
5069 	context->inherit_fds = parentContext != NULL;
5070 
5071 	list_init(&context->node_monitors);
5072 	context->max_monitors = DEFAULT_NODE_MONITORS;
5073 
5074 	return context;
5075 }
5076 
5077 
5078 void
5079 vfs_get_io_context(io_context* context)
5080 {
5081 	atomic_add(&context->ref_count, 1);
5082 }
5083 
5084 
5085 void
5086 vfs_put_io_context(io_context* context)
5087 {
5088 	if (atomic_add(&context->ref_count, -1) == 1)
5089 		free_io_context(context);
5090 }
5091 
5092 
5093 status_t
5094 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5095 {
5096 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5097 		return B_BAD_VALUE;
5098 
5099 	TIOC(ResizeIOContext(context, newSize));
5100 
5101 	MutexLocker _(context->io_mutex);
5102 
5103 	uint32 oldSize = context->table_size;
5104 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5105 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5106 
5107 	// If the tables shrink, make sure none of the fds being dropped are in use.
5108 	if (newSize < oldSize) {
5109 		for (uint32 i = oldSize; i-- > newSize;) {
5110 			if (context->fds[i])
5111 				return B_BUSY;
5112 		}
5113 	}
5114 
5115 	// store pointers to the old tables
5116 	file_descriptor** oldFDs = context->fds;
5117 	select_info** oldSelectInfos = context->select_infos;
5118 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5119 
5120 	// allocate new tables
5121 	file_descriptor** newFDs = (file_descriptor**)malloc(
5122 		sizeof(struct file_descriptor*) * newSize
5123 		+ sizeof(struct select_sync*) * newSize
5124 		+ newCloseOnExecBitmapSize);
5125 	if (newFDs == NULL)
5126 		return B_NO_MEMORY;
5127 
5128 	context->fds = newFDs;
5129 	context->select_infos = (select_info**)(context->fds + newSize);
5130 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5131 	context->table_size = newSize;
5132 
5133 	// copy entries from old tables
5134 	uint32 toCopy = min_c(oldSize, newSize);
5135 
5136 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5137 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5138 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5139 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5140 
5141 	// clear additional entries, if the tables grow
5142 	if (newSize > oldSize) {
5143 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5144 		memset(context->select_infos + oldSize, 0,
5145 			sizeof(void*) * (newSize - oldSize));
5146 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5147 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5148 	}
5149 
5150 	free(oldFDs);
5151 
5152 	return B_OK;
5153 }
5154 
5155 
5156 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5157 
5158 	Given an arbitrary vnode (identified by mount and node ID), the function
5159 	checks whether the vnode is covered by another vnode. If it is, the
5160 	function returns the mount and node ID of the covering vnode. Otherwise
5161 	it simply returns the supplied mount and node ID.
5162 
5163 	In case of error (e.g. the supplied node could not be found) the variables
5164 	for storing the resolved mount and node ID remain untouched and an error
5165 	code is returned.
5166 
5167 	\param mountID The mount ID of the vnode in question.
5168 	\param nodeID The node ID of the vnode in question.
5169 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5170 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5171 	\return
5172 	- \c B_OK, if everything went fine,
5173 	- another error code, if something went wrong.
5174 */
5175 status_t
5176 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5177 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5178 {
5179 	// get the node
5180 	struct vnode* node;
5181 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5182 	if (error != B_OK)
5183 		return error;
5184 
5185 	// resolve the node
5186 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5187 		put_vnode(node);
5188 		node = coveringNode;
5189 	}
5190 
5191 	// set the return values
5192 	*resolvedMountID = node->device;
5193 	*resolvedNodeID = node->id;
5194 
5195 	put_vnode(node);
5196 
5197 	return B_OK;
5198 }
5199 
5200 
5201 status_t
5202 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5203 	ino_t* _mountPointNodeID)
5204 {
5205 	ReadLocker nodeLocker(sVnodeLock);
5206 	MutexLocker mountLocker(sMountMutex);
5207 
5208 	struct fs_mount* mount = find_mount(mountID);
5209 	if (mount == NULL)
5210 		return B_BAD_VALUE;
5211 
5212 	Vnode* mountPoint = mount->covers_vnode;
5213 
5214 	*_mountPointMountID = mountPoint->device;
5215 	*_mountPointNodeID = mountPoint->id;
5216 
5217 	return B_OK;
5218 }
5219 
5220 
5221 status_t
5222 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5223 	ino_t coveredNodeID)
5224 {
5225 	// get the vnodes
5226 	Vnode* vnode;
5227 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5228 	if (error != B_OK)
5229 		return B_BAD_VALUE;
5230 	VNodePutter vnodePutter(vnode);
5231 
5232 	Vnode* coveredVnode;
5233 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5234 		false);
5235 	if (error != B_OK)
5236 		return B_BAD_VALUE;
5237 	VNodePutter coveredVnodePutter(coveredVnode);
5238 
5239 	// establish the covered/covering links
5240 	WriteLocker locker(sVnodeLock);
5241 
5242 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5243 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5244 		return B_BUSY;
5245 	}
5246 
5247 	vnode->covers = coveredVnode;
5248 	vnode->SetCovering(true);
5249 
5250 	coveredVnode->covered_by = vnode;
5251 	coveredVnode->SetCovered(true);
5252 
5253 	// the vnodes now reference each other
5254 	inc_vnode_ref_count(vnode);
5255 	inc_vnode_ref_count(coveredVnode);
5256 
5257 	return B_OK;
5258 }
5259 
5260 
5261 int
5262 vfs_getrlimit(int resource, struct rlimit* rlp)
5263 {
5264 	if (!rlp)
5265 		return B_BAD_ADDRESS;
5266 
5267 	switch (resource) {
5268 		case RLIMIT_NOFILE:
5269 		{
5270 			struct io_context* context = get_current_io_context(false);
5271 			MutexLocker _(context->io_mutex);
5272 
5273 			rlp->rlim_cur = context->table_size;
5274 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5275 			return 0;
5276 		}
5277 
5278 		case RLIMIT_NOVMON:
5279 		{
5280 			struct io_context* context = get_current_io_context(false);
5281 			MutexLocker _(context->io_mutex);
5282 
5283 			rlp->rlim_cur = context->max_monitors;
5284 			rlp->rlim_max = MAX_NODE_MONITORS;
5285 			return 0;
5286 		}
5287 
5288 		default:
5289 			return B_BAD_VALUE;
5290 	}
5291 }
5292 
5293 
5294 int
5295 vfs_setrlimit(int resource, const struct rlimit* rlp)
5296 {
5297 	if (!rlp)
5298 		return B_BAD_ADDRESS;
5299 
5300 	switch (resource) {
5301 		case RLIMIT_NOFILE:
5302 			/* TODO: check getuid() */
5303 			if (rlp->rlim_max != RLIM_SAVED_MAX
5304 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5305 				return B_NOT_ALLOWED;
5306 
5307 			return vfs_resize_fd_table(get_current_io_context(false),
5308 				rlp->rlim_cur);
5309 
5310 		case RLIMIT_NOVMON:
5311 			/* TODO: check getuid() */
5312 			if (rlp->rlim_max != RLIM_SAVED_MAX
5313 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5314 				return B_NOT_ALLOWED;
5315 
5316 			return resize_monitor_table(get_current_io_context(false),
5317 				rlp->rlim_cur);
5318 
5319 		default:
5320 			return B_BAD_VALUE;
5321 	}
5322 }
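
/*! Illustrative note: the two functions above back getrlimit()/setrlimit()
	for the FD table and node monitor limits. A userland sketch that grows
	the FD table:

		struct rlimit rl;
		if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
			rl.rlim_cur = 512;
			setrlimit(RLIMIT_NOFILE, &rl);
		}
*/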
5323 
5324 
5325 status_t
5326 vfs_init(kernel_args* args)
5327 {
5328 	vnode::StaticInit();
5329 
5330 	sVnodeTable = new(std::nothrow) VnodeTable();
5331 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5332 		panic("vfs_init: error creating vnode hash table\n");
5333 
5334 	struct vnode dummy_vnode;
5335 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5336 
5337 	struct fs_mount dummyMount;
5338 	sMountsTable = new(std::nothrow) MountTable();
5339 	if (sMountsTable == NULL
5340 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5341 		panic("vfs_init: error creating mounts hash table\n");
5342 
5343 	node_monitor_init();
5344 
5345 	sRoot = NULL;
5346 
5347 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5348 
5349 	if (block_cache_init() != B_OK)
5350 		return B_ERROR;
5351 
5352 #ifdef ADD_DEBUGGER_COMMANDS
5353 	// add some debugger commands
5354 	add_debugger_command_etc("vnode", &dump_vnode,
5355 		"Print info about the specified vnode",
5356 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5357 		"Prints information about the vnode specified by address <vnode> or\n"
5358 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5359 		"constructed and printed. It might not be possible to construct a\n"
5360 		"complete path, though.\n",
5361 		0);
5362 	add_debugger_command("vnodes", &dump_vnodes,
5363 		"list all vnodes (from the specified device)");
5364 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5365 		"list all vnode caches");
5366 	add_debugger_command("mount", &dump_mount,
5367 		"info about the specified fs_mount");
5368 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5369 	add_debugger_command("io_context", &dump_io_context,
5370 		"info about the I/O context");
5371 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5372 		"info about vnode usage");
5373 #endif
5374 
5375 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5376 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5377 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5378 		0);
5379 
5380 	fifo_init();
5381 	file_map_init();
5382 
5383 	return file_cache_init();
5384 }
5385 
5386 
5387 //	#pragma mark - fd_ops implementations
5388 
5389 
5390 /*!
5391 	Calls fs_open() on the given vnode and returns a new
5392 	file descriptor for it
5393 */
5394 static int
5395 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5396 {
5397 	void* cookie;
5398 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5399 	if (status != B_OK)
5400 		return status;
5401 
5402 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5403 	if (fd < 0) {
5404 		FS_CALL(vnode, close, cookie);
5405 		FS_CALL(vnode, free_cookie, cookie);
5406 	}
5407 	return fd;
5408 }
5409 
5410 
5411 /*!
5412 	Creates a new node via the fs create() hook -- or opens an existing
5413 	entry, if the open mode permits -- and returns a new file descriptor for it
5414 */
5415 static int
5416 create_vnode(struct vnode* directory, const char* name, int openMode,
5417 	int perms, bool kernel)
5418 {
5419 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5420 	status_t status = B_ERROR;
5421 	struct vnode* vnode;
5422 	void* cookie;
5423 	ino_t newID;
5424 
5425 	// This is somewhat tricky: If the entry already exists, the FS responsible
5426 	// for the directory might not necessarily also be the one responsible for
5427 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5428 	// we can actually never call the create() hook without O_EXCL. Instead we
5429 	// try to look the entry up first. If it already exists, we just open the
5430 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5431 	// introduces a race condition, since someone else might have created the
5432 	// entry in the meantime. We hope the respective FS returns the correct
5433 	// error code, in which case we retry (up to 3 times).
5434 
5435 	for (int i = 0; i < 3 && status != B_OK; i++) {
5436 		// look the node up
5437 		status = lookup_dir_entry(directory, name, &vnode);
5438 		if (status == B_OK) {
5439 			VNodePutter putter(vnode);
5440 
5441 			if ((openMode & O_EXCL) != 0)
5442 				return B_FILE_EXISTS;
5443 
5444 			// If the node is a symlink, we have to follow it, unless
5445 			// O_NOTRAVERSE is set.
5446 			if (S_ISLNK(vnode->Type()) && traverse) {
5447 				putter.Put();
5448 				char clonedName[B_FILE_NAME_LENGTH + 1];
5449 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5450 						>= B_FILE_NAME_LENGTH) {
5451 					return B_NAME_TOO_LONG;
5452 				}
5453 
5454 				inc_vnode_ref_count(directory);
5455 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5456 					kernel, &vnode, NULL);
5457 				if (status != B_OK)
5458 					return status;
5459 
5460 				putter.SetTo(vnode);
5461 			}
5462 
5463 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5464 				return B_LINK_LIMIT;
5465 
5466 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5467 			// on success keep the vnode reference for the FD
5468 			if (fd >= 0)
5469 				putter.Detach();
5470 
5471 			return fd;
5472 		}
5473 
5474 		// it doesn't exist yet -- try to create it
5475 
5476 		if (!HAS_FS_CALL(directory, create))
5477 			return B_READ_ONLY_DEVICE;
5478 
5479 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5480 			&cookie, &newID);
5481 		if (status != B_OK
5482 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5483 			return status;
5484 		}
5485 	}
5486 
5487 	if (status != B_OK)
5488 		return status;
5489 
5490 	// the node has been created successfully
5491 
5492 	rw_lock_read_lock(&sVnodeLock);
5493 	vnode = lookup_vnode(directory->device, newID);
5494 	rw_lock_read_unlock(&sVnodeLock);
5495 
5496 	if (vnode == NULL) {
5497 		panic("vfs: fs_create() returned success but there is no vnode, "
5498 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5499 		return B_BAD_VALUE;
5500 	}
5501 
5502 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5503 	if (fd >= 0)
5504 		return fd;
5505 
5506 	status = fd;
5507 
5508 	// something went wrong, clean up
5509 
5510 	FS_CALL(vnode, close, cookie);
5511 	FS_CALL(vnode, free_cookie, cookie);
5512 	put_vnode(vnode);
5513 
5514 	FS_CALL(directory, unlink, name);
5515 
5516 	return status;
5517 }
5518 
5519 
5520 /*! Calls fs open_dir() on the given vnode and returns a new
5521 	file descriptor for it
5522 */
5523 static int
5524 open_dir_vnode(struct vnode* vnode, bool kernel)
5525 {
5526 	void* cookie;
5527 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5528 	if (status != B_OK)
5529 		return status;
5530 
5531 	// directory is opened, create a fd
5532 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5533 	if (status >= 0)
5534 		return status;
5535 
5536 	FS_CALL(vnode, close_dir, cookie);
5537 	FS_CALL(vnode, free_dir_cookie, cookie);
5538 
5539 	return status;
5540 }
5541 
5542 
5543 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5544 	file descriptor for it.
5545 	Used by attr_dir_open() and attr_dir_open_fd().
5546 */
5547 static int
5548 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5549 {
5550 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5551 		return B_UNSUPPORTED;
5552 
5553 	void* cookie;
5554 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5555 	if (status != B_OK)
5556 		return status;
5557 
5558 	// directory is opened, create a fd
5559 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5560 		kernel);
5561 	if (status >= 0)
5562 		return status;
5563 
5564 	FS_CALL(vnode, close_attr_dir, cookie);
5565 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5566 
5567 	return status;
5568 }
5569 
5570 
5571 static int
5572 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5573 	int openMode, int perms, bool kernel)
5574 {
5575 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5576 		"kernel %d\n", name, openMode, perms, kernel));
5577 
5578 	// get directory to put the new file in
5579 	struct vnode* directory;
5580 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5581 	if (status != B_OK)
5582 		return status;
5583 
5584 	status = create_vnode(directory, name, openMode, perms, kernel);
5585 	put_vnode(directory);
5586 
5587 	return status;
5588 }
5589 
5590 
5591 static int
5592 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5593 {
5594 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5595 		openMode, perms, kernel));
5596 
5597 	// get directory to put the new file in
5598 	char name[B_FILE_NAME_LENGTH];
5599 	struct vnode* directory;
5600 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5601 		kernel);
5602 	if (status < 0)
5603 		return status;
5604 
5605 	status = create_vnode(directory, name, openMode, perms, kernel);
5606 
5607 	put_vnode(directory);
5608 	return status;
5609 }
5610 
5611 
5612 static int
5613 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5614 	int openMode, bool kernel)
5615 {
5616 	if (name == NULL || *name == '\0')
5617 		return B_BAD_VALUE;
5618 
5619 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5620 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5621 
5622 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5623 
5624 	// get the vnode matching the entry_ref
5625 	struct vnode* vnode;
5626 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5627 		kernel, &vnode);
5628 	if (status != B_OK)
5629 		return status;
5630 
5631 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5632 		put_vnode(vnode);
5633 		return B_LINK_LIMIT;
5634 	}
5635 
5636 	int newFD = open_vnode(vnode, openMode, kernel);
5637 	if (newFD >= 0) {
5638 		// The vnode reference has been transferred to the FD
5639 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5640 			directoryID, vnode->id, name);
5641 	} else
5642 		put_vnode(vnode);
5643 
5644 	return newFD;
5645 }
5646 
5647 
5648 static int
5649 file_open(int fd, char* path, int openMode, bool kernel)
5650 {
5651 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5652 
5653 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5654 		fd, path, openMode, kernel));
5655 
5656 	// get the vnode matching the vnode + path combination
5657 	struct vnode* vnode;
5658 	ino_t parentID;
5659 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5660 		&parentID, kernel);
5661 	if (status != B_OK)
5662 		return status;
5663 
5664 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5665 		put_vnode(vnode);
5666 		return B_LINK_LIMIT;
5667 	}
5668 
5669 	// open the vnode
5670 	int newFD = open_vnode(vnode, openMode, kernel);
5671 	if (newFD >= 0) {
5672 		// The vnode reference has been transferred to the FD
5673 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5674 			vnode->device, parentID, vnode->id, NULL);
5675 	} else
5676 		put_vnode(vnode);
5677 
5678 	return newFD;
5679 }
5680 
5681 
5682 static status_t
5683 file_close(struct file_descriptor* descriptor)
5684 {
5685 	struct vnode* vnode = descriptor->u.vnode;
5686 	status_t status = B_OK;
5687 
5688 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5689 
5690 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5691 		vnode->id);
5692 	if (HAS_FS_CALL(vnode, close)) {
5693 		status = FS_CALL(vnode, close, descriptor->cookie);
5694 	}
5695 
5696 	if (status == B_OK) {
5697 		// remove all outstanding locks for this team
5698 		if (HAS_FS_CALL(vnode, release_lock))
5699 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5700 		else
5701 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5702 	}
5703 	return status;
5704 }
5705 
5706 
5707 static void
5708 file_free_fd(struct file_descriptor* descriptor)
5709 {
5710 	struct vnode* vnode = descriptor->u.vnode;
5711 
5712 	if (vnode != NULL) {
5713 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5714 		put_vnode(vnode);
5715 	}
5716 }
5717 
5718 
5719 static status_t
5720 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5721 	size_t* length)
5722 {
5723 	struct vnode* vnode = descriptor->u.vnode;
5724 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5725 		pos, length, *length));
5726 
5727 	if (S_ISDIR(vnode->Type()))
5728 		return B_IS_A_DIRECTORY;
5729 
5730 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5731 }
5732 
5733 
5734 static status_t
5735 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5736 	size_t* length)
5737 {
5738 	struct vnode* vnode = descriptor->u.vnode;
5739 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5740 		length));
5741 
5742 	if (S_ISDIR(vnode->Type()))
5743 		return B_IS_A_DIRECTORY;
5744 	if (!HAS_FS_CALL(vnode, write))
5745 		return B_READ_ONLY_DEVICE;
5746 
5747 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5748 }
5749 
5750 
5751 static off_t
5752 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5753 {
5754 	struct vnode* vnode = descriptor->u.vnode;
5755 	off_t offset;
5756 	bool isDevice = false;
5757 
5758 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5759 		seekType));
5760 
5761 	// some kinds of files are not seekable
5762 	switch (vnode->Type() & S_IFMT) {
5763 		case S_IFIFO:
5764 		case S_IFSOCK:
5765 			return ESPIPE;
5766 
5767 		// drivers publish block devices as character devices, so handle both
5768 		case S_IFBLK:
5769 		case S_IFCHR:
5770 			isDevice = true;
5771 			break;
5772 		// The Open Group Base Specs don't single out any file types besides
5773 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5774 		case S_IFREG:
5775 		case S_IFDIR:
5776 		case S_IFLNK:
5777 			break;
5778 	}
5779 
5780 	switch (seekType) {
5781 		case SEEK_SET:
5782 			offset = 0;
5783 			break;
5784 		case SEEK_CUR:
5785 			offset = descriptor->pos;
5786 			break;
5787 		case SEEK_END:
5788 		{
5789 			// stat() the node
5790 			if (!HAS_FS_CALL(vnode, read_stat))
5791 				return B_UNSUPPORTED;
5792 
5793 			struct stat stat;
5794 			status_t status = FS_CALL(vnode, read_stat, &stat);
5795 			if (status != B_OK)
5796 				return status;
5797 
5798 			offset = stat.st_size;
5799 
5800 			if (offset == 0 && isDevice) {
5801 				// stat() on regular drivers doesn't report size
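				// In that case the capacity is computed as the CHS product
				// reported by B_GET_GEOMETRY. For example (hypothetical
				// numbers), a device with 512-byte sectors, 63 sectors per
				// track, 255 heads, and 1024 cylinders would yield
				// 512 * 63 * 255 * 1024 = 8422686720 bytes.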
5802 				device_geometry geometry;
5803 
5804 				if (HAS_FS_CALL(vnode, ioctl)) {
5805 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5806 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5807 					if (status == B_OK)
5808 						offset = (off_t)geometry.bytes_per_sector
5809 							* geometry.sectors_per_track
5810 							* geometry.cylinder_count
5811 							* geometry.head_count;
5812 				}
5813 			}
5814 
5815 			break;
5816 		}
5817 		default:
5818 			return B_BAD_VALUE;
5819 	}
5820 
5821 	// assumes off_t is 64 bits wide
5822 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5823 		return B_BUFFER_OVERFLOW;
5824 
5825 	pos += offset;
5826 	if (pos < 0)
5827 		return B_BAD_VALUE;
5828 
5829 	return descriptor->pos = pos;
5830 }
5831 
5832 
5833 static status_t
5834 file_select(struct file_descriptor* descriptor, uint8 event,
5835 	struct selectsync* sync)
5836 {
5837 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5838 
5839 	struct vnode* vnode = descriptor->u.vnode;
5840 
5841 	// If the FS has no select() hook, notify select() now.
5842 	if (!HAS_FS_CALL(vnode, select)) {
5843 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5844 			return notify_select_event(sync, event);
5845 		else
5846 			return B_OK;
5847 	}
5848 
5849 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5850 }
5851 
5852 
5853 static status_t
5854 file_deselect(struct file_descriptor* descriptor, uint8 event,
5855 	struct selectsync* sync)
5856 {
5857 	struct vnode* vnode = descriptor->u.vnode;
5858 
5859 	if (!HAS_FS_CALL(vnode, deselect))
5860 		return B_OK;
5861 
5862 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5863 }
5864 
5865 
5866 static status_t
5867 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5868 	bool kernel)
5869 {
5870 	struct vnode* vnode;
5871 	status_t status;
5872 
5873 	if (name == NULL || *name == '\0')
5874 		return B_BAD_VALUE;
5875 
5876 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5877 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5878 
5879 	status = get_vnode(mountID, parentID, &vnode, true, false);
5880 	if (status != B_OK)
5881 		return status;
5882 
5883 	if (HAS_FS_CALL(vnode, create_dir))
5884 		status = FS_CALL(vnode, create_dir, name, perms);
5885 	else
5886 		status = B_READ_ONLY_DEVICE;
5887 
5888 	put_vnode(vnode);
5889 	return status;
5890 }
5891 
5892 
5893 static status_t
5894 dir_create(int fd, char* path, int perms, bool kernel)
5895 {
5896 	char filename[B_FILE_NAME_LENGTH];
5897 	struct vnode* vnode;
5898 	status_t status;
5899 
5900 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5901 		kernel));
5902 
5903 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5904 	if (status < 0)
5905 		return status;
5906 
5907 	if (HAS_FS_CALL(vnode, create_dir)) {
5908 		status = FS_CALL(vnode, create_dir, filename, perms);
5909 	} else
5910 		status = B_READ_ONLY_DEVICE;
5911 
5912 	put_vnode(vnode);
5913 	return status;
5914 }
5915 
5916 
5917 static int
5918 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5919 {
5920 	FUNCTION(("dir_open_entry_ref()\n"));
5921 
5922 	if (name && name[0] == '\0')
5923 		return B_BAD_VALUE;
5924 
5925 	// get the vnode matching the entry_ref/node_ref
5926 	struct vnode* vnode;
5927 	status_t status;
5928 	if (name) {
5929 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5930 			&vnode);
5931 	} else
5932 		status = get_vnode(mountID, parentID, &vnode, true, false);
5933 	if (status != B_OK)
5934 		return status;
5935 
5936 	int newFD = open_dir_vnode(vnode, kernel);
5937 	if (newFD >= 0) {
5938 		// The vnode reference has been transferred to the FD
5939 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5940 			vnode->id, name);
5941 	} else
5942 		put_vnode(vnode);
5943 
5944 	return newFD;
5945 }
5946 
5947 
5948 static int
5949 dir_open(int fd, char* path, bool kernel)
5950 {
5951 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5952 		kernel));
5953 
5954 	// get the vnode matching the vnode + path combination
5955 	struct vnode* vnode = NULL;
5956 	ino_t parentID;
5957 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5958 		kernel);
5959 	if (status != B_OK)
5960 		return status;
5961 
5962 	// open the dir
5963 	int newFD = open_dir_vnode(vnode, kernel);
5964 	if (newFD >= 0) {
5965 		// The vnode reference has been transferred to the FD
5966 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5967 			parentID, vnode->id, NULL);
5968 	} else
5969 		put_vnode(vnode);
5970 
5971 	return newFD;
5972 }
5973 
5974 
5975 static status_t
5976 dir_close(struct file_descriptor* descriptor)
5977 {
5978 	struct vnode* vnode = descriptor->u.vnode;
5979 
5980 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5981 
5982 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5983 		vnode->id);
5984 	if (HAS_FS_CALL(vnode, close_dir))
5985 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5986 
5987 	return B_OK;
5988 }
5989 
5990 
5991 static void
5992 dir_free_fd(struct file_descriptor* descriptor)
5993 {
5994 	struct vnode* vnode = descriptor->u.vnode;
5995 
5996 	if (vnode != NULL) {
5997 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5998 		put_vnode(vnode);
5999 	}
6000 }
6001 
6002 
6003 static status_t
6004 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6005 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6006 {
6007 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6008 		bufferSize, _count);
6009 }
6010 
6011 
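/*! Post-processes a dirent returned by a file system: fills in d_pdev and
	d_pino with the parent directory's IDs, resolves ".." across mount
	boundaries, and replaces the device/node IDs of covered vnodes with
	those of the covering (top-most) vnode.
*/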
6012 static status_t
6013 fix_dirent(struct vnode* parent, struct dirent* entry,
6014 	struct io_context* ioContext)
6015 {
6016 	// set d_pdev and d_pino
6017 	entry->d_pdev = parent->device;
6018 	entry->d_pino = parent->id;
6019 
6020 	// If this is the ".." entry and the directory is covering another
6021 	// vnode, we need to replace d_dev and d_ino with the actual values.
6022 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6023 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6024 			ioContext);
6025 	}
6026 
6027 	// resolve covered vnodes
6028 	ReadLocker _(&sVnodeLock);
6029 
6030 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6031 	if (vnode != NULL && vnode->covered_by != NULL) {
6032 		do {
6033 			vnode = vnode->covered_by;
6034 		} while (vnode->covered_by != NULL);
6035 
6036 		entry->d_dev = vnode->device;
6037 		entry->d_ino = vnode->id;
6038 	}
6039 
6040 	return B_OK;
6041 }
6042 
6043 
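/*! Reads up to *_count entries from the given directory into the supplied
	buffer via the FS read_dir() hook and adjusts each entry with
	fix_dirent() afterwards.
*/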
6044 static status_t
6045 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6046 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6047 {
6048 	if (!HAS_FS_CALL(vnode, read_dir))
6049 		return B_UNSUPPORTED;
6050 
6051 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6052 		_count);
6053 	if (error != B_OK)
6054 		return error;
6055 
6056 	// we need to adjust the read dirents
6057 	uint32 count = *_count;
6058 	for (uint32 i = 0; i < count; i++) {
6059 		error = fix_dirent(vnode, buffer, ioContext);
6060 		if (error != B_OK)
6061 			return error;
6062 
6063 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6064 	}
6065 
6066 	return error;
6067 }
6068 
6069 
6070 static status_t
6071 dir_rewind(struct file_descriptor* descriptor)
6072 {
6073 	struct vnode* vnode = descriptor->u.vnode;
6074 
6075 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6076 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6077 	}
6078 
6079 	return B_UNSUPPORTED;
6080 }
6081 
6082 
6083 static status_t
6084 dir_remove(int fd, char* path, bool kernel)
6085 {
6086 	char name[B_FILE_NAME_LENGTH];
6087 	struct vnode* directory;
6088 	status_t status;
6089 
6090 	if (path != NULL) {
6091 		// we need to make sure our path name doesn't end in "/", ".",
6092 		// or ".."
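		// (e.g. both "foo///" and "foo/." reduce to "foo", while "foo/.."
		// is rejected outright)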
6093 		char* lastSlash;
6094 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6095 			char* leaf = lastSlash + 1;
6096 			if (!strcmp(leaf, ".."))
6097 				return B_NOT_ALLOWED;
6098 
6099 			// omit multiple slashes
6100 			while (lastSlash > path && lastSlash[-1] == '/')
6101 				lastSlash--;
6102 
6103 			if (leaf[0] != '\0'
6104 				&& strcmp(leaf, ".") != 0) {
6105 				break;
6106 			}
6107 			// "name/" -> "name", or "name/." -> "name"
6108 			lastSlash[0] = '\0';
6109 		}
6110 
6111 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6112 			return B_NOT_ALLOWED;
6113 	}
6114 
6115 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6116 	if (status != B_OK)
6117 		return status;
6118 
6119 	if (HAS_FS_CALL(directory, remove_dir))
6120 		status = FS_CALL(directory, remove_dir, name);
6121 	else
6122 		status = B_READ_ONLY_DEVICE;
6123 
6124 	put_vnode(directory);
6125 	return status;
6126 }
6127 
6128 
6129 static status_t
6130 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6131 	size_t length)
6132 {
6133 	struct vnode* vnode = descriptor->u.vnode;
6134 
6135 	if (HAS_FS_CALL(vnode, ioctl))
6136 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6137 
6138 	return B_DEV_INVALID_IOCTL;
6139 }
6140 
6141 
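/*! Backend for the fcntl() syscall. Supported ops: F_SETFD/F_GETFD
	(close-on-exec flag), F_SETFL/F_GETFL (only O_APPEND and O_NONBLOCK may
	be changed), F_DUPFD/F_DUPFD_CLOEXEC, and the POSIX advisory locking
	ops F_GETLK, F_SETLK, and F_SETLKW.
*/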
6142 static status_t
6143 common_fcntl(int fd, int op, size_t argument, bool kernel)
6144 {
6145 	struct flock flock;
6146 
6147 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6148 		fd, op, argument, kernel ? "kernel" : "user"));
6149 
6150 	struct io_context* context = get_current_io_context(kernel);
6151 
6152 	struct file_descriptor* descriptor = get_fd(context, fd);
6153 	if (descriptor == NULL)
6154 		return B_FILE_ERROR;
6155 
6156 	struct vnode* vnode = fd_vnode(descriptor);
6157 
6158 	status_t status = B_OK;
6159 
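	// For the locking ops the caller passes in a struct flock. A
	// hypothetical userland caller might look like this (illustration
	// only):
	//	struct flock flock = {};
	//	flock.l_type = F_WRLCK;
	//	flock.l_whence = SEEK_SET;
	//	flock.l_start = 0;
	//	flock.l_len = 0;	// 0 means "to the end of the file"
	//	fcntl(fd, F_SETLKW, &flock);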
6160 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6161 		if (descriptor->type != FDTYPE_FILE)
6162 			status = B_BAD_VALUE;
6163 		else if (kernel)
6164 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6165 		else if (user_memcpy(&flock, (struct flock*)argument,
6166 				sizeof(struct flock)) != B_OK)
6167 			status = B_BAD_ADDRESS;
6168 		if (status != B_OK) {
6169 			put_fd(descriptor);
6170 			return status;
6171 		}
6172 	}
6173 
6174 	switch (op) {
6175 		case F_SETFD:
6176 		{
6177 			// Set file descriptor flags
6178 
6179 			// FD_CLOEXEC is the only flag available at this time
6180 			mutex_lock(&context->io_mutex);
6181 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6182 			mutex_unlock(&context->io_mutex);
6183 
6184 			status = B_OK;
6185 			break;
6186 		}
6187 
6188 		case F_GETFD:
6189 		{
6190 			// Get file descriptor flags
6191 			mutex_lock(&context->io_mutex);
6192 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6193 			mutex_unlock(&context->io_mutex);
6194 			break;
6195 		}
6196 
6197 		case F_SETFL:
6198 			// Set file descriptor open mode
6199 
6200 			// we only accept changes to O_APPEND and O_NONBLOCK
6201 			argument &= O_APPEND | O_NONBLOCK;
6202 			if (descriptor->ops->fd_set_flags != NULL) {
6203 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6204 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6205 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6206 					(int)argument);
6207 			} else
6208 				status = B_UNSUPPORTED;
6209 
6210 			if (status == B_OK) {
6211 				// update this descriptor's open_mode field
6212 				descriptor->open_mode = (descriptor->open_mode
6213 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6214 			}
6215 
6216 			break;
6217 
6218 		case F_GETFL:
6219 			// Get file descriptor open mode
6220 			status = descriptor->open_mode;
6221 			break;
6222 
6223 		case F_DUPFD:
6224 		case F_DUPFD_CLOEXEC:
6225 		{
6226 			status = new_fd_etc(context, descriptor, (int)argument);
6227 			if (status >= 0) {
6228 				mutex_lock(&context->io_mutex);
6229 				fd_set_close_on_exec(context, fd, op == F_DUPFD_CLOEXEC);
6230 				mutex_unlock(&context->io_mutex);
6231 
6232 				atomic_add(&descriptor->ref_count, 1);
6233 			}
6234 			break;
6235 		}
6236 
6237 		case F_GETLK:
6238 			if (vnode != NULL) {
6239 				struct flock normalizedLock;
6240 
6241 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6242 				status = normalize_flock(descriptor, &normalizedLock);
6243 				if (status != B_OK)
6244 					break;
6245 
6246 				if (HAS_FS_CALL(vnode, test_lock)) {
6247 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6248 						&normalizedLock);
6249 				} else
6250 					status = test_advisory_lock(vnode, &normalizedLock);
6251 				if (status == B_OK) {
6252 					if (normalizedLock.l_type == F_UNLCK) {
6253 						// no conflicting lock found, copy back the same struct
6254 						// we were given except change type to F_UNLCK
6255 						flock.l_type = F_UNLCK;
6256 						if (kernel) {
6257 							memcpy((struct flock*)argument, &flock,
6258 								sizeof(struct flock));
6259 						} else {
6260 							status = user_memcpy((struct flock*)argument,
6261 								&flock, sizeof(struct flock));
6262 						}
6263 					} else {
6264 						// a conflicting lock was found, copy back its range and
6265 						// type
6266 						if (normalizedLock.l_len == OFF_MAX)
6267 							normalizedLock.l_len = 0;
6268 
6269 						if (kernel) {
6270 							memcpy((struct flock*)argument,
6271 								&normalizedLock, sizeof(struct flock));
6272 						} else {
6273 							status = user_memcpy((struct flock*)argument,
6274 								&normalizedLock, sizeof(struct flock));
6275 						}
6276 					}
6277 				}
6278 			} else
6279 				status = B_BAD_VALUE;
6280 			break;
6281 
6282 		case F_SETLK:
6283 		case F_SETLKW:
6284 			status = normalize_flock(descriptor, &flock);
6285 			if (status != B_OK)
6286 				break;
6287 
6288 			if (vnode == NULL) {
6289 				status = B_BAD_VALUE;
6290 			} else if (flock.l_type == F_UNLCK) {
6291 				if (HAS_FS_CALL(vnode, release_lock)) {
6292 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6293 						&flock);
6294 				} else {
6295 					status = release_advisory_lock(vnode, context, NULL,
6296 						&flock);
6297 				}
6298 			} else {
6299 				// the open mode must match the lock type
6300 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6301 						&& flock.l_type == F_WRLCK)
6302 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6303 						&& flock.l_type == F_RDLCK))
6304 					status = B_FILE_ERROR;
6305 				else {
6306 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6307 						status = FS_CALL(vnode, acquire_lock,
6308 							descriptor->cookie, &flock, op == F_SETLKW);
6309 					} else {
6310 						status = acquire_advisory_lock(vnode, context, NULL,
6311 							&flock, op == F_SETLKW);
6312 					}
6313 				}
6314 			}
6315 			break;
6316 
6317 		// ToDo: add support for more ops?
6318 
6319 		default:
6320 			status = B_BAD_VALUE;
6321 	}
6322 
6323 	put_fd(descriptor);
6324 	return status;
6325 }
6326 
6327 
6328 static status_t
6329 common_sync(int fd, bool kernel)
6330 {
6331 	struct file_descriptor* descriptor;
6332 	struct vnode* vnode;
6333 	status_t status;
6334 
6335 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6336 
6337 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6338 	if (descriptor == NULL)
6339 		return B_FILE_ERROR;
6340 
6341 	if (HAS_FS_CALL(vnode, fsync))
6342 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6343 	else
6344 		status = B_UNSUPPORTED;
6345 
6346 	put_fd(descriptor);
6347 	return status;
6348 }
6349 
6350 
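/*! Atomically marks the node referred to by the FD as exclusively locked
	by that descriptor; fails with B_BUSY if another descriptor already
	holds the lock.
*/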
6351 static status_t
6352 common_lock_node(int fd, bool kernel)
6353 {
6354 	struct file_descriptor* descriptor;
6355 	struct vnode* vnode;
6356 
6357 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6358 	if (descriptor == NULL)
6359 		return B_FILE_ERROR;
6360 
6361 	status_t status = B_OK;
6362 
6363 	// We need to set the locking atomically - someone
6364 	// else might set one at the same time
6365 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6366 			(file_descriptor*)NULL) != NULL)
6367 		status = B_BUSY;
6368 
6369 	put_fd(descriptor);
6370 	return status;
6371 }
6372 
6373 
6374 static status_t
6375 common_unlock_node(int fd, bool kernel)
6376 {
6377 	struct file_descriptor* descriptor;
6378 	struct vnode* vnode;
6379 
6380 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6381 	if (descriptor == NULL)
6382 		return B_FILE_ERROR;
6383 
6384 	status_t status = B_OK;
6385 
6386 	// We need to clear the lock atomically - someone
6387 	// else might set one at the same time
6388 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6389 			(file_descriptor*)NULL, descriptor) != descriptor)
6390 		status = B_BAD_VALUE;
6391 
6392 	put_fd(descriptor);
6393 	return status;
6394 }
6395 
6396 
6397 static status_t
6398 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6399 	bool kernel)
6400 {
6401 	struct vnode* vnode;
6402 	status_t status;
6403 
6404 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6405 	if (status != B_OK)
6406 		return status;
6407 
6408 	if (HAS_FS_CALL(vnode, read_symlink)) {
6409 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6410 	} else
6411 		status = B_BAD_VALUE;
6412 
6413 	put_vnode(vnode);
6414 	return status;
6415 }
6416 
6417 
6418 static status_t
6419 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6420 	bool kernel)
6421 {
6422 	// path validity checks have to be in the calling function!
6423 	char name[B_FILE_NAME_LENGTH];
6424 	struct vnode* vnode;
6425 	status_t status;
6426 
6427 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6428 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6429 
6430 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6431 	if (status != B_OK)
6432 		return status;
6433 
6434 	if (HAS_FS_CALL(vnode, create_symlink))
6435 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6436 	else {
6437 		status = HAS_FS_CALL(vnode, write)
6438 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6439 	}
6440 
6441 	put_vnode(vnode);
6442 
6443 	return status;
6444 }
6445 
6446 
6447 static status_t
6448 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6449 	bool traverseLeafLink, bool kernel)
6450 {
6451 	// path validity checks have to be in the calling function!
6452 
6453 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6454 		toPath, kernel));
6455 
6456 	char name[B_FILE_NAME_LENGTH];
6457 	struct vnode* directory;
6458 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6459 		kernel);
6460 	if (status != B_OK)
6461 		return status;
6462 
6463 	struct vnode* vnode;
6464 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6465 		kernel);
6466 	if (status != B_OK)
6467 		goto err;
6468 
6469 	if (directory->mount != vnode->mount) {
6470 		status = B_CROSS_DEVICE_LINK;
6471 		goto err1;
6472 	}
6473 
6474 	if (HAS_FS_CALL(directory, link))
6475 		status = FS_CALL(directory, link, name, vnode);
6476 	else
6477 		status = B_READ_ONLY_DEVICE;
6478 
6479 err1:
6480 	put_vnode(vnode);
6481 err:
6482 	put_vnode(directory);
6483 
6484 	return status;
6485 }
6486 
6487 
6488 static status_t
6489 common_unlink(int fd, char* path, bool kernel)
6490 {
6491 	char filename[B_FILE_NAME_LENGTH];
6492 	struct vnode* vnode;
6493 	status_t status;
6494 
6495 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6496 		kernel));
6497 
6498 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6499 	if (status < 0)
6500 		return status;
6501 
6502 	if (HAS_FS_CALL(vnode, unlink))
6503 		status = FS_CALL(vnode, unlink, filename);
6504 	else
6505 		status = B_READ_ONLY_DEVICE;
6506 
6507 	put_vnode(vnode);
6508 
6509 	return status;
6510 }
6511 
6512 
6513 static status_t
6514 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6515 {
6516 	struct vnode* vnode;
6517 	status_t status;
6518 
6519 	// TODO: honor effectiveUserGroup argument
6520 
6521 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6522 	if (status != B_OK)
6523 		return status;
6524 
6525 	if (HAS_FS_CALL(vnode, access))
6526 		status = FS_CALL(vnode, access, mode);
6527 	else
6528 		status = B_OK;
6529 
6530 	put_vnode(vnode);
6531 
6532 	return status;
6533 }
6534 
6535 
6536 static status_t
6537 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6538 {
6539 	struct vnode* fromVnode;
6540 	struct vnode* toVnode;
6541 	char fromName[B_FILE_NAME_LENGTH];
6542 	char toName[B_FILE_NAME_LENGTH];
6543 	status_t status;
6544 
6545 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6546 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6547 
6548 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6549 	if (status != B_OK)
6550 		return status;
6551 
6552 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6553 	if (status != B_OK)
6554 		goto err1;
6555 
6556 	if (fromVnode->device != toVnode->device) {
6557 		status = B_CROSS_DEVICE_LINK;
6558 		goto err2;
6559 	}
6560 
6561 	if (fromName[0] == '\0' || toName[0] == '\0'
6562 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6563 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6564 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6565 		status = B_BAD_VALUE;
6566 		goto err2;
6567 	}
6568 
6569 	if (HAS_FS_CALL(fromVnode, rename))
6570 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6571 	else
6572 		status = B_READ_ONLY_DEVICE;
6573 
6574 err2:
6575 	put_vnode(toVnode);
6576 err1:
6577 	put_vnode(fromVnode);
6578 
6579 	return status;
6580 }
6581 
6582 
6583 static status_t
6584 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6585 {
6586 	struct vnode* vnode = descriptor->u.vnode;
6587 
6588 	FUNCTION(("common_read_stat: stat %p\n", stat));
6589 
6590 	// TODO: remove this once all file systems properly set them!
6591 	stat->st_crtim.tv_nsec = 0;
6592 	stat->st_ctim.tv_nsec = 0;
6593 	stat->st_mtim.tv_nsec = 0;
6594 	stat->st_atim.tv_nsec = 0;
6595 
6596 	return vfs_stat_vnode(vnode, stat);
6597 }
6598 
6599 
6600 static status_t
6601 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6602 	int statMask)
6603 {
6604 	struct vnode* vnode = descriptor->u.vnode;
6605 
6606 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6607 		vnode, stat, statMask));
6608 
6609 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
6610 		return B_BAD_VALUE;
6611 
6612 	if (!HAS_FS_CALL(vnode, write_stat))
6613 		return B_READ_ONLY_DEVICE;
6614 
6615 	return FS_CALL(vnode, write_stat, stat, statMask);
6616 }
6617 
6618 
6619 static status_t
6620 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6621 	struct stat* stat, bool kernel)
6622 {
6623 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6624 		stat));
6625 
6626 	struct vnode* vnode;
6627 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6628 		NULL, kernel);
6629 	if (status != B_OK)
6630 		return status;
6631 
6632 	status = vfs_stat_vnode(vnode, stat);
6633 
6634 	put_vnode(vnode);
6635 	return status;
6636 }
6637 
6638 
6639 static status_t
6640 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6641 	const struct stat* stat, int statMask, bool kernel)
6642 {
6643 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, "
6644 		"stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6645 
6646 	struct vnode* vnode;
6647 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6648 		NULL, kernel);
6649 	if (status != B_OK)
6650 		return status;
6651 
6652 	if (HAS_FS_CALL(vnode, write_stat))
6653 		status = FS_CALL(vnode, write_stat, stat, statMask);
6654 	else
6655 		status = B_READ_ONLY_DEVICE;
6656 
6657 	put_vnode(vnode);
6658 
6659 	return status;
6660 }
6661 
6662 
6663 static int
6664 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6665 {
6666 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6667 		kernel));
6668 
6669 	struct vnode* vnode;
6670 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6671 		NULL, kernel);
6672 	if (status != B_OK)
6673 		return status;
6674 
6675 	status = open_attr_dir_vnode(vnode, kernel);
6676 	if (status < 0)
6677 		put_vnode(vnode);
6678 
6679 	return status;
6680 }
6681 
6682 
6683 static status_t
6684 attr_dir_close(struct file_descriptor* descriptor)
6685 {
6686 	struct vnode* vnode = descriptor->u.vnode;
6687 
6688 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6689 
6690 	if (HAS_FS_CALL(vnode, close_attr_dir))
6691 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6692 
6693 	return B_OK;
6694 }
6695 
6696 
6697 static void
6698 attr_dir_free_fd(struct file_descriptor* descriptor)
6699 {
6700 	struct vnode* vnode = descriptor->u.vnode;
6701 
6702 	if (vnode != NULL) {
6703 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6704 		put_vnode(vnode);
6705 	}
6706 }
6707 
6708 
6709 static status_t
6710 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6711 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6712 {
6713 	struct vnode* vnode = descriptor->u.vnode;
6714 
6715 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6716 
6717 	if (HAS_FS_CALL(vnode, read_attr_dir))
6718 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6719 			bufferSize, _count);
6720 
6721 	return B_UNSUPPORTED;
6722 }
6723 
6724 
6725 static status_t
6726 attr_dir_rewind(struct file_descriptor* descriptor)
6727 {
6728 	struct vnode* vnode = descriptor->u.vnode;
6729 
6730 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6731 
6732 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6733 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6734 
6735 	return B_UNSUPPORTED;
6736 }
6737 
6738 
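/*! Creates a new attribute with the given name and type on the node
	specified by fd + path, and opens it. If anything goes wrong after the
	attribute has been created, it is removed again before returning.
*/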
6739 static int
6740 attr_create(int fd, char* path, const char* name, uint32 type,
6741 	int openMode, bool kernel)
6742 {
6743 	if (name == NULL || *name == '\0')
6744 		return B_BAD_VALUE;
6745 
6746 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6747 	struct vnode* vnode;
6748 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6749 		kernel);
6750 	if (status != B_OK)
6751 		return status;
6752 
6753 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6754 		status = B_LINK_LIMIT;
6755 		goto err;
6756 	}
6757 
6758 	if (!HAS_FS_CALL(vnode, create_attr)) {
6759 		status = B_READ_ONLY_DEVICE;
6760 		goto err;
6761 	}
6762 
6763 	void* cookie;
6764 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6765 	if (status != B_OK)
6766 		goto err;
6767 
6768 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6769 	if (fd >= 0)
6770 		return fd;
6771 
6772 	status = fd;
6773 
6774 	FS_CALL(vnode, close_attr, cookie);
6775 	FS_CALL(vnode, free_attr_cookie, cookie);
6776 
6777 	FS_CALL(vnode, remove_attr, name);
6778 
6779 err:
6780 	put_vnode(vnode);
6781 
6782 	return status;
6783 }
6784 
6785 
6786 static int
6787 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6788 {
6789 	if (name == NULL || *name == '\0')
6790 		return B_BAD_VALUE;
6791 
6792 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6793 	struct vnode* vnode;
6794 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6795 		kernel);
6796 	if (status != B_OK)
6797 		return status;
6798 
6799 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6800 		status = B_LINK_LIMIT;
6801 		goto err;
6802 	}
6803 
6804 	if (!HAS_FS_CALL(vnode, open_attr)) {
6805 		status = B_UNSUPPORTED;
6806 		goto err;
6807 	}
6808 
6809 	void* cookie;
6810 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6811 	if (status != B_OK)
6812 		goto err;
6813 
6814 	// now we only need a file descriptor for this attribute and we're done
6815 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6816 	if (fd >= 0)
6817 		return fd;
6818 
6819 	status = fd;
6820 
6821 	FS_CALL(vnode, close_attr, cookie);
6822 	FS_CALL(vnode, free_attr_cookie, cookie);
6823 
6824 err:
6825 	put_vnode(vnode);
6826 
6827 	return status;
6828 }
6829 
6830 
6831 static status_t
6832 attr_close(struct file_descriptor* descriptor)
6833 {
6834 	struct vnode* vnode = descriptor->u.vnode;
6835 
6836 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6837 
6838 	if (HAS_FS_CALL(vnode, close_attr))
6839 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6840 
6841 	return B_OK;
6842 }
6843 
6844 
6845 static void
6846 attr_free_fd(struct file_descriptor* descriptor)
6847 {
6848 	struct vnode* vnode = descriptor->u.vnode;
6849 
6850 	if (vnode != NULL) {
6851 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6852 		put_vnode(vnode);
6853 	}
6854 }
6855 
6856 
6857 static status_t
6858 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6859 	size_t* length)
6860 {
6861 	struct vnode* vnode = descriptor->u.vnode;
6862 
6863 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6864 		pos, length, *length));
6865 
6866 	if (!HAS_FS_CALL(vnode, read_attr))
6867 		return B_UNSUPPORTED;
6868 
6869 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6870 }
6871 
6872 
6873 static status_t
6874 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6875 	size_t* length)
6876 {
6877 	struct vnode* vnode = descriptor->u.vnode;
6878 
6879 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6880 		length));
6881 
6882 	if (!HAS_FS_CALL(vnode, write_attr))
6883 		return B_UNSUPPORTED;
6884 
6885 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6886 }
6887 
6888 
6889 static off_t
6890 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6891 {
6892 	off_t offset;
6893 
6894 	switch (seekType) {
6895 		case SEEK_SET:
6896 			offset = 0;
6897 			break;
6898 		case SEEK_CUR:
6899 			offset = descriptor->pos;
6900 			break;
6901 		case SEEK_END:
6902 		{
6903 			struct vnode* vnode = descriptor->u.vnode;
6904 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6905 				return B_UNSUPPORTED;
6906 
6907 			struct stat stat;
6908 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6909 				&stat);
6910 			if (status != B_OK)
6911 				return status;
6912 
6913 			offset = stat.st_size;
6914 			break;
6915 		}
6916 		default:
6917 			return B_BAD_VALUE;
6918 	}
6919 
6920 	// assumes off_t is 64 bits wide
6921 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6922 		return B_BUFFER_OVERFLOW;
6923 
6924 	pos += offset;
6925 	if (pos < 0)
6926 		return B_BAD_VALUE;
6927 
6928 	return descriptor->pos = pos;
6929 }
6930 
6931 
6932 static status_t
6933 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6934 {
6935 	struct vnode* vnode = descriptor->u.vnode;
6936 
6937 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6938 
6939 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6940 		return B_UNSUPPORTED;
6941 
6942 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6943 }
6944 
6945 
6946 static status_t
6947 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6948 	int statMask)
6949 {
6950 	struct vnode* vnode = descriptor->u.vnode;
6951 
6952 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6953 
6954 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6955 		return B_READ_ONLY_DEVICE;
6956 
6957 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6958 }
6959 
6960 
6961 static status_t
6962 attr_remove(int fd, const char* name, bool kernel)
6963 {
6964 	struct file_descriptor* descriptor;
6965 	struct vnode* vnode;
6966 	status_t status;
6967 
6968 	if (name == NULL || *name == '\0')
6969 		return B_BAD_VALUE;
6970 
6971 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6972 		kernel));
6973 
6974 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6975 	if (descriptor == NULL)
6976 		return B_FILE_ERROR;
6977 
6978 	if (HAS_FS_CALL(vnode, remove_attr))
6979 		status = FS_CALL(vnode, remove_attr, name);
6980 	else
6981 		status = B_READ_ONLY_DEVICE;
6982 
6983 	put_fd(descriptor);
6984 
6985 	return status;
6986 }
6987 
6988 
6989 static status_t
6990 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6991 	bool kernel)
6992 {
6993 	struct file_descriptor* fromDescriptor;
6994 	struct file_descriptor* toDescriptor;
6995 	struct vnode* fromVnode;
6996 	struct vnode* toVnode;
6997 	status_t status;
6998 
6999 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7000 		|| *toName == '\0')
7001 		return B_BAD_VALUE;
7002 
7003 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7004 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7005 
7006 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7007 	if (fromDescriptor == NULL)
7008 		return B_FILE_ERROR;
7009 
7010 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7011 	if (toDescriptor == NULL) {
7012 		status = B_FILE_ERROR;
7013 		goto err;
7014 	}
7015 
7016 	// are the files on the same volume?
7017 	if (fromVnode->device != toVnode->device) {
7018 		status = B_CROSS_DEVICE_LINK;
7019 		goto err1;
7020 	}
7021 
7022 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7023 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7024 	} else
7025 		status = B_READ_ONLY_DEVICE;
7026 
7027 err1:
7028 	put_fd(toDescriptor);
7029 err:
7030 	put_fd(fromDescriptor);
7031 
7032 	return status;
7033 }
7034 
7035 
7036 static int
7037 index_dir_open(dev_t mountID, bool kernel)
7038 {
7039 	struct fs_mount* mount;
7040 	void* cookie;
7041 
7042 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7043 		kernel));
7044 
7045 	status_t status = get_mount(mountID, &mount);
7046 	if (status != B_OK)
7047 		return status;
7048 
7049 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7050 		status = B_UNSUPPORTED;
7051 		goto error;
7052 	}
7053 
7054 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7055 	if (status != B_OK)
7056 		goto error;
7057 
7058 	// get fd for the index directory
7059 	int fd;
7060 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7061 	if (fd >= 0)
7062 		return fd;
7063 
7064 	// something went wrong
7065 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7066 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7067 
7068 	status = fd;
7069 
7070 error:
7071 	put_mount(mount);
7072 	return status;
7073 }
7074 
7075 
7076 static status_t
7077 index_dir_close(struct file_descriptor* descriptor)
7078 {
7079 	struct fs_mount* mount = descriptor->u.mount;
7080 
7081 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7082 
7083 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7084 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7085 
7086 	return B_OK;
7087 }
7088 
7089 
7090 static void
7091 index_dir_free_fd(struct file_descriptor* descriptor)
7092 {
7093 	struct fs_mount* mount = descriptor->u.mount;
7094 
7095 	if (mount != NULL) {
7096 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7097 		put_mount(mount);
7098 	}
7099 }
7100 
7101 
7102 static status_t
7103 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7104 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7105 {
7106 	struct fs_mount* mount = descriptor->u.mount;
7107 
7108 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7109 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7110 			bufferSize, _count);
7111 	}
7112 
7113 	return B_UNSUPPORTED;
7114 }
7115 
7116 
7117 static status_t
7118 index_dir_rewind(struct file_descriptor* descriptor)
7119 {
7120 	struct fs_mount* mount = descriptor->u.mount;
7121 
7122 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7123 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7124 
7125 	return B_UNSUPPORTED;
7126 }
7127 
7128 
7129 static status_t
7130 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7131 	bool kernel)
7132 {
7133 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7134 		mountID, name, kernel));
7135 
7136 	struct fs_mount* mount;
7137 	status_t status = get_mount(mountID, &mount);
7138 	if (status != B_OK)
7139 		return status;
7140 
7141 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7142 		status = B_READ_ONLY_DEVICE;
7143 		goto out;
7144 	}
7145 
7146 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7147 
7148 out:
7149 	put_mount(mount);
7150 	return status;
7151 }
7152 
7153 
7154 #if 0
7155 static status_t
7156 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7157 {
7158 	struct vnode* vnode = descriptor->u.vnode;
7159 
7160 	// ToDo: currently unused!
7161 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7162 	if (!HAS_FS_CALL(vnode, read_index_stat))
7163 		return B_UNSUPPORTED;
7164 
7165 	return B_UNSUPPORTED;
7166 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7167 }
7168 
7169 
7170 static void
7171 index_free_fd(struct file_descriptor* descriptor)
7172 {
7173 	struct vnode* vnode = descriptor->u.vnode;
7174 
7175 	if (vnode != NULL) {
7176 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7177 		put_vnode(vnode);
7178 	}
7179 }
7180 #endif
7181 
7182 
7183 static status_t
7184 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7185 	bool kernel)
7186 {
7187 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7188 		"kernel = %d)\n", mountID, name, kernel));
7189 
7190 	struct fs_mount* mount;
7191 	status_t status = get_mount(mountID, &mount);
7192 	if (status != B_OK)
7193 		return status;
7194 
7195 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7196 		status = B_UNSUPPORTED;
7197 		goto out;
7198 	}
7199 
7200 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7201 
7202 out:
7203 	put_mount(mount);
7204 	return status;
7205 }
7206 
7207 
7208 static status_t
7209 index_remove(dev_t mountID, const char* name, bool kernel)
7210 {
7211 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7212 		mountID, name, kernel));
7213 
7214 	struct fs_mount* mount;
7215 	status_t status = get_mount(mountID, &mount);
7216 	if (status != B_OK)
7217 		return status;
7218 
7219 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7220 		status = B_READ_ONLY_DEVICE;
7221 		goto out;
7222 	}
7223 
7224 	status = FS_MOUNT_CALL(mount, remove_index, name);
7225 
7226 out:
7227 	put_mount(mount);
7228 	return status;
7229 }
7230 
7231 
7232 /*!	TODO: the query FS API is still pretty much the same as in R5.
7233 		It would be nice if the FS would find some more kernel support
7234 		for them.
7235 		For example, query parsing should be moved into the kernel.
7236 */
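/*! Opens a query on the given volume. The port/token pair is passed through
	to the FS; for live queries it designates where entry update
	notifications are sent.
*/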
7237 static int
7238 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7239 	int32 token, bool kernel)
7240 {
7241 	struct fs_mount* mount;
7242 	void* cookie;
7243 
7244 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7245 		device, query, kernel));
7246 
7247 	status_t status = get_mount(device, &mount);
7248 	if (status != B_OK)
7249 		return status;
7250 
7251 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7252 		status = B_UNSUPPORTED;
7253 		goto error;
7254 	}
7255 
7256 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7257 		&cookie);
7258 	if (status != B_OK)
7259 		goto error;
7260 
7261 	// get fd for the query
7262 	int fd;
7263 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7264 	if (fd >= 0)
7265 		return fd;
7266 
7267 	status = fd;
7268 
7269 	// something went wrong
7270 	FS_MOUNT_CALL(mount, close_query, cookie);
7271 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7272 
7273 error:
7274 	put_mount(mount);
7275 	return status;
7276 }
7277 
7278 
7279 static status_t
7280 query_close(struct file_descriptor* descriptor)
7281 {
7282 	struct fs_mount* mount = descriptor->u.mount;
7283 
7284 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7285 
7286 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7287 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7288 
7289 	return B_OK;
7290 }
7291 
7292 
7293 static void
7294 query_free_fd(struct file_descriptor* descriptor)
7295 {
7296 	struct fs_mount* mount = descriptor->u.mount;
7297 
7298 	if (mount != NULL) {
7299 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7300 		put_mount(mount);
7301 	}
7302 }
7303 
7304 
7305 static status_t
7306 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7307 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7308 {
7309 	struct fs_mount* mount = descriptor->u.mount;
7310 
7311 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7312 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7313 			bufferSize, _count);
7314 	}
7315 
7316 	return B_UNSUPPORTED;
7317 }
7318 
7319 
7320 static status_t
7321 query_rewind(struct file_descriptor* descriptor)
7322 {
7323 	struct fs_mount* mount = descriptor->u.mount;
7324 
7325 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7326 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7327 
7328 	return B_UNSUPPORTED;
7329 }
7330 
7331 
7332 //	#pragma mark - General File System functions
7333 
7334 
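/*! Mounts the file system with the given name (possibly a stack of layered
	file systems) on the directory specified by the given path, or as the
	root volume if nothing has been mounted yet. Returns the dev_t ID of
	the new mount on success, an error code otherwise.
*/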
7335 static dev_t
7336 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7337 	const char* args, bool kernel)
7338 {
7339 	struct ::fs_mount* mount;
7340 	status_t status = B_OK;
7341 	fs_volume* volume = NULL;
7342 	int32 layer = 0;
7343 	Vnode* coveredNode = NULL;
7344 
7345 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7346 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7347 
7348 	// The path is always safe, we just have to make sure that fsName is
7349 	// at least minimally valid - we can't make any assumptions about args,
7350 	// though. A NULL fsName is OK if a device was given and the FS is not
7351 	// virtual; we'll get the name from the DDM later.
7352 	if (fsName == NULL) {
7353 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7354 			return B_BAD_VALUE;
7355 	} else if (fsName[0] == '\0')
7356 		return B_BAD_VALUE;
7357 
7358 	RecursiveLocker mountOpLocker(sMountOpLock);
7359 
7360 	// Helper to delete a newly created file device on failure.
7361 	// Not exactly beautiful, but helps to keep the code below cleaner.
7362 	struct FileDeviceDeleter {
7363 		FileDeviceDeleter() : id(-1) {}
7364 		~FileDeviceDeleter()
7365 		{
7366 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7367 		}
7368 
7369 		partition_id id;
7370 	} fileDeviceDeleter;
7371 
7372 	// If the file system is not a "virtual" one, the device argument should
7373 	// point to a real file/device (if given at all).
7374 	// get the partition
7375 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7376 	KPartition* partition = NULL;
7377 	KPath normalizedDevice;
7378 	bool newlyCreatedFileDevice = false;
7379 
7380 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7381 		// normalize the device path
7382 		status = normalizedDevice.SetTo(device, true);
7383 		if (status != B_OK)
7384 			return status;
7385 
7386 		// get a corresponding partition from the DDM
7387 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7388 		if (partition == NULL) {
7389 			// Partition not found: this either means that the user supplied
7390 			// an invalid path, or that the path refers to an image file. We
7391 			// try to let the DDM create a file device for the path.
7392 			partition_id deviceID = ddm->CreateFileDevice(
7393 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7394 			if (deviceID >= 0) {
7395 				partition = ddm->RegisterPartition(deviceID);
7396 				if (newlyCreatedFileDevice)
7397 					fileDeviceDeleter.id = deviceID;
7398 			}
7399 		}
7400 
7401 		if (!partition) {
7402 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7403 				normalizedDevice.Path()));
7404 			return B_ENTRY_NOT_FOUND;
7405 		}
7406 
7407 		device = normalizedDevice.Path();
7408 			// correct path to file device
7409 	}
7410 	PartitionRegistrar partitionRegistrar(partition, true);
7411 
7412 	// Write lock the partition's device. For the time being, we keep the lock
7413 	// until we're done mounting -- not nice, but it ensures that no one
7414 	// interferes.
7415 	// TODO: Just mark the partition busy while mounting!
7416 	KDiskDevice* diskDevice = NULL;
7417 	if (partition) {
7418 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7419 		if (!diskDevice) {
7420 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7421 			return B_ERROR;
7422 		}
7423 	}
7424 
7425 	DeviceWriteLocker writeLocker(diskDevice, true);
7426 		// this takes over the write lock acquired before
7427 
7428 	if (partition != NULL) {
7429 		// make sure that the partition is not busy
7430 		if (partition->IsBusy()) {
7431 			TRACE(("fs_mount(): Partition is busy.\n"));
7432 			return B_BUSY;
7433 		}
7434 
7435 		// if no FS name had been supplied, we get it from the partition
7436 		if (fsName == NULL) {
7437 			KDiskSystem* diskSystem = partition->DiskSystem();
7438 			if (!diskSystem) {
7439 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7440 					"recognize it.\n"));
7441 				return B_BAD_VALUE;
7442 			}
7443 
7444 			if (!diskSystem->IsFileSystem()) {
7445 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7446 					"partitioning system.\n"));
7447 				return B_BAD_VALUE;
7448 			}
7449 
7450 			// The disk system name will not change, and the KDiskSystem
7451 			// object will not go away while the disk device is locked (and
7452 			// the partition has a reference to it), so this is safe.
7453 			fsName = diskSystem->Name();
7454 		}
7455 	}
7456 
7457 	mount = new(std::nothrow) (struct ::fs_mount);
7458 	if (mount == NULL)
7459 		return B_NO_MEMORY;
7460 
7461 	mount->device_name = strdup(device);
7462 		// "device" can be NULL
7463 
7464 	status = mount->entry_cache.Init();
7465 	if (status != B_OK)
7466 		goto err1;
7467 
7468 	// initialize structure
7469 	mount->id = sNextMountID++;
7470 	mount->partition = NULL;
7471 	mount->root_vnode = NULL;
7472 	mount->covers_vnode = NULL;
7473 	mount->unmounting = false;
7474 	mount->owns_file_device = false;
7475 	mount->volume = NULL;
7476 
7477 	// build up the volume(s)
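	// File systems can be stacked in layers: get_file_system_name_for_layer()
	// yields one FS name per layer, and the resulting fs_volume objects are
	// chained via their super_volume/sub_volume links, with mount->volume
	// always pointing at the most recently added layer.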
7478 	while (true) {
7479 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7480 		if (layerFSName == NULL) {
7481 			if (layer == 0) {
7482 				status = B_NO_MEMORY;
7483 				goto err1;
7484 			}
7485 
7486 			break;
7487 		}
7488 		MemoryDeleter layerFSNameDeleter(layerFSName);
7489 
7490 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7491 		if (volume == NULL) {
7492 			status = B_NO_MEMORY;
7493 			goto err1;
7494 		}
7495 
7496 		volume->id = mount->id;
7497 		volume->partition = partition != NULL ? partition->ID() : -1;
7498 		volume->layer = layer++;
7499 		volume->private_volume = NULL;
7500 		volume->ops = NULL;
7501 		volume->sub_volume = NULL;
7502 		volume->super_volume = NULL;
7503 		volume->file_system = NULL;
7504 		volume->file_system_name = NULL;
7505 
7506 		volume->file_system_name = get_file_system_name(layerFSName);
7507 		if (volume->file_system_name == NULL) {
7508 			status = B_NO_MEMORY;
7509 			free(volume);
7510 			goto err1;
7511 		}
7512 
7513 		volume->file_system = get_file_system(layerFSName);
7514 		if (volume->file_system == NULL) {
7515 			status = B_DEVICE_NOT_FOUND;
7516 			free(volume->file_system_name);
7517 			free(volume);
7518 			goto err1;
7519 		}
7520 
7521 		if (mount->volume == NULL)
7522 			mount->volume = volume;
7523 		else {
7524 			volume->super_volume = mount->volume;
7525 			mount->volume->sub_volume = volume;
7526 			mount->volume = volume;
7527 		}
7528 	}
7529 
7530 	// insert mount struct into list before we call FS's mount() function
7531 	// so that vnodes can be created for this mount
7532 	mutex_lock(&sMountMutex);
7533 	sMountsTable->Insert(mount);
7534 	mutex_unlock(&sMountMutex);
7535 
7536 	ino_t rootID;
7537 
7538 	if (!sRoot) {
7539 		// we haven't mounted anything yet
7540 		if (strcmp(path, "/") != 0) {
7541 			status = B_ERROR;
7542 			goto err2;
7543 		}
7544 
7545 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7546 			args, &rootID);
7547 		if (status != B_OK || mount->volume->ops == NULL)
7548 			goto err2;
7549 	} else {
7550 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7551 		if (status != B_OK)
7552 			goto err2;
7553 
7554 		mount->covers_vnode = coveredNode;
7555 
7556 		// make sure the covered node is a directory
7557 		if (!S_ISDIR(coveredNode->Type())) {
7558 			status = B_NOT_A_DIRECTORY;
7559 			goto err3;
7560 		}
7561 
7562 		if (coveredNode->IsCovered()) {
7563 			// this is already a covered vnode
7564 			status = B_BUSY;
7565 			goto err3;
7566 		}
7567 
7568 		// mount it/them
7569 		fs_volume* volume = mount->volume;
7570 		while (volume) {
7571 			status = volume->file_system->mount(volume, device, flags, args,
7572 				&rootID);
7573 			if (status != B_OK || volume->ops == NULL) {
7574 				if (status == B_OK && volume->ops == NULL)
7575 					panic("fs_mount: mount() succeeded but ops is NULL!");
7576 				if (volume->sub_volume)
7577 					goto err4;
7578 				goto err3;
7579 			}
7580 
7581 			volume = volume->super_volume;
7582 		}
7583 
7584 		volume = mount->volume;
7585 		while (volume) {
7586 			if (volume->ops->all_layers_mounted != NULL)
7587 				volume->ops->all_layers_mounted(volume);
7588 			volume = volume->super_volume;
7589 		}
7590 	}
7591 
7592 	// the root node is supposed to be owned by the file system - it must
7593 	// exist at this point
7594 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7595 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7596 		panic("fs_mount: file system does not own its root node!\n");
7597 		status = B_ERROR;
7598 		goto err4;
7599 	}
7600 
7601 	// set up the links between the root vnode and the vnode it covers
7602 	rw_lock_write_lock(&sVnodeLock);
7603 	if (coveredNode != NULL) {
7604 		if (coveredNode->IsCovered()) {
7605 			// the vnode has been covered in the meantime
7606 			status = B_BUSY;
7607 			rw_lock_write_unlock(&sVnodeLock);
7608 			goto err4;
7609 		}
7610 
7611 		mount->root_vnode->covers = coveredNode;
7612 		mount->root_vnode->SetCovering(true);
7613 
7614 		coveredNode->covered_by = mount->root_vnode;
7615 		coveredNode->SetCovered(true);
7616 	}
7617 	rw_lock_write_unlock(&sVnodeLock);
7618 
7619 	if (!sRoot) {
7620 		sRoot = mount->root_vnode;
7621 		mutex_lock(&sIOContextRootLock);
7622 		get_current_io_context(true)->root = sRoot;
7623 		mutex_unlock(&sIOContextRootLock);
7624 		inc_vnode_ref_count(sRoot);
7625 	}
7626 
7627 	// supply the partition (if any) with the mount cookie and mark it mounted
7628 	if (partition) {
7629 		partition->SetMountCookie(mount->volume->private_volume);
7630 		partition->SetVolumeID(mount->id);
7631 
7632 		// keep a partition reference as long as the partition is mounted
7633 		partitionRegistrar.Detach();
7634 		mount->partition = partition;
7635 		mount->owns_file_device = newlyCreatedFileDevice;
7636 		fileDeviceDeleter.id = -1;
7637 	}
7638 
7639 	notify_mount(mount->id,
7640 		coveredNode != NULL ? coveredNode->device : -1,
7641 		coveredNode ? coveredNode->id : -1);
7642 
7643 	return mount->id;
7644 
7645 err4:
7646 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7647 err3:
7648 	if (coveredNode != NULL)
7649 		put_vnode(coveredNode);
7650 err2:
7651 	mutex_lock(&sMountMutex);
7652 	sMountsTable->Remove(mount);
7653 	mutex_unlock(&sMountMutex);
7654 err1:
7655 	delete mount;
7656 
7657 	return status;
7658 }
7659 
7660 
7661 static status_t
7662 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7663 {
7664 	struct fs_mount* mount;
7665 	status_t err;
7666 
7667 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7668 		mountID, kernel));
7669 
7670 	struct vnode* pathVnode = NULL;
7671 	if (path != NULL) {
7672 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7673 		if (err != B_OK)
7674 			return B_ENTRY_NOT_FOUND;
7675 	}
7676 
7677 	RecursiveLocker mountOpLocker(sMountOpLock);
7678 
7679 	// This lock is not strictly necessary, but is taken in the KDEBUG case
7680 	// to keep the ASSERT in find_mount() working.
7681 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7682 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7683 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7684 	if (mount == NULL) {
7685 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7686 			pathVnode);
7687 	}
7688 
7689 	if (path != NULL) {
7690 		put_vnode(pathVnode);
7691 
7692 		if (mount->root_vnode != pathVnode) {
7693 			// not a mount point
7694 			return B_BAD_VALUE;
7695 		}
7696 	}
7697 
7698 	// if the volume is associated with a partition, lock the device of the
7699 	// partition as long as we are unmounting
7700 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7701 	KPartition* partition = mount->partition;
7702 	KDiskDevice* diskDevice = NULL;
7703 	if (partition != NULL) {
7704 		if (partition->Device() == NULL) {
7705 			dprintf("fs_unmount(): There is no device!\n");
7706 			return B_ERROR;
7707 		}
7708 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7709 		if (!diskDevice) {
7710 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7711 			return B_ERROR;
7712 		}
7713 	}
7714 	DeviceWriteLocker writeLocker(diskDevice, true);
7715 
7716 	// make sure that the partition is not busy
7717 	if (partition != NULL) {
7718 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7719 			TRACE(("fs_unmount(): Partition is busy.\n"));
7720 			return B_BUSY;
7721 		}
7722 	}
7723 
7724 	// write-lock the vnode lock to keep someone from creating
7725 	// a vnode while we're figuring out if we can continue
7726 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7727 
7728 	bool disconnectedDescriptors = false;
7729 
7730 	while (true) {
7731 		bool busy = false;
7732 
7733 		// cycle through the list of vnodes associated with this mount and
7734 		// make sure none of them is busy or still referenced
7735 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7736 		while (struct vnode* vnode = iterator.Next()) {
7737 			if (vnode->IsBusy()) {
7738 				busy = true;
7739 				break;
7740 			}
7741 
7742 			// check the vnode's ref count -- subtract additional references for
7743 			// covering
7744 			int32 refCount = vnode->ref_count;
7745 			if (vnode->covers != NULL)
7746 				refCount--;
7747 			if (vnode->covered_by != NULL)
7748 				refCount--;
7749 
7750 			if (refCount != 0) {
7751 				// there are still vnodes in use on this mount, so we cannot
7752 				// unmount yet
7753 				busy = true;
7754 				break;
7755 			}
7756 		}
7757 
7758 		if (!busy)
7759 			break;
7760 
7761 		if ((flags & B_FORCE_UNMOUNT) == 0)
7762 			return B_BUSY;
7763 
7764 		if (disconnectedDescriptors) {
7765 			// wait a bit until the last access is finished, and then try again
7766 			vnodesWriteLocker.Unlock();
7767 			snooze(100000);
7768 			// TODO: if there is some kind of bug that prevents the ref counts
7769 			// from getting back to zero, this will fall into an endless loop...
7770 			vnodesWriteLocker.Lock();
7771 			continue;
7772 		}
7773 
7774 		// the file system is still busy - but we're forced to unmount it,
7775 		// so let's disconnect all open file descriptors
7776 
7777 		mount->unmounting = true;
7778 			// prevent new vnodes from being created
7779 
7780 		vnodesWriteLocker.Unlock();
7781 
7782 		disconnect_mount_or_vnode_fds(mount, NULL);
7783 		disconnectedDescriptors = true;
7784 
7785 		vnodesWriteLocker.Lock();
7786 	}
7787 
7788 	// We can safely continue. Mark all of the vnodes busy and put this mount
7789 	// structure into unmounting state. Also undo the vnode covers/covered_by
7790 	// links.
7791 	mount->unmounting = true;
7792 
7793 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7794 	while (struct vnode* vnode = iterator.Next()) {
7795 		// Remove all covers/covered_by links from other mounts' nodes to this
7796 		// vnode and adjust the node ref count accordingly. We will release the
7797 		// references to the external vnodes below.
7798 		if (Vnode* coveredNode = vnode->covers) {
7799 			if (Vnode* coveringNode = vnode->covered_by) {
7800 				// We have both covered and covering vnodes, so just remove us
7801 				// from the chain.
7802 				coveredNode->covered_by = coveringNode;
7803 				coveringNode->covers = coveredNode;
7804 				vnode->ref_count -= 2;
7805 
7806 				vnode->covered_by = NULL;
7807 				vnode->covers = NULL;
7808 				vnode->SetCovering(false);
7809 				vnode->SetCovered(false);
7810 			} else {
7811 				// We only have a covered vnode. Remove its link to us.
7812 				coveredNode->covered_by = NULL;
7813 				coveredNode->SetCovered(false);
7814 				vnode->ref_count--;
7815 
7816 				// If the other node is an external vnode, we keep its link
7817 				// around so we can put the reference later on. Otherwise we
7818 				// get rid of it right now.
7819 				if (coveredNode->mount == mount) {
7820 					vnode->covers = NULL;
7821 					coveredNode->ref_count--;
7822 				}
7823 			}
7824 		} else if (Vnode* coveringNode = vnode->covered_by) {
7825 			// We only have a covering vnode. Remove its link to us.
7826 			coveringNode->covers = NULL;
7827 			coveringNode->SetCovering(false);
7828 			vnode->ref_count--;
7829 
7830 			// If the other node is an external vnode, we keep its link
7831 			// around so we can put the reference later on. Otherwise we
7832 			// get rid of it right now.
7833 			if (coveringNode->mount == mount) {
7834 				vnode->covered_by = NULL;
7835 				coveringNode->ref_count--;
7836 			}
7837 		}
7838 
7839 		vnode->SetBusy(true);
7840 		vnode_to_be_freed(vnode);
7841 	}
7842 
7843 	vnodesWriteLocker.Unlock();
7844 
7845 	// Free all vnodes associated with this mount.
7846 	// They will be removed from the mount list by free_vnode(), so
7847 	// we don't have to do that here.
7848 	while (struct vnode* vnode = mount->vnodes.Head()) {
7849 		// Put the references to external covered/covering vnodes we kept above.
7850 		if (Vnode* coveredNode = vnode->covers)
7851 			put_vnode(coveredNode);
7852 		if (Vnode* coveringNode = vnode->covered_by)
7853 			put_vnode(coveringNode);
7854 
7855 		free_vnode(vnode, false);
7856 	}
7857 
7858 	// remove the mount structure from the hash table
7859 	mutex_lock(&sMountMutex);
7860 	sMountsTable->Remove(mount);
7861 	mutex_unlock(&sMountMutex);
7862 
7863 	mountOpLocker.Unlock();
7864 
7865 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7866 	notify_unmount(mount->id);
7867 
7868 	// dereference the partition and mark it unmounted
7869 	if (partition) {
7870 		partition->SetVolumeID(-1);
7871 		partition->SetMountCookie(NULL);
7872 
7873 		if (mount->owns_file_device)
7874 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7875 		partition->Unregister();
7876 	}
7877 
7878 	delete mount;
7879 	return B_OK;
7880 }
7881 
7882 
7883 static status_t
7884 fs_sync(dev_t device)
7885 {
7886 	struct fs_mount* mount;
7887 	status_t status = get_mount(device, &mount);
7888 	if (status != B_OK)
7889 		return status;
7890 
7891 	struct vnode marker;
7892 	memset(&marker, 0, sizeof(marker));
7893 	marker.SetBusy(true);
7894 	marker.SetRemoved(true);
7895 
7896 	// First, synchronize all file caches
7897 
7898 	while (true) {
7899 		WriteLocker locker(sVnodeLock);
7900 			// Note: That's the easy way, which is probably OK for sync(),
7901 			// since it's a relatively rare call and doesn't need to allow for
7902 			// a lot of concurrency. Using a read lock would be possible, but
7903 			// also more involved, since we would have to lock the individual
7904 			// nodes and take care of the locking order, which we might not
7905 			// want to do while holding fs_mount::lock.
7906 
7907 		// synchronize access to vnode list
7908 		mutex_lock(&mount->lock);
7909 
7910 		struct vnode* vnode;
7911 		if (!marker.IsRemoved()) {
7912 			vnode = mount->vnodes.GetNext(&marker);
7913 			mount->vnodes.Remove(&marker);
7914 			marker.SetRemoved(true);
7915 		} else
7916 			vnode = mount->vnodes.First();
7917 
7918 		while (vnode != NULL && (vnode->cache == NULL
7919 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7920 			// TODO: we could track writes (and writable mapped vnodes)
7921 			//	and have a simple flag that we could test for here
7922 			vnode = mount->vnodes.GetNext(vnode);
7923 		}
7924 
7925 		if (vnode != NULL) {
7926 			// insert marker vnode again
7927 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7928 			marker.SetRemoved(false);
7929 		}
7930 
7931 		mutex_unlock(&mount->lock);
7932 
7933 		if (vnode == NULL)
7934 			break;
7935 
7936 		vnode = lookup_vnode(mount->id, vnode->id);
7937 		if (vnode == NULL || vnode->IsBusy())
7938 			continue;
7939 
7940 		if (vnode->ref_count == 0) {
7941 			// this vnode was on the unused list before -- mark it used again
7942 			vnode_used(vnode);
7943 		}
7944 		inc_vnode_ref_count(vnode);
7945 
7946 		locker.Unlock();
7947 
7948 		if (vnode->cache != NULL && !vnode->IsRemoved())
7949 			vnode->cache->WriteModified();
7950 
7951 		put_vnode(vnode);
7952 	}
7953 
7954 	// Let the file systems do their synchronizing work
7955 	if (HAS_FS_MOUNT_CALL(mount, sync))
7956 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7957 
7958 	// Finally, flush the underlying device's write cache (if possible).
7959 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7960 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7961 
7962 	put_mount(mount);
7963 	return status;
7964 }
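
/*	The marker technique used above deserves a note: it allows sVnodeLock and
	the mount's lock to be dropped while iterating, because the position is
	remembered by a dummy node linked into the list itself. A condensed sketch
	of one round trip (illustrative only; the busy/removed checks and error
	handling of the real loop are omitted, and process() is a hypothetical
	stand-in for the cache write-back done above):

	\code
	mutex_lock(&mount->lock);
	struct vnode* vnode = mount->vnodes.GetNext(&marker);
	mount->vnodes.Remove(&marker);
	// re-insert the marker after the node we're about to process
	mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
	mutex_unlock(&mount->lock);

	// the list may now change freely; the marker keeps our position
	process(vnode);
	\endcode
*/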
7965 
7966 
7967 static status_t
7968 fs_read_info(dev_t device, struct fs_info* info)
7969 {
7970 	struct fs_mount* mount;
7971 	status_t status = get_mount(device, &mount);
7972 	if (status != B_OK)
7973 		return status;
7974 
7975 	memset(info, 0, sizeof(struct fs_info));
7976 
7977 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7978 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7979 
7980 	// fill in info the file system doesn't (have to) know about
7981 	if (status == B_OK) {
7982 		info->dev = mount->id;
7983 		info->root = mount->root_vnode->id;
7984 
7985 		fs_volume* volume = mount->volume;
7986 		while (volume->super_volume != NULL)
7987 			volume = volume->super_volume;
7988 
7989 		strlcpy(info->fsh_name, volume->file_system_name,
7990 			sizeof(info->fsh_name));
7991 		if (mount->device_name != NULL) {
7992 			strlcpy(info->device_name, mount->device_name,
7993 				sizeof(info->device_name));
7994 		}
7995 	}
7996 
7997 	// if the call is not supported by the file system, there are still
7998 	// the parts that we filled out ourselves
7999 
8000 	put_mount(mount);
8001 	return status;
8002 }
8003 
8004 
8005 static status_t
8006 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8007 {
8008 	struct fs_mount* mount;
8009 	status_t status = get_mount(device, &mount);
8010 	if (status != B_OK)
8011 		return status;
8012 
8013 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8014 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8015 	else
8016 		status = B_READ_ONLY_DEVICE;
8017 
8018 	put_mount(mount);
8019 	return status;
8020 }
8021 
8022 
8023 static dev_t
8024 fs_next_device(int32* _cookie)
8025 {
8026 	struct fs_mount* mount = NULL;
8027 	dev_t device = *_cookie;
8028 
8029 	mutex_lock(&sMountMutex);
8030 
8031 	// Since device IDs are assigned sequentially, this algorithm
8032 	// works well enough. It makes sure that the device list
8033 	// returned is sorted, and that no device is skipped when an
8034 	// already visited device gets unmounted.
8035 
8036 	while (device < sNextMountID) {
8037 		mount = find_mount(device++);
8038 		if (mount != NULL && mount->volume->private_volume != NULL)
8039 			break;
8040 	}
8041 
8042 	*_cookie = device;
8043 
8044 	if (mount != NULL)
8045 		device = mount->id;
8046 	else
8047 		device = B_BAD_VALUE;
8048 
8049 	mutex_unlock(&sMountMutex);
8050 
8051 	return device;
8052 }
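
/*	Illustrative use of the cookie protocol implemented above (a sketch, not
	part of the build): enumerate all mounted volumes starting from a zero
	cookie until B_BAD_VALUE ends the iteration.

	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("found volume %" B_PRIdDEV "\n", device);
	\endcode

	The cookie holds the next device ID to try, not the last one returned,
	which is what makes the iteration robust against concurrent unmounts.
*/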
8053 
8054 
8055 ssize_t
8056 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8057 	void *buffer, size_t readBytes)
8058 {
8059 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8060 	if (attrFD < 0)
8061 		return attrFD;
8062 
8063 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8064 
8065 	_kern_close(attrFD);
8066 
8067 	return bytesRead;
8068 }
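
/*	A minimal usage sketch for the helper above (illustrative only; the
	attribute name and buffer size are made up):

	\code
	char buffer[B_FILE_NAME_LENGTH];
	ssize_t bytesRead = fs_read_attr(fd, "some:attribute", B_STRING_TYPE, 0,
		buffer, sizeof(buffer) - 1);
	if (bytesRead < 0)
		return bytesRead;	// error from attr_open() or _kern_read()
	buffer[bytesRead] = '\0';
	\endcode

	Since the attribute FD is opened and closed on every call, this helper is
	convenient, but not suited for repeated reads of the same attribute.
*/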
8069 
8070 
8071 static status_t
8072 get_cwd(char* buffer, size_t size, bool kernel)
8073 {
8074 	// Get current working directory from io context
8075 	struct io_context* context = get_current_io_context(kernel);
8076 	status_t status;
8077 
8078 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8079 
8080 	mutex_lock(&context->io_mutex);
8081 
8082 	struct vnode* vnode = context->cwd;
8083 	if (vnode)
8084 		inc_vnode_ref_count(vnode);
8085 
8086 	mutex_unlock(&context->io_mutex);
8087 
8088 	if (vnode) {
8089 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8090 		put_vnode(vnode);
8091 	} else
8092 		status = B_ERROR;
8093 
8094 	return status;
8095 }
8096 
8097 
8098 static status_t
8099 set_cwd(int fd, char* path, bool kernel)
8100 {
8101 	struct io_context* context;
8102 	struct vnode* vnode = NULL;
8103 	struct vnode* oldDirectory;
8104 	status_t status;
8105 
8106 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8107 
8108 	// Get vnode for passed path, and bail if it failed
8109 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8110 	if (status < 0)
8111 		return status;
8112 
8113 	if (!S_ISDIR(vnode->Type())) {
8114 		// nope, can't cwd to here
8115 		status = B_NOT_A_DIRECTORY;
8116 		goto err;
8117 	}
8118 
8119 	// We need to have the permission to enter the directory, too
8120 	if (HAS_FS_CALL(vnode, access)) {
8121 		status = FS_CALL(vnode, access, X_OK);
8122 		if (status != B_OK)
8123 			goto err;
8124 	}
8125 
8126 	// Get current io context and lock
8127 	context = get_current_io_context(kernel);
8128 	mutex_lock(&context->io_mutex);
8129 
8130 	// save the old current working directory first
8131 	oldDirectory = context->cwd;
8132 	context->cwd = vnode;
8133 
8134 	mutex_unlock(&context->io_mutex);
8135 
8136 	if (oldDirectory)
8137 		put_vnode(oldDirectory);
8138 
8139 	return B_NO_ERROR;
8140 
8141 err:
8142 	put_vnode(vnode);
8143 	return status;
8144 }
8145 
8146 
8147 static status_t
8148 user_copy_name(char* to, const char* from, size_t length)
8149 {
8150 	ssize_t len = user_strlcpy(to, from, length);
8151 	if (len < 0)
8152 		return len;
8153 	if (len >= (ssize_t)length)
8154 		return B_NAME_TOO_LONG;
8155 	return B_OK;
8156 }
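
/*	Unlike a bare user_strlcpy(), user_copy_name() reports truncation as an
	error instead of silently shortening the name, so call sites reduce to
	this pattern (sketch):

	\code
	char name[B_FILE_NAME_LENGTH];
	status_t status = user_copy_name(name, userName, sizeof(name));
	if (status != B_OK)
		return status;	// B_BAD_ADDRESS or B_NAME_TOO_LONG
	\endcode
*/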
8157 
8158 
8159 //	#pragma mark - kernel mirrored syscalls
8160 
8161 
8162 dev_t
8163 _kern_mount(const char* path, const char* device, const char* fsName,
8164 	uint32 flags, const char* args, size_t argsLength)
8165 {
8166 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8167 	if (pathBuffer.InitCheck() != B_OK)
8168 		return B_NO_MEMORY;
8169 
8170 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8171 }
8172 
8173 
8174 status_t
8175 _kern_unmount(const char* path, uint32 flags)
8176 {
8177 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8178 	if (pathBuffer.InitCheck() != B_OK)
8179 		return B_NO_MEMORY;
8180 
8181 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8182 }
8183 
8184 
8185 status_t
8186 _kern_read_fs_info(dev_t device, struct fs_info* info)
8187 {
8188 	if (info == NULL)
8189 		return B_BAD_VALUE;
8190 
8191 	return fs_read_info(device, info);
8192 }
8193 
8194 
8195 status_t
8196 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8197 {
8198 	if (info == NULL)
8199 		return B_BAD_VALUE;
8200 
8201 	return fs_write_info(device, info, mask);
8202 }
8203 
8204 
8205 status_t
8206 _kern_sync(void)
8207 {
8208 	// Note: _kern_sync() is also called from _user_sync()
8209 	int32 cookie = 0;
8210 	dev_t device;
8211 	while ((device = next_dev(&cookie)) >= 0) {
8212 		status_t status = fs_sync(device);
8213 		if (status != B_OK && status != B_BAD_VALUE) {
8214 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8215 				strerror(status));
8216 		}
8217 	}
8218 
8219 	return B_OK;
8220 }
8221 
8222 
8223 dev_t
8224 _kern_next_device(int32* _cookie)
8225 {
8226 	return fs_next_device(_cookie);
8227 }
8228 
8229 
8230 status_t
8231 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8232 	size_t infoSize)
8233 {
8234 	if (infoSize != sizeof(fd_info))
8235 		return B_BAD_VALUE;
8236 
8237 	// get the team
8238 	Team* team = Team::Get(teamID);
8239 	if (team == NULL)
8240 		return B_BAD_TEAM_ID;
8241 	BReference<Team> teamReference(team, true);
8242 
8243 	// now that we have a team reference, its I/O context won't go away
8244 	io_context* context = team->io_context;
8245 	MutexLocker contextLocker(context->io_mutex);
8246 
8247 	uint32 slot = *_cookie;
8248 
8249 	struct file_descriptor* descriptor;
8250 	while (slot < context->table_size
8251 		&& (descriptor = context->fds[slot]) == NULL) {
8252 		slot++;
8253 	}
8254 
8255 	if (slot >= context->table_size)
8256 		return B_ENTRY_NOT_FOUND;
8257 
8258 	info->number = slot;
8259 	info->open_mode = descriptor->open_mode;
8260 
8261 	struct vnode* vnode = fd_vnode(descriptor);
8262 	if (vnode != NULL) {
8263 		info->device = vnode->device;
8264 		info->node = vnode->id;
8265 	} else if (descriptor->u.mount != NULL) {
8266 		info->device = descriptor->u.mount->id;
8267 		info->node = -1;
8268 	}
8269 
8270 	*_cookie = slot + 1;
8271 	return B_OK;
8272 }
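
/*	Enumerating a team's open FDs with the cookie protocol above -- a sketch
	(illustrative only):

	\code
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
	\endcode
*/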
8273 
8274 
8275 int
8276 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8277 	int perms)
8278 {
8279 	if ((openMode & O_CREAT) != 0) {
8280 		return file_create_entry_ref(device, inode, name, openMode, perms,
8281 			true);
8282 	}
8283 
8284 	return file_open_entry_ref(device, inode, name, openMode, true);
8285 }
8286 
8287 
8288 /*!	\brief Opens a node specified by a FD + path pair.
8289 
8290 	At least one of \a fd and \a path must be specified.
8291 	If only \a fd is given, the function opens the node identified by this
8292 	FD. If only a path is given, this path is opened. If both are given and
8293 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8294 	of the directory (!) identified by \a fd.
8295 
8296 	\param fd The FD. May be < 0.
8297 	\param path The absolute or relative path. May be \c NULL.
8298 	\param openMode The open mode.
8299 	\return A FD referring to the newly opened node, or an error code,
8300 			if an error occurs.
8301 */
8302 int
8303 _kern_open(int fd, const char* path, int openMode, int perms)
8304 {
8305 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8306 	if (pathBuffer.InitCheck() != B_OK)
8307 		return B_NO_MEMORY;
8308 
8309 	if ((openMode & O_CREAT) != 0)
8310 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8311 
8312 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8313 }
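
/*	A sketch of the FD + path convention from kernel code (illustrative only;
	the paths are made up): open a file relative to an already opened
	directory.

	\code
	int dirFD = _kern_open_dir(-1, "/var");
	if (dirFD >= 0) {
		int fd = _kern_open(dirFD, "log/syslog", O_RDONLY, 0);
		if (fd >= 0) {
			// ... use fd ...
			_kern_close(fd);
		}
		_kern_close(dirFD);
	}
	\endcode

	Had the second path been absolute, \c dirFD would have been ignored.
*/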
8314 
8315 
8316 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8317 
8318 	The supplied name may be \c NULL, in which case directory identified
8319 	by \a device and \a inode will be opened. Otherwise \a device and
8320 	\a inode identify the parent directory of the directory to be opened
8321 	and \a name its entry name.
8322 
8323 	\param device If \a name is specified the ID of the device the parent
8324 		   directory of the directory to be opened resides on, otherwise
8325 		   the device of the directory itself.
8326 	\param inode If \a name is specified the node ID of the parent
8327 		   directory of the directory to be opened, otherwise node ID of the
8328 		   directory itself.
8329 	\param name The entry name of the directory to be opened. If \c NULL,
8330 		   the \a device + \a inode pair identify the node to be opened.
8331 	\return The FD of the newly opened directory or an error code, if
8332 			something went wrong.
8333 */
8334 int
8335 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8336 {
8337 	return dir_open_entry_ref(device, inode, name, true);
8338 }
8339 
8340 
8341 /*!	\brief Opens a directory specified by a FD + path pair.
8342 
8343 	At least one of \a fd and \a path must be specified.
8344 	If only \a fd is given, the function opens the directory identified by this
8345 	FD. If only a path is given, this path is opened. If both are given and
8346 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8347 	of the directory (!) identified by \a fd.
8348 
8349 	\param fd The FD. May be < 0.
8350 	\param path The absolute or relative path. May be \c NULL.
8351 	\return A FD referring to the newly opened directory, or an error code,
8352 			if an error occurs.
8353 */
8354 int
8355 _kern_open_dir(int fd, const char* path)
8356 {
8357 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8358 	if (pathBuffer.InitCheck() != B_OK)
8359 		return B_NO_MEMORY;
8360 
8361 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8362 }
8363 
8364 
8365 status_t
8366 _kern_fcntl(int fd, int op, size_t argument)
8367 {
8368 	return common_fcntl(fd, op, argument, true);
8369 }
8370 
8371 
8372 status_t
8373 _kern_fsync(int fd)
8374 {
8375 	return common_sync(fd, true);
8376 }
8377 
8378 
8379 status_t
8380 _kern_lock_node(int fd)
8381 {
8382 	return common_lock_node(fd, true);
8383 }
8384 
8385 
8386 status_t
8387 _kern_unlock_node(int fd)
8388 {
8389 	return common_unlock_node(fd, true);
8390 }
8391 
8392 
8393 status_t
8394 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8395 	int perms)
8396 {
8397 	return dir_create_entry_ref(device, inode, name, perms, true);
8398 }
8399 
8400 
8401 /*!	\brief Creates a directory specified by a FD + path pair.
8402 
8403 	\a path must always be specified (it contains the name of the new directory
8404 	at least). If only a path is given, this path identifies the location at
8405 	which the directory shall be created. If both \a fd and \a path are given
8406 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8407 	of the directory (!) identified by \a fd.
8408 
8409 	\param fd The FD. May be < 0.
8410 	\param path The absolute or relative path. Must not be \c NULL.
8411 	\param perms The access permissions the new directory shall have.
8412 	\return \c B_OK, if the directory has been created successfully, another
8413 			error code otherwise.
8414 */
8415 status_t
8416 _kern_create_dir(int fd, const char* path, int perms)
8417 {
8418 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8419 	if (pathBuffer.InitCheck() != B_OK)
8420 		return B_NO_MEMORY;
8421 
8422 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8423 }
8424 
8425 
8426 status_t
8427 _kern_remove_dir(int fd, const char* path)
8428 {
8429 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8430 	if (pathBuffer.InitCheck() != B_OK)
8431 		return B_NO_MEMORY;
8432 
8433 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8434 }
8435 
8436 
8437 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8438 
8439 	At least one of \a fd and \a path must be specified.
8440 	If only \a fd is given, the symlink to be read is the node
8441 	identified by this FD. If only a path is given, this path identifies the
8442 	symlink to be read. If both are given and the path is absolute, \a fd is
8443 	ignored; a relative path is reckoned off of the directory (!) identified
8444 	by \a fd.
8445 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8446 	will still be updated to reflect the required buffer size.
8447 
8448 	\param fd The FD. May be < 0.
8449 	\param path The absolute or relative path. May be \c NULL.
8450 	\param buffer The buffer into which the contents of the symlink shall be
8451 		   written.
8452 	\param _bufferSize A pointer to the size of the supplied buffer.
8453 	\return The length of the link on success or an appropriate error code.
8454 */
8455 status_t
8456 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8457 {
8458 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8459 	if (pathBuffer.InitCheck() != B_OK)
8460 		return B_NO_MEMORY;
8461 
8462 	return common_read_link(fd, pathBuffer.LockBuffer(),
8463 		buffer, _bufferSize, true);
8464 }
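
/*	Because \a _bufferSize is updated even on failure, a caller can size its
	buffer in two passes -- a sketch (illustrative only):

	\code
	char small[1];
	size_t size = sizeof(small);
	status_t status = _kern_read_link(-1, "/some/link", small, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// size now holds the required length; allocate and retry
		char* buffer = (char*)malloc(size);
		if (buffer != NULL)
			status = _kern_read_link(-1, "/some/link", buffer, &size);
	}
	\endcode
*/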
8465 
8466 
8467 /*!	\brief Creates a symlink specified by a FD + path pair.
8468 
8469 	\a path must always be specified (it contains the name of the new symlink
8470 	at least). If only a path is given, this path identifies the location at
8471 	which the symlink shall be created. If both \a fd and \a path are given and
8472 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8473 	of the directory (!) identified by \a fd.
8474 
8475 	\param fd The FD. May be < 0.
8476 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
8477 	\param mode The access permissions the new symlink shall have.
8478 	\return \c B_OK, if the symlink has been created successfully, another
8479 			error code otherwise.
8480 */
8481 status_t
8482 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8483 {
8484 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8485 	if (pathBuffer.InitCheck() != B_OK)
8486 		return B_NO_MEMORY;
8487 
8488 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8489 		toPath, mode, true);
8490 }
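
/*	Usage sketch (illustrative only; the names are made up): create a symlink
	"current" inside an already opened directory, pointing to a relative
	target.

	\code
	status_t status = _kern_create_symlink(dirFD, "current",
		"builds/latest", S_IRWXU | S_IRWXG | S_IRWXO);
	\endcode

	\a toPath is stored verbatim -- it is not resolved or verified at creation
	time.
*/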
8491 
8492 
8493 status_t
8494 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8495 	bool traverseLeafLink)
8496 {
8497 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8498 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8499 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8500 		return B_NO_MEMORY;
8501 
8502 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8503 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8504 }
8505 
8506 
8507 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8508 
8509 	\a path must always be specified (it contains at least the name of the entry
8510 	to be deleted). If only a path is given, this path identifies the entry
8511 	directly. If both \a fd and \a path are given and the path is absolute,
8512 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8513 	identified by \a fd.
8514 
8515 	\param fd The FD. May be < 0.
8516 	\param path The absolute or relative path. Must not be \c NULL.
8517 	\return \c B_OK, if the entry has been removed successfully, another
8518 			error code otherwise.
8519 */
8520 status_t
8521 _kern_unlink(int fd, const char* path)
8522 {
8523 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8524 	if (pathBuffer.InitCheck() != B_OK)
8525 		return B_NO_MEMORY;
8526 
8527 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8528 }
8529 
8530 
8531 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8532 		   by another FD + path pair.
8533 
8534 	\a oldPath and \a newPath must always be specified (they contain at least
8535 	the name of the entry). If only a path is given, this path identifies the
8536 	entry directly. If both a FD and a path are given and the path is absolute,
8537 	the FD is ignored; a relative path is reckoned off of the directory (!)
8538 	identified by the respective FD.
8539 
8540 	\param oldFD The FD of the old location. May be < 0.
8541 	\param oldPath The absolute or relative path of the old location. Must not
8542 		   be \c NULL.
8543 	\param newFD The FD of the new location. May be < 0.
8544 	\param newPath The absolute or relative path of the new location. Must not
8545 		   be \c NULL.
8546 	\return \c B_OK, if the entry has been moved successfully, another
8547 			error code otherwise.
8548 */
8549 status_t
8550 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8551 {
8552 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8553 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8554 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8555 		return B_NO_MEMORY;
8556 
8557 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8558 		newFD, newPathBuffer.LockBuffer(), true);
8559 }
8560 
8561 
8562 status_t
8563 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8564 {
8565 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8566 	if (pathBuffer.InitCheck() != B_OK)
8567 		return B_NO_MEMORY;
8568 
8569 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8570 		true);
8571 }
8572 
8573 
8574 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8575 
8576 	If only \a fd is given, the stat operation associated with the type
8577 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8578 	given, this path identifies the entry for whose node to retrieve the
8579 	stat data. If both \a fd and \a path are given and the path is absolute,
8580 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8581 	identified by \a fd and specifies the entry whose stat data shall be
8582 	retrieved.
8583 
8584 	\param fd The FD. May be < 0.
8585 	\param path The absolute or relative path. May be \c NULL.
8586 	\param traverseLeafLink If \a path is given, \c true specifies that the
8587 		   function shall not stick to symlinks, but traverse them.
8588 	\param stat The buffer the stat data shall be written into.
8589 	\param statSize The size of the supplied stat buffer.
8590 	\return \c B_OK, if the stat data have been read successfully, another
8591 			error code otherwise.
8592 */
8593 status_t
8594 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8595 	struct stat* stat, size_t statSize)
8596 {
8597 	struct stat completeStat;
8598 	struct stat* originalStat = NULL;
8599 	status_t status;
8600 
8601 	if (statSize > sizeof(struct stat))
8602 		return B_BAD_VALUE;
8603 
8604 	// this supports different stat extensions
8605 	if (statSize < sizeof(struct stat)) {
8606 		originalStat = stat;
8607 		stat = &completeStat;
8608 	}
8609 
8610 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8611 
8612 	if (status == B_OK && originalStat != NULL)
8613 		memcpy(originalStat, stat, statSize);
8614 
8615 	return status;
8616 }
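
/*	The statSize mechanics above let callers compiled against an older,
	smaller struct stat keep working: the kernel fills in a complete stat
	internally and copies back only the leading statSize bytes. A sketch
	(illustrative only; OldStat is a hypothetical legacy layout that is
	prefix-compatible with struct stat):

	\code
	struct OldStat oldStat;
	status_t status = _kern_read_stat(fd, NULL, false,
		(struct stat*)&oldStat, sizeof(oldStat));
	\endcode
*/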
8617 
8618 
8619 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8620 
8621 	If only \a fd is given, the stat operation associated with the type
8622 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8623 	given, this path identifies the entry for whose node to write the
8624 	stat data. If both \a fd and \a path are given and the path is absolute,
8625 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8626 	identified by \a fd and specifies the entry whose stat data shall be
8627 	written.
8628 
8629 	\param fd The FD. May be < 0.
8630 	\param path The absolute or relative path. May be \c NULL.
8631 	\param traverseLeafLink If \a path is given, \c true specifies that the
8632 		   function shall not stick to symlinks, but traverse them.
8633 	\param stat The buffer containing the stat data to be written.
8634 	\param statSize The size of the supplied stat buffer.
8635 	\param statMask A mask specifying which parts of the stat data shall be
8636 		   written.
8637 	\return \c B_OK, if the stat data have been written successfully,
8638 			another error code otherwise.
8639 */
8640 status_t
8641 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8642 	const struct stat* stat, size_t statSize, int statMask)
8643 {
8644 	struct stat completeStat;
8645 
8646 	if (statSize > sizeof(struct stat))
8647 		return B_BAD_VALUE;
8648 
8649 	// this supports different stat extensions
8650 	if (statSize < sizeof(struct stat)) {
8651 		memset((uint8*)&completeStat + statSize, 0,
8652 			sizeof(struct stat) - statSize);
8653 		memcpy(&completeStat, stat, statSize);
8654 		stat = &completeStat;
8655 	}
8656 
8657 	status_t status;
8658 
8659 	if (path != NULL) {
8660 		// path given: write the stat of the node referred to by (fd, path)
8661 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8662 		if (pathBuffer.InitCheck() != B_OK)
8663 			return B_NO_MEMORY;
8664 
8665 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8666 			traverseLeafLink, stat, statMask, true);
8667 	} else {
8668 		// no path given: get the FD and use the FD operation
8669 		struct file_descriptor* descriptor
8670 			= get_fd(get_current_io_context(true), fd);
8671 		if (descriptor == NULL)
8672 			return B_FILE_ERROR;
8673 
8674 		if (descriptor->ops->fd_write_stat)
8675 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8676 		else
8677 			status = B_UNSUPPORTED;
8678 
8679 		put_fd(descriptor);
8680 	}
8681 
8682 	return status;
8683 }
8684 
8685 
8686 int
8687 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8688 {
8689 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8690 	if (pathBuffer.InitCheck() != B_OK)
8691 		return B_NO_MEMORY;
8692 
8693 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8694 }
8695 
8696 
8697 int
8698 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8699 	int openMode)
8700 {
8701 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8702 	if (pathBuffer.InitCheck() != B_OK)
8703 		return B_NO_MEMORY;
8704 
8705 	if ((openMode & O_CREAT) != 0) {
8706 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8707 			true);
8708 	}
8709 
8710 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8711 }
8712 
8713 
8714 status_t
8715 _kern_remove_attr(int fd, const char* name)
8716 {
8717 	return attr_remove(fd, name, true);
8718 }
8719 
8720 
8721 status_t
8722 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8723 	const char* toName)
8724 {
8725 	return attr_rename(fromFile, fromName, toFile, toName, true);
8726 }
8727 
8728 
8729 int
8730 _kern_open_index_dir(dev_t device)
8731 {
8732 	return index_dir_open(device, true);
8733 }
8734 
8735 
8736 status_t
8737 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8738 {
8739 	return index_create(device, name, type, flags, true);
8740 }
8741 
8742 
8743 status_t
8744 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8745 {
8746 	return index_name_read_stat(device, name, stat, true);
8747 }
8748 
8749 
8750 status_t
8751 _kern_remove_index(dev_t device, const char* name)
8752 {
8753 	return index_remove(device, name, true);
8754 }
8755 
8756 
8757 status_t
8758 _kern_getcwd(char* buffer, size_t size)
8759 {
8760 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8761 
8762 	// Call vfs to get current working directory
8763 	return get_cwd(buffer, size, true);
8764 }
8765 
8766 
8767 status_t
8768 _kern_setcwd(int fd, const char* path)
8769 {
8770 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8771 	if (pathBuffer.InitCheck() != B_OK)
8772 		return B_NO_MEMORY;
8773 
8774 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8775 }
8776 
8777 
8778 //	#pragma mark - userland syscalls
8779 
8780 
8781 dev_t
8782 _user_mount(const char* userPath, const char* userDevice,
8783 	const char* userFileSystem, uint32 flags, const char* userArgs,
8784 	size_t argsLength)
8785 {
8786 	char fileSystem[B_FILE_NAME_LENGTH];
8787 	KPath path, device;
8788 	char* args = NULL;
8789 	status_t status;
8790 
8791 	if (!IS_USER_ADDRESS(userPath))
8792 		return B_BAD_ADDRESS;
8793 
8794 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8795 		return B_NO_MEMORY;
8796 
8797 	status = user_copy_name(path.LockBuffer(), userPath,
8798 		B_PATH_NAME_LENGTH);
8799 	if (status != B_OK)
8800 		return status;
8801 
8802 	if (userFileSystem != NULL) {
8803 		if (!IS_USER_ADDRESS(userFileSystem))
8804 			return B_BAD_ADDRESS;
8805 
8806 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8807 		if (status != B_OK)
8808 			return status;
8809 	}
8810 
8811 	if (userDevice != NULL) {
8812 		if (!IS_USER_ADDRESS(userDevice))
8813 			return B_BAD_ADDRESS;
8814 
8815 		status = user_copy_name(device.LockBuffer(), userDevice,
8816 			B_PATH_NAME_LENGTH);
8817 		if (status != B_OK)
8818 			return status;
8819 	}
8820 
8821 	if (userArgs != NULL && argsLength > 0) {
8822 		if (!IS_USER_ADDRESS(userArgs))
8823 			return B_BAD_ADDRESS;
8824 
8825 		// this is a safety restriction
8826 		if (argsLength >= 65536)
8827 			return B_NAME_TOO_LONG;
8828 
8829 		args = (char*)malloc(argsLength + 1);
8830 		if (args == NULL)
8831 			return B_NO_MEMORY;
8832 
8833 		status = user_copy_name(args, userArgs, argsLength + 1);
8834 		if (status != B_OK) {
8835 			free(args);
8836 			return status;
8837 		}
8838 	}
8839 	path.UnlockBuffer();
8840 	device.UnlockBuffer();
8841 
8842 	status = fs_mount(path.LockBuffer(),
8843 		userDevice != NULL ? device.Path() : NULL,
8844 		userFileSystem ? fileSystem : NULL, flags, args, false);
8845 
8846 	free(args);
8847 	return status;
8848 }
8849 
8850 
8851 status_t
8852 _user_unmount(const char* userPath, uint32 flags)
8853 {
8854 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8855 
8856 	if (!IS_USER_ADDRESS(userPath))
8857 		return B_BAD_ADDRESS;
8858 
8859 	if (pathBuffer.InitCheck() != B_OK)
8860 		return B_NO_MEMORY;
8861 
8862 	char* path = pathBuffer.LockBuffer();
8863 
8864 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8865 	if (status != B_OK)
8866 		return status;
8867 
8868 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8869 }
8870 
8871 
8872 status_t
8873 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8874 {
8875 	struct fs_info info;
8876 	status_t status;
8877 
8878 	if (userInfo == NULL)
8879 		return B_BAD_VALUE;
8880 
8881 	if (!IS_USER_ADDRESS(userInfo))
8882 		return B_BAD_ADDRESS;
8883 
8884 	status = fs_read_info(device, &info);
8885 	if (status != B_OK)
8886 		return status;
8887 
8888 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8889 		return B_BAD_ADDRESS;
8890 
8891 	return B_OK;
8892 }
8893 
8894 
8895 status_t
8896 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8897 {
8898 	struct fs_info info;
8899 
8900 	if (userInfo == NULL)
8901 		return B_BAD_VALUE;
8902 
8903 	if (!IS_USER_ADDRESS(userInfo)
8904 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8905 		return B_BAD_ADDRESS;
8906 
8907 	return fs_write_info(device, &info, mask);
8908 }
8909 
8910 
8911 dev_t
8912 _user_next_device(int32* _userCookie)
8913 {
8914 	int32 cookie;
8915 	dev_t device;
8916 
8917 	if (!IS_USER_ADDRESS(_userCookie)
8918 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8919 		return B_BAD_ADDRESS;
8920 
8921 	device = fs_next_device(&cookie);
8922 
8923 	if (device >= B_OK) {
8924 		// update user cookie
8925 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8926 			return B_BAD_ADDRESS;
8927 	}
8928 
8929 	return device;
8930 }
8931 
8932 
8933 status_t
8934 _user_sync(void)
8935 {
8936 	return _kern_sync();
8937 }
8938 
8939 
8940 status_t
8941 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8942 	size_t infoSize)
8943 {
8944 	struct fd_info info;
8945 	uint32 cookie;
8946 
8947 	// only root can do this (or should root's group be enough?)
8948 	if (geteuid() != 0)
8949 		return B_NOT_ALLOWED;
8950 
8951 	if (infoSize != sizeof(fd_info))
8952 		return B_BAD_VALUE;
8953 
8954 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8955 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8956 		return B_BAD_ADDRESS;
8957 
8958 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8959 	if (status != B_OK)
8960 		return status;
8961 
8962 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8963 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8964 		return B_BAD_ADDRESS;
8965 
8966 	return status;
8967 }
8968 
8969 
8970 status_t
8971 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8972 	char* userPath, size_t pathLength)
8973 {
8974 	if (!IS_USER_ADDRESS(userPath))
8975 		return B_BAD_ADDRESS;
8976 
8977 	KPath path(B_PATH_NAME_LENGTH + 1);
8978 	if (path.InitCheck() != B_OK)
8979 		return B_NO_MEMORY;
8980 
8981 	// copy the leaf name onto the stack
8982 	char stackLeaf[B_FILE_NAME_LENGTH];
8983 	if (leaf != NULL) {
8984 		if (!IS_USER_ADDRESS(leaf))
8985 			return B_BAD_ADDRESS;
8986 
8987 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8988 		if (status != B_OK)
8989 			return status;
8990 
8991 		leaf = stackLeaf;
8992 	}
8993 
8994 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8995 		false, path.LockBuffer(), path.BufferSize());
8996 	if (status != B_OK)
8997 		return status;
8998 
8999 	path.UnlockBuffer();
9000 
9001 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9002 	if (length < 0)
9003 		return length;
9004 	if (length >= (int)pathLength)
9005 		return B_BUFFER_OVERFLOW;
9006 
9007 	return B_OK;
9008 }
9009 
9010 
9011 status_t
9012 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9013 {
9014 	if (userPath == NULL || buffer == NULL)
9015 		return B_BAD_VALUE;
9016 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9017 		return B_BAD_ADDRESS;
9018 
9019 	// copy path from userland
9020 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9021 	if (pathBuffer.InitCheck() != B_OK)
9022 		return B_NO_MEMORY;
9023 	char* path = pathBuffer.LockBuffer();
9024 
9025 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9026 	if (status != B_OK)
9027 		return status;
9028 
9029 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9030 		false);
9031 	if (error != B_OK)
9032 		return error;
9033 
9034 	// copy back to userland
9035 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9036 	if (len < 0)
9037 		return len;
9038 	if (len >= B_PATH_NAME_LENGTH)
9039 		return B_BUFFER_OVERFLOW;
9040 
9041 	return B_OK;
9042 }
9043 
9044 
9045 int
9046 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9047 	int openMode, int perms)
9048 {
9049 	char name[B_FILE_NAME_LENGTH];
9050 
9051 	if (userName == NULL || device < 0 || inode < 0)
9052 		return B_BAD_VALUE;
9053 	if (!IS_USER_ADDRESS(userName))
9054 		return B_BAD_ADDRESS;
9055 	status_t status = user_copy_name(name, userName, sizeof(name));
9056 	if (status != B_OK)
9057 		return status;
9058 
9059 	if ((openMode & O_CREAT) != 0) {
9060 		return file_create_entry_ref(device, inode, name, openMode, perms,
9061 			false);
9062 	}
9063 
9064 	return file_open_entry_ref(device, inode, name, openMode, false);
9065 }
9066 
9067 
9068 int
9069 _user_open(int fd, const char* userPath, int openMode, int perms)
9070 {
9071 	KPath path(B_PATH_NAME_LENGTH + 1);
9072 	if (path.InitCheck() != B_OK)
9073 		return B_NO_MEMORY;
9074 
9075 	char* buffer = path.LockBuffer();
9076 
9077 	if (!IS_USER_ADDRESS(userPath))
9078 		return B_BAD_ADDRESS;
9079 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9080 	if (status != B_OK)
9081 		return status;
9082 
9083 	if ((openMode & O_CREAT) != 0)
9084 		return file_create(fd, buffer, openMode, perms, false);
9085 
9086 	return file_open(fd, buffer, openMode, false);
9087 }
9088 
9089 
9090 int
9091 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9092 {
9093 	if (userName != NULL) {
9094 		char name[B_FILE_NAME_LENGTH];
9095 
9096 		if (!IS_USER_ADDRESS(userName))
9097 			return B_BAD_ADDRESS;
9098 		status_t status = user_copy_name(name, userName, sizeof(name));
9099 		if (status != B_OK)
9100 			return status;
9101 
9102 		return dir_open_entry_ref(device, inode, name, false);
9103 	}
9104 	return dir_open_entry_ref(device, inode, NULL, false);
9105 }
9106 
9107 
9108 int
9109 _user_open_dir(int fd, const char* userPath)
9110 {
9111 	if (userPath == NULL)
9112 		return dir_open(fd, NULL, false);
9113 
9114 	KPath path(B_PATH_NAME_LENGTH + 1);
9115 	if (path.InitCheck() != B_OK)
9116 		return B_NO_MEMORY;
9117 
9118 	char* buffer = path.LockBuffer();
9119 
9120 	if (!IS_USER_ADDRESS(userPath))
9121 		return B_BAD_ADDRESS;
9122 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9123 	if (status != B_OK)
9124 		return status;
9125 
9126 	return dir_open(fd, buffer, false);
9127 }
9128 
9129 
9130 /*!	\brief Opens a directory's parent directory and returns the entry name
9131 		   of the former.
9132 
9133 	Aside from returning the directory's entry name, this method is
9134 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9135 	equivalent, if \a userName is \c NULL.
9136 
9137 	If a name buffer is supplied and the name does not fit the buffer, the
9138 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9139 
9140 	\param fd A FD referring to a directory.
9141 	\param userName Buffer the directory's entry name shall be written into.
9142 		   May be \c NULL.
9143 	\param nameLength Size of the name buffer.
9144 	\return The file descriptor of the opened parent directory, if everything
9145 			went fine, an error code otherwise.
9146 */
9147 int
9148 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9149 {
9150 	bool kernel = false;
9151 
9152 	if (userName && !IS_USER_ADDRESS(userName))
9153 		return B_BAD_ADDRESS;
9154 
9155 	// open the parent dir
9156 	int parentFD = dir_open(fd, (char*)"..", kernel);
9157 	if (parentFD < 0)
9158 		return parentFD;
9159 	FDCloser fdCloser(parentFD, kernel);
9160 
9161 	if (userName) {
9162 		// get the vnodes
9163 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9164 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9165 		VNodePutter parentVNodePutter(parentVNode);
9166 		VNodePutter dirVNodePutter(dirVNode);
9167 		if (!parentVNode || !dirVNode)
9168 			return B_FILE_ERROR;
9169 
9170 		// get the vnode name
9171 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9172 		struct dirent* buffer = (struct dirent*)_buffer;
9173 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9174 			sizeof(_buffer), get_current_io_context(false));
9175 		if (status != B_OK)
9176 			return status;
9177 
9178 		// copy the name to the userland buffer
9179 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9180 		if (len < 0)
9181 			return len;
9182 		if (len >= (int)nameLength)
9183 			return B_BUFFER_OVERFLOW;
9184 	}
9185 
9186 	return fdCloser.Detach();
9187 }
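
/*	Typical use (via the matching syscall stub; a sketch, illustrative only):
	open the parent directory and learn under which name the child is linked
	into it.

	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _kern_open_parent_dir(dirFD, name, sizeof(name));
	if (parentFD >= 0) {
		// parentFD refers to the parent directory; name holds the entry
		// name of the directory dirFD refers to
	}
	\endcode

	Repeated application of this pattern is the basis for getcwd()-style
	path reconstruction.
*/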
9188 
9189 
9190 status_t
9191 _user_fcntl(int fd, int op, size_t argument)
9192 {
9193 	status_t status = common_fcntl(fd, op, argument, false);
9194 	if (op == F_SETLKW)
9195 		syscall_restart_handle_post(status);
9196 
9197 	return status;
9198 }
9199 
9200 
9201 status_t
9202 _user_fsync(int fd)
9203 {
9204 	return common_sync(fd, false);
9205 }
9206 
9207 
9208 status_t
9209 _user_flock(int fd, int operation)
9210 {
9211 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9212 
9213 	// Check if the operation is valid
9214 	switch (operation & ~LOCK_NB) {
9215 		case LOCK_UN:
9216 		case LOCK_SH:
9217 		case LOCK_EX:
9218 			break;
9219 
9220 		default:
9221 			return B_BAD_VALUE;
9222 	}
9223 
9224 	struct file_descriptor* descriptor;
9225 	struct vnode* vnode;
9226 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9227 	if (descriptor == NULL)
9228 		return B_FILE_ERROR;
9229 
9230 	if (descriptor->type != FDTYPE_FILE) {
9231 		put_fd(descriptor);
9232 		return B_BAD_VALUE;
9233 	}
9234 
9235 	struct flock flock;
9236 	flock.l_start = 0;
9237 	flock.l_len = OFF_MAX;
9238 	flock.l_whence = 0;
9239 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9240 
9241 	status_t status;
9242 	if ((operation & LOCK_UN) != 0) {
9243 		if (HAS_FS_CALL(vnode, release_lock))
9244 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9245 		else
9246 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9247 	} else {
9248 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9249 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9250 				(operation & LOCK_NB) == 0);
9251 		} else {
9252 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9253 				(operation & LOCK_NB) == 0);
9254 		}
9255 	}
9256 
9257 	syscall_restart_handle_post(status);
9258 
9259 	put_fd(descriptor);
9260 	return status;
9261 }
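
/*	The mapping above means that, for instance, a userland
	\code flock(fd, LOCK_EX | LOCK_NB) \endcode arrives here as a
	non-blocking F_WRLCK over the whole file (l_start 0, l_len OFF_MAX) and
	is forwarded to the file system's acquire_lock() hook if it provides
	one, or to the generic advisory lock implementation otherwise.
*/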
9262 
9263 
9264 status_t
9265 _user_lock_node(int fd)
9266 {
9267 	return common_lock_node(fd, false);
9268 }
9269 
9270 
9271 status_t
9272 _user_unlock_node(int fd)
9273 {
9274 	return common_unlock_node(fd, false);
9275 }
9276 
9277 
9278 status_t
9279 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9280 	int perms)
9281 {
9282 	char name[B_FILE_NAME_LENGTH];
9283 	status_t status;
9284 
9285 	if (!IS_USER_ADDRESS(userName))
9286 		return B_BAD_ADDRESS;
9287 
9288 	status = user_copy_name(name, userName, sizeof(name));
9289 	if (status != B_OK)
9290 		return status;
9291 
9292 	return dir_create_entry_ref(device, inode, name, perms, false);
9293 }
9294 
9295 
9296 status_t
9297 _user_create_dir(int fd, const char* userPath, int perms)
9298 {
9299 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9300 	if (pathBuffer.InitCheck() != B_OK)
9301 		return B_NO_MEMORY;
9302 
9303 	char* path = pathBuffer.LockBuffer();
9304 
9305 	if (!IS_USER_ADDRESS(userPath))
9306 		return B_BAD_ADDRESS;
9307 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9308 	if (status != B_OK)
9309 		return status;
9310 
9311 	return dir_create(fd, path, perms, false);
9312 }
9313 
9314 
9315 status_t
9316 _user_remove_dir(int fd, const char* userPath)
9317 {
9318 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9319 	if (pathBuffer.InitCheck() != B_OK)
9320 		return B_NO_MEMORY;
9321 
9322 	char* path = pathBuffer.LockBuffer();
9323 
9324 	if (userPath != NULL) {
9325 		if (!IS_USER_ADDRESS(userPath))
9326 			return B_BAD_ADDRESS;
9327 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9328 		if (status != B_OK)
9329 			return status;
9330 	}
9331 
9332 	return dir_remove(fd, userPath ? path : NULL, false);
9333 }
9334 
9335 
9336 status_t
9337 _user_read_link(int fd, const char* userPath, char* userBuffer,
9338 	size_t* userBufferSize)
9339 {
9340 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9341 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9342 		return B_NO_MEMORY;
9343 
9344 	size_t bufferSize;
9345 
9346 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9347 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9348 		return B_BAD_ADDRESS;
9349 
9350 	char* path = pathBuffer.LockBuffer();
9351 	char* buffer = linkBuffer.LockBuffer();
9352 
9353 	if (userPath) {
9354 		if (!IS_USER_ADDRESS(userPath))
9355 			return B_BAD_ADDRESS;
9356 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9357 		if (status != B_OK)
9358 			return status;
9359 
9360 		if (bufferSize > B_PATH_NAME_LENGTH)
9361 			bufferSize = B_PATH_NAME_LENGTH;
9362 	}
9363 
9364 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9365 		&bufferSize, false);
9366 
9367 	// we also update the bufferSize in case of errors
9368 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9369 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9370 		return B_BAD_ADDRESS;
9371 
9372 	if (status != B_OK)
9373 		return status;
9374 
9375 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9376 		return B_BAD_ADDRESS;
9377 
9378 	return B_OK;
9379 }
9380 
9381 
9382 status_t
9383 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9384 	int mode)
9385 {
9386 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9387 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9388 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9389 		return B_NO_MEMORY;
9390 
9391 	char* path = pathBuffer.LockBuffer();
9392 	char* toPath = toPathBuffer.LockBuffer();
9393 
9394 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9395 		return B_BAD_ADDRESS;
9396 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9397 	if (status != B_OK)
9398 		return status;
9399 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9400 	if (status != B_OK)
9401 		return status;
9402 
9403 	return common_create_symlink(fd, path, toPath, mode, false);
9404 }
9405 
9406 
9407 status_t
9408 _user_create_link(int pathFD, const char* userPath, int toFD,
9409 	const char* userToPath, bool traverseLeafLink)
9410 {
9411 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9412 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9413 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9414 		return B_NO_MEMORY;
9415 
9416 	char* path = pathBuffer.LockBuffer();
9417 	char* toPath = toPathBuffer.LockBuffer();
9418 
9419 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9420 		return B_BAD_ADDRESS;
9421 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9422 	if (status != B_OK)
9423 		return status;
9424 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9425 	if (status != B_OK)
9426 		return status;
9427 
9428 	status = check_path(toPath);
9429 	if (status != B_OK)
9430 		return status;
9431 
9432 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9433 		false);
9434 }
9435 
9436 
9437 status_t
9438 _user_unlink(int fd, const char* userPath)
9439 {
9440 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9441 	if (pathBuffer.InitCheck() != B_OK)
9442 		return B_NO_MEMORY;
9443 
9444 	char* path = pathBuffer.LockBuffer();
9445 
9446 	if (!IS_USER_ADDRESS(userPath))
9447 		return B_BAD_ADDRESS;
9448 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9449 	if (status != B_OK)
9450 		return status;
9451 
9452 	return common_unlink(fd, path, false);
9453 }
9454 
9455 
9456 status_t
9457 _user_rename(int oldFD, const char* userOldPath, int newFD,
9458 	const char* userNewPath)
9459 {
9460 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9461 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9462 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9463 		return B_NO_MEMORY;
9464 
9465 	char* oldPath = oldPathBuffer.LockBuffer();
9466 	char* newPath = newPathBuffer.LockBuffer();
9467 
9468 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9469 		return B_BAD_ADDRESS;
9470 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9471 	if (status != B_OK)
9472 		return status;
9473 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9474 	if (status != B_OK)
9475 		return status;
9476 
9477 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9478 }
9479 
9480 
9481 status_t
9482 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9483 {
9484 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9485 	if (pathBuffer.InitCheck() != B_OK)
9486 		return B_NO_MEMORY;
9487 
9488 	char* path = pathBuffer.LockBuffer();
9489 
9490 	if (!IS_USER_ADDRESS(userPath))
9491 		return B_BAD_ADDRESS;
9492 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9493 	if (status != B_OK)
9494 		return status;
9495 
9496 	// split into directory vnode and filename path
9497 	char filename[B_FILE_NAME_LENGTH];
9498 	struct vnode* dir;
9499 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9500 	if (status != B_OK)
9501 		return status;
9502 
9503 	VNodePutter _(dir);
9504 
9505 	// the underlying FS needs to support creating FIFOs
9506 	if (!HAS_FS_CALL(dir, create_special_node))
9507 		return B_UNSUPPORTED;
9508 
9509 	// create the entry	-- the FIFO sub node is set up automatically
9510 	fs_vnode superVnode;
9511 	ino_t nodeID;
9512 	status = FS_CALL(dir, create_special_node, filename, NULL,
9513 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9514 
9515 	// create_special_node() acquired a reference for us that we don't need.
9516 	if (status == B_OK)
9517 		put_vnode(dir->mount->volume, nodeID);
9518 
9519 	return status;
9520 }
9521 
9522 
9523 status_t
9524 _user_create_pipe(int* userFDs)
9525 {
9526 	// rootfs should support creating FIFOs, but let's be sure
9527 	if (!HAS_FS_CALL(sRoot, create_special_node))
9528 		return B_UNSUPPORTED;
9529 
9530 	// create the node	-- the FIFO sub node is set up automatically
9531 	fs_vnode superVnode;
9532 	ino_t nodeID;
9533 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9534 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9535 	if (status != B_OK)
9536 		return status;
9537 
9538 	// We've got one reference to the node and need another one.
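	// (each of the two open_vnode() calls below consumes one of them)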
9539 	struct vnode* vnode;
9540 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9541 	if (status != B_OK) {
9542 		// that should not happen
9543 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9544 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9545 		return status;
9546 	}
9547 
9548 	// Everything looks good so far. Open two FDs: one for reading, one for
9549 	// writing.
9550 	int fds[2];
9551 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9552 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9553 
9554 	FDCloser closer0(fds[0], false);
9555 	FDCloser closer1(fds[1], false);
9556 
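	// if either open failed, its negative return value is the error code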
9557 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9558 
9559 	// copy FDs to userland
9560 	if (status == B_OK) {
9561 		if (!IS_USER_ADDRESS(userFDs)
9562 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9563 			status = B_BAD_ADDRESS;
9564 		}
9565 	}
9566 
9567 	// keep FDs, if everything went fine
9568 	if (status == B_OK) {
9569 		closer0.Detach();
9570 		closer1.Detach();
9571 	}
9572 
9573 	return status;
9574 }
9575 
9576 
9577 status_t
9578 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9579 {
9580 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9581 	if (pathBuffer.InitCheck() != B_OK)
9582 		return B_NO_MEMORY;
9583 
9584 	char* path = pathBuffer.LockBuffer();
9585 
9586 	if (!IS_USER_ADDRESS(userPath))
9587 		return B_BAD_ADDRESS;
9588 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9589 	if (status != B_OK)
9590 		return status;
9591 
9592 	return common_access(fd, path, mode, effectiveUserGroup, false);
9593 }
9594 
9595 
9596 status_t
9597 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9598 	struct stat* userStat, size_t statSize)
9599 {
9600 	struct stat stat;
9601 	status_t status;
9602 
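	// sizes larger than the kernel's struct stat are rejected; smaller
	// (compatibility) sizes get a truncated copy below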
9603 	if (statSize > sizeof(struct stat))
9604 		return B_BAD_VALUE;
9605 
9606 	if (!IS_USER_ADDRESS(userStat))
9607 		return B_BAD_ADDRESS;
9608 
9609 	if (userPath != NULL) {
9610 		// path given: get the stat of the node referred to by (fd, path)
9611 		if (!IS_USER_ADDRESS(userPath))
9612 			return B_BAD_ADDRESS;
9613 
9614 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9615 		if (pathBuffer.InitCheck() != B_OK)
9616 			return B_NO_MEMORY;
9617 
9618 		char* path = pathBuffer.LockBuffer();
9619 
9620 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9621 		if (status != B_OK)
9622 			return status;
9623 
9624 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9625 	} else {
9626 		// no path given: get the FD and use the FD operation
9627 		struct file_descriptor* descriptor
9628 			= get_fd(get_current_io_context(false), fd);
9629 		if (descriptor == NULL)
9630 			return B_FILE_ERROR;
9631 
9632 		if (descriptor->ops->fd_read_stat)
9633 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9634 		else
9635 			status = B_UNSUPPORTED;
9636 
9637 		put_fd(descriptor);
9638 	}
9639 
9640 	if (status != B_OK)
9641 		return status;
9642 
9643 	return user_memcpy(userStat, &stat, statSize);
9644 }
9645 
9646 
9647 status_t
9648 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9649 	const struct stat* userStat, size_t statSize, int statMask)
9650 {
9651 	if (statSize > sizeof(struct stat))
9652 		return B_BAD_VALUE;
9653 
9654 	struct stat stat;
9655 
9656 	if (!IS_USER_ADDRESS(userStat)
9657 		|| user_memcpy(&stat, userStat, statSize) != B_OK)
9658 		return B_BAD_ADDRESS;
9659 
9660 	// clear additional stat fields
9661 	if (statSize < sizeof(struct stat))
9662 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9663 
9664 	status_t status;
9665 
9666 	if (userPath != NULL) {
9667 		// path given: write the stat of the node referred to by (fd, path)
9668 		if (!IS_USER_ADDRESS(userPath))
9669 			return B_BAD_ADDRESS;
9670 
9671 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9672 		if (pathBuffer.InitCheck() != B_OK)
9673 			return B_NO_MEMORY;
9674 
9675 		char* path = pathBuffer.LockBuffer();
9676 
9677 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9678 		if (status != B_OK)
9679 			return status;
9680 
9681 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9682 			statMask, false);
9683 	} else {
9684 		// no path given: get the FD and use the FD operation
9685 		struct file_descriptor* descriptor
9686 			= get_fd(get_current_io_context(false), fd);
9687 		if (descriptor == NULL)
9688 			return B_FILE_ERROR;
9689 
9690 		if (descriptor->ops->fd_write_stat) {
9691 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9692 				statMask);
9693 		} else
9694 			status = B_UNSUPPORTED;
9695 
9696 		put_fd(descriptor);
9697 	}
9698 
9699 	return status;
9700 }
9701 
9702 
9703 int
9704 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9705 {
9706 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9707 	if (pathBuffer.InitCheck() != B_OK)
9708 		return B_NO_MEMORY;
9709 
9710 	char* path = pathBuffer.LockBuffer();
9711 
9712 	if (userPath != NULL) {
9713 		if (!IS_USER_ADDRESS(userPath))
9714 			return B_BAD_ADDRESS;
9715 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9716 		if (status != B_OK)
9717 			return status;
9718 	}
9719 
9720 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9721 }
9722 
9723 
9724 ssize_t
9725 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9726 	size_t readBytes)
9727 {
9728 	char attribute[B_FILE_NAME_LENGTH];
9729 
9730 	if (userAttribute == NULL)
9731 		return B_BAD_VALUE;
9732 	if (!IS_USER_ADDRESS(userAttribute))
9733 		return B_BAD_ADDRESS;
9734 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9735 	if (status != B_OK)
9736 		return status;
9737 
9738 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9739 	if (attr < 0)
9740 		return attr;
9741 
9742 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9743 	_user_close(attr);
9744 
9745 	return bytes;
9746 }
9747 
9748 
9749 ssize_t
9750 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9751 	const void* buffer, size_t writeBytes)
9752 {
9753 	char attribute[B_FILE_NAME_LENGTH];
9754 
9755 	if (userAttribute == NULL)
9756 		return B_BAD_VALUE;
9757 	if (!IS_USER_ADDRESS(userAttribute))
9758 		return B_BAD_ADDRESS;
9759 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9760 	if (status != B_OK)
9761 		return status;
9762 
9763 	// Mimic the typical BeOS truncate-on-write behavior, but only when
9764 	// writing at offset 0, so the position argument keeps working
9765 	int attr = attr_create(fd, NULL, attribute, type,
9766 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9767 	if (attr < 0)
9768 		return attr;
9769 
9770 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9771 	_user_close(attr);
9772 
9773 	return bytes;
9774 }
9775 
9776 
9777 status_t
9778 _user_stat_attr(int fd, const char* userAttribute,
9779 	struct attr_info* userAttrInfo)
9780 {
9781 	char attribute[B_FILE_NAME_LENGTH];
9782 
9783 	if (userAttribute == NULL || userAttrInfo == NULL)
9784 		return B_BAD_VALUE;
9785 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9786 		return B_BAD_ADDRESS;
9787 	status_t status = user_copy_name(attribute, userAttribute,
9788 		sizeof(attribute));
9789 	if (status != B_OK)
9790 		return status;
9791 
9792 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9793 	if (attr < 0)
9794 		return attr;
9795 
9796 	struct file_descriptor* descriptor
9797 		= get_fd(get_current_io_context(false), attr);
9798 	if (descriptor == NULL) {
9799 		_user_close(attr);
9800 		return B_FILE_ERROR;
9801 	}
9802 
9803 	struct stat stat;
9804 	if (descriptor->ops->fd_read_stat)
9805 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9806 	else
9807 		status = B_UNSUPPORTED;
9808 
9809 	put_fd(descriptor);
9810 	_user_close(attr);
9811 
9812 	if (status == B_OK) {
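		// translate the interesting stat fields into an attr_info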
9813 		attr_info info;
9814 		info.type = stat.st_type;
9815 		info.size = stat.st_size;
9816 
9817 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9818 			return B_BAD_ADDRESS;
9819 	}
9820 
9821 	return status;
9822 }
9823 
9824 
9825 int
9826 _user_open_attr(int fd, const char* userPath, const char* userName,
9827 	uint32 type, int openMode)
9828 {
9829 	char name[B_FILE_NAME_LENGTH];
9830 
9831 	if (!IS_USER_ADDRESS(userName))
9832 		return B_BAD_ADDRESS;
9833 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9834 	if (status != B_OK)
9835 		return status;
9836 
9837 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9838 	if (pathBuffer.InitCheck() != B_OK)
9839 		return B_NO_MEMORY;
9840 
9841 	char* path = pathBuffer.LockBuffer();
9842 
9843 	if (userPath != NULL) {
9844 		if (!IS_USER_ADDRESS(userPath))
9845 			return B_BAD_ADDRESS;
9846 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9847 		if (status != B_OK)
9848 			return status;
9849 	}
9850 
9851 	if ((openMode & O_CREAT) != 0) {
9852 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9853 			false);
9854 	}
9855 
9856 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9857 }
9858 
9859 
9860 status_t
9861 _user_remove_attr(int fd, const char* userName)
9862 {
9863 	char name[B_FILE_NAME_LENGTH];
9864 
9865 	if (!IS_USER_ADDRESS(userName))
9866 		return B_BAD_ADDRESS;
9867 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9868 	if (status != B_OK)
9869 		return status;
9870 
9871 	return attr_remove(fd, name, false);
9872 }
9873 
9874 
9875 status_t
9876 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9877 	const char* userToName)
9878 {
9879 	if (!IS_USER_ADDRESS(userFromName)
9880 		|| !IS_USER_ADDRESS(userToName))
9881 		return B_BAD_ADDRESS;
9882 
9883 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9884 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9885 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9886 		return B_NO_MEMORY;
9887 
9888 	char* fromName = fromNameBuffer.LockBuffer();
9889 	char* toName = toNameBuffer.LockBuffer();
9890 
9891 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9892 	if (status != B_OK)
9893 		return status;
9894 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9895 	if (status != B_OK)
9896 		return status;
9897 
9898 	return attr_rename(fromFile, fromName, toFile, toName, false);
9899 }
9900 
9901 
9902 int
9903 _user_open_index_dir(dev_t device)
9904 {
9905 	return index_dir_open(device, false);
9906 }
9907 
9908 
9909 status_t
9910 _user_create_index(dev_t device, const char* userName, uint32 type,
9911 	uint32 flags)
9912 {
9913 	char name[B_FILE_NAME_LENGTH];
9914 
9915 	if (!IS_USER_ADDRESS(userName))
9916 		return B_BAD_ADDRESS;
9917 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9918 	if (status != B_OK)
9919 		return status;
9920 
9921 	return index_create(device, name, type, flags, false);
9922 }
9923 
9924 
9925 status_t
9926 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9927 {
9928 	char name[B_FILE_NAME_LENGTH];
9929 	struct stat stat;
9930 	status_t status;
9931 
9932 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9933 		return B_BAD_ADDRESS;
9934 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9935 	if (status != B_OK)
9936 		return status;
9937 
9938 	status = index_name_read_stat(device, name, &stat, false);
9939 	if (status == B_OK) {
9940 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9941 			return B_BAD_ADDRESS;
9942 	}
9943 
9944 	return status;
9945 }
9946 
9947 
9948 status_t
9949 _user_remove_index(dev_t device, const char* userName)
9950 {
9951 	char name[B_FILE_NAME_LENGTH];
9952 
9953 	if (!IS_USER_ADDRESS(userName))
9954 		return B_BAD_ADDRESS;
9955 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9956 	if (status != B_OK)
9957 		return status;
9958 
9959 	return index_remove(device, name, false);
9960 }
9961 
9962 
9963 status_t
9964 _user_getcwd(char* userBuffer, size_t size)
9965 {
9966 	if (size == 0)
9967 		return B_BAD_VALUE;
9968 	if (!IS_USER_ADDRESS(userBuffer))
9969 		return B_BAD_ADDRESS;
9970 
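	// silently clamp the size to the maximum path length we support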
9971 	if (size > kMaxPathLength)
9972 		size = kMaxPathLength;
9973 
9974 	KPath pathBuffer(size);
9975 	if (pathBuffer.InitCheck() != B_OK)
9976 		return B_NO_MEMORY;
9977 
9978 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9979 
9980 	char* path = pathBuffer.LockBuffer();
9981 
9982 	status_t status = get_cwd(path, size, false);
9983 	if (status != B_OK)
9984 		return status;
9985 
9986 	// Copy back the result
9987 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9988 		return B_BAD_ADDRESS;
9989 
9990 	return status;
9991 }
9992 
9993 
9994 status_t
9995 _user_setcwd(int fd, const char* userPath)
9996 {
9997 	TRACE(("user_setcwd: path = %p\n", userPath));
9998 
9999 	KPath pathBuffer(B_PATH_NAME_LENGTH);
10000 	if (pathBuffer.InitCheck() != B_OK)
10001 		return B_NO_MEMORY;
10002 
10003 	char* path = pathBuffer.LockBuffer();
10004 
10005 	if (userPath != NULL) {
10006 		if (!IS_USER_ADDRESS(userPath))
10007 			return B_BAD_ADDRESS;
10008 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10009 		if (status != B_OK)
10010 			return status;
10011 	}
10012 
10013 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10014 }
10015 
10016 
10017 status_t
10018 _user_change_root(const char* userPath)
10019 {
10020 	// only root is allowed to chroot()
10021 	if (geteuid() != 0)
10022 		return B_NOT_ALLOWED;
10023 
10024 	// alloc path buffer
10025 	KPath pathBuffer(B_PATH_NAME_LENGTH);
10026 	if (pathBuffer.InitCheck() != B_OK)
10027 		return B_NO_MEMORY;
10028 
10029 	// copy userland path to kernel
10030 	char* path = pathBuffer.LockBuffer();
10031 	if (userPath != NULL) {
10032 		if (!IS_USER_ADDRESS(userPath))
10033 			return B_BAD_ADDRESS;
10034 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10035 		if (status != B_OK)
10036 			return status;
10037 	}
10038 
10039 	// get the vnode
10040 	struct vnode* vnode;
10041 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
10042 	if (status != B_OK)
10043 		return status;
10044 
10045 	// set the new root
10046 	struct io_context* context = get_current_io_context(false);
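	// Swapping the root vnode is guarded by sIOContextRootLock; the context
	// takes over our reference to the new root.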
10047 	mutex_lock(&sIOContextRootLock);
10048 	struct vnode* oldRoot = context->root;
10049 	context->root = vnode;
10050 	mutex_unlock(&sIOContextRootLock);
10051 
10052 	put_vnode(oldRoot);
10053 
10054 	return B_OK;
10055 }
10056 
10057 
10058 int
10059 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10060 	uint32 flags, port_id port, int32 token)
10061 {
10062 	char* query;
10063 
10064 	if (device < 0 || userQuery == NULL || queryLength == 0)
10065 		return B_BAD_VALUE;
10066 
10067 	if (!IS_USER_ADDRESS(userQuery))
10068 		return B_BAD_ADDRESS;
10069 
10070 	// safety restriction: don't allow overly large kernel allocations
10071 	if (queryLength >= 65536)
10072 		return B_NAME_TOO_LONG;
10073 
10074 	query = (char*)malloc(queryLength + 1);
10075 	if (query == NULL)
10076 		return B_NO_MEMORY;
10077 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
10078 		free(query);
10079 		return B_BAD_ADDRESS;
10080 	}
10081 
10082 	int fd = query_open(device, query, flags, port, token, false);
10083 
10084 	free(query);
10085 	return fd;
10086 }
10087 
10088 
10089 #include "vfs_request_io.cpp"
10090