xref: /haiku/src/system/kernel/fs/vfs.cpp (revision f7c507c3a6fbf3a44c59500543926a9088724968)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <vfs.h>
53 #include <vm/vm.h>
54 #include <vm/VMCache.h>
55 #include <wait_for_objects.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
// Uncomment the following line to route TRACE()/FUNCTION() output to
// dprintf().
//#define TRACE_VFS
#ifndef TRACE_VFS
	// tracing disabled: both macros collapse to an empty statement
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#else
	// tracing enabled: x is a complete, parenthesized dprintf() argument list
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#endif

#define ADD_DEBUGGER_COMMANDS
76 
77 
// Hook availability tests. The object argument is parenthesized so that any
// pointer-valued expression (not just a plain identifier) expands correctly.
#define HAS_FS_CALL(vnode, op)			((vnode)->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	((mount)->volume->ops->op != NULL)

// Hook invocation helpers.
// - FS_CALL*() invoke a vnode hook, passing the vnode's volume and the vnode.
// - FS_MOUNT_CALL*() invoke a volume hook, passing the mount's volume.
// With KDEBUG enabled a missing hook results in a panic() and the expression
// evaluates to 0; otherwise the hook pointer is called unchecked.
// Note: the vnode/mount argument is evaluated more than once, so it must not
// have side effects.
#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			(vnode)->ops->op((vnode)->mount->volume, (vnode), params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			(vnode)->ops->op((vnode)->mount->volume, (vnode)) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			(mount)->volume->ops->op((mount)->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			(mount)->volume->ops->op((mount)->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			((vnode)->ops->op((vnode)->mount->volume, (vnode), params))
#	define FS_CALL_NO_PARAMS(vnode, op) \
			((vnode)->ops->op((vnode)->mount->volume, (vnode)))
#	define FS_MOUNT_CALL(mount, op, params...) \
			((mount)->volume->ops->op((mount)->volume, params))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			((mount)->volume->ops->op((mount)->volume))
#endif
107 #endif
108 
109 
// The absolute maximum path length (for getcwd()). This limit does not
// depend on PATH_MAX.
static const size_t kMaxPathLength = 65536;
113 
114 
115 typedef DoublyLinkedList<vnode> VnodeList;
116 
117 /*!	\brief Structure to manage a mounted file system
118 
119 	Note: The root_vnode and root_vnode->covers fields (what others?) are
120 	initialized in fs_mount() and not changed afterwards. That is as soon
121 	as the mount is mounted and it is made sure it won't be unmounted
122 	(e.g. by holding a reference to a vnode of that mount) (read) access
123 	to those fields is always safe, even without additional locking. Morever
124 	while mounted the mount holds a reference to the root_vnode->covers vnode,
125 	and thus making the access path vnode->mount->root_vnode->covers->mount->...
126 	safe if a reference to vnode is held (note that for the root mount
127 	root_vnode->covers is NULL, though).
128 */
129 struct fs_mount {
130 	fs_mount()
131 		:
132 		volume(NULL),
133 		device_name(NULL)
134 	{
135 		recursive_lock_init(&rlock, "mount rlock");
136 	}
137 
138 	~fs_mount()
139 	{
140 		recursive_lock_destroy(&rlock);
141 		free(device_name);
142 
143 		while (volume) {
144 			fs_volume* superVolume = volume->super_volume;
145 
146 			if (volume->file_system != NULL)
147 				put_module(volume->file_system->info.name);
148 
149 			free(volume->file_system_name);
150 			free(volume);
151 			volume = superVolume;
152 		}
153 	}
154 
155 	struct fs_mount* next;
156 	dev_t			id;
157 	fs_volume*		volume;
158 	char*			device_name;
159 	recursive_lock	rlock;	// guards the vnodes list
160 		// TODO: Make this a mutex! It is never used recursively.
161 	struct vnode*	root_vnode;
162 	struct vnode*	covers_vnode;	// immutable
163 	KPartition*		partition;
164 	VnodeList		vnodes;
165 	EntryCache		entry_cache;
166 	bool			unmounting;
167 	bool			owns_file_device;
168 };
169 
170 
171 namespace {
172 
// One element of a LockList (the element type of advisory_locking::locks).
// The list linkage is provided by the DoublyLinkedListLinkImpl base class.
173 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
174 	list_link		link;		// NOTE(review): separate from the base class link; no user visible in this part of the file
175 	void*			bound_to;	// NOTE(review): presumably the object the lock is bound to -- confirm with the locking code
176 	team_id			team;		// NOTE(review): presumably the team holding the lock -- confirm
177 	pid_t			session;	// NOTE(review): presumably the session of the holder -- confirm
178 	off_t			start;		// NOTE(review): presumably the first offset of the locked range -- confirm
179 	off_t			end;		// NOTE(review): presumably the last offset of the locked range -- confirm whether inclusive
180 	bool			shared;		// NOTE(review): presumably true for shared (as opposed to exclusive) locks -- confirm
181 };
182 
// List of advisory_lock entries, used as advisory_locking::locks.
183 typedef DoublyLinkedList<advisory_lock> LockList;
184 
185 } // namespace
186 
187 
188 struct advisory_locking {
189 	sem_id			lock;
190 	sem_id			wait_sem;
191 	LockList		locks;
192 
193 	advisory_locking()
194 		:
195 		lock(-1),
196 		wait_sem(-1)
197 	{
198 	}
199 
200 	~advisory_locking()
201 	{
202 		if (lock >= 0)
203 			delete_sem(lock);
204 		if (wait_sem >= 0)
205 			delete_sem(wait_sem);
206 	}
207 };
208 
209 /*!	\brief Guards sMountsTable.
210 
211 	The holder is allowed read/write access to sMountsTable.
212 	Manipulation of the fs_mount structures themselves
213 	(and their destruction) requires different locks, though.
214 */
215 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
216 
217 /*!	\brief Guards mount/unmount operations.
218 
219 	fs_mount() and fs_unmount() hold the lock during their whole operation.
220 	That is, locking the lock ensures that no FS is mounted/unmounted. In
221 	particular this means that
222 	- sMountsTable will not be modified,
223 	- the fields immutable after initialization of the fs_mount structures in
224 	  sMountsTable will not be modified.
225 
226 	The thread trying to lock the lock must not hold sVnodeLock or
227 	sMountMutex.
228 */
229 static recursive_lock sMountOpLock;
230 
231 /*!	\brief Guards sVnodeTable.
232 
233 	The holder is allowed read/write access to sVnodeTable and to
234 	any unbusy vnode in that table, except for the immutable fields (device,
235 	id, private_node, mount), to which only read-only access is allowed.
236 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
237 	well as the busy, removed, unused flags, and the vnode's type can also be
238 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
239 	locked. Write access to covered_by and covers requires write locking
240 	sVnodeLock.
241 
242 	The thread trying to acquire the lock must not hold sMountMutex.
243 	You must not hold this lock when calling create_sem(), as this might call
244 	vfs_free_unused_vnodes() and thus cause a deadlock.
245 */
246 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
247 
248 /*!	\brief Guards io_context::root.
249 
250 	Must be held when setting or getting the io_context::root field.
251 	The only operation allowed while holding this lock, besides getting or
252 	setting the field, is inc_vnode_ref_count() on io_context::root.
253 */
254 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
255 
256 
257 namespace {
258 
259 struct vnode_hash_key {
260 	dev_t	device;
261 	ino_t	vnode;
262 };
263 
264 struct VnodeHash {
265 	typedef vnode_hash_key	KeyType;
266 	typedef	struct vnode	ValueType;
267 
268 #define VHASH(mountid, vnodeid) \
269 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
270 
271 	size_t HashKey(KeyType key) const
272 	{
273 		return VHASH(key.device, key.vnode);
274 	}
275 
276 	size_t Hash(ValueType* vnode) const
277 	{
278 		return VHASH(vnode->device, vnode->id);
279 	}
280 
281 #undef VHASH
282 
283 	bool Compare(KeyType key, ValueType* vnode) const
284 	{
285 		return vnode->device == key.device && vnode->id == key.vnode;
286 	}
287 
288 	ValueType*& GetLink(ValueType* value) const
289 	{
290 		return value->next;
291 	}
292 };
293 
294 typedef BOpenHashTable<VnodeHash> VnodeTable;
295 
296 
297 struct MountHash {
298 	typedef dev_t			KeyType;
299 	typedef	struct fs_mount	ValueType;
300 
301 	size_t HashKey(KeyType key) const
302 	{
303 		return key;
304 	}
305 
306 	size_t Hash(ValueType* mount) const
307 	{
308 		return mount->id;
309 	}
310 
311 	bool Compare(KeyType key, ValueType* mount) const
312 	{
313 		return mount->id == key;
314 	}
315 
316 	ValueType*& GetLink(ValueType* value) const
317 	{
318 		return value->next;
319 	}
320 };
321 
322 typedef BOpenHashTable<MountHash> MountTable;
323 
324 } // namespace
325 
326 
// Table of vnodes, keyed by (device, node ID) as defined by VnodeHash.
// Access is guarded by sVnodeLock.
// NOTE(review): VNODE_HASH_TABLE_SIZE is presumably the initial table size --
// its use is not visible in this part of the file.
327 #define VNODE_HASH_TABLE_SIZE 1024
328 static VnodeTable* sVnodeTable;
// NOTE(review): presumably the root vnode of the VFS -- confirm where it is set.
329 static struct vnode* sRoot;
330 
// Table of fs_mount structures, keyed by mount ID as defined by MountHash.
// Access is guarded by sMountMutex.
331 #define MOUNTS_HASH_TABLE_SIZE 16
332 static MountTable* sMountsTable;
// NOTE(review): presumably the ID handed to the next mount; starts at 1.
333 static dev_t sNextMountID = 1;
334 
335 #define MAX_TEMP_IO_VECS 8
336 
337 // How long to wait for busy vnodes (10s)
// retry_busy_vnode() passes BUSY_VNODE_DELAY to snooze() between attempts;
// 2000 retries * 5000 yields the stated 10s assuming a microsecond unit.
338 #define BUSY_VNODE_RETRIES 2000
339 #define BUSY_VNODE_DELAY 5000
340 
// Not static, i.e. visible to other translation units. The initial value is
// octal 022.
341 mode_t __gUmask = 022;
342 
343 /* function declarations */
// Forward declarations of file-local functions, needed because the fd_ops
// tables and helper classes below refer to them before their definitions.
344 
345 static void free_unused_vnodes();
346 
347 // file descriptor operation prototypes
// (grouped by descriptor kind: file, directory, attribute directory,
// attribute, index directory, query)
348 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
349 	void* buffer, size_t* _bytes);
350 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
351 	const void* buffer, size_t* _bytes);
352 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
353 	int seekType);
354 static void file_free_fd(struct file_descriptor* descriptor);
355 static status_t file_close(struct file_descriptor* descriptor);
356 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
357 	struct selectsync* sync);
358 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
359 	struct selectsync* sync);
// dir_read() is overloaded: the first variant takes a file descriptor, the
// second one a vnode plus directory cookie.
360 static status_t dir_read(struct io_context* context,
361 	struct file_descriptor* descriptor, struct dirent* buffer,
362 	size_t bufferSize, uint32* _count);
363 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
364 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
365 static status_t dir_rewind(struct file_descriptor* descriptor);
366 static void dir_free_fd(struct file_descriptor* descriptor);
367 static status_t dir_close(struct file_descriptor* descriptor);
368 static status_t attr_dir_read(struct io_context* context,
369 	struct file_descriptor* descriptor, struct dirent* buffer,
370 	size_t bufferSize, uint32* _count);
371 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
372 static void attr_dir_free_fd(struct file_descriptor* descriptor);
373 static status_t attr_dir_close(struct file_descriptor* descriptor);
374 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
375 	void* buffer, size_t* _bytes);
376 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
377 	const void* buffer, size_t* _bytes);
378 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
379 	int seekType);
380 static void attr_free_fd(struct file_descriptor* descriptor);
381 static status_t attr_close(struct file_descriptor* descriptor);
382 static status_t attr_read_stat(struct file_descriptor* descriptor,
383 	struct stat* statData);
384 static status_t attr_write_stat(struct file_descriptor* descriptor,
385 	const struct stat* stat, int statMask);
386 static status_t index_dir_read(struct io_context* context,
387 	struct file_descriptor* descriptor, struct dirent* buffer,
388 	size_t bufferSize, uint32* _count);
389 static status_t index_dir_rewind(struct file_descriptor* descriptor);
390 static void index_dir_free_fd(struct file_descriptor* descriptor);
391 static status_t index_dir_close(struct file_descriptor* descriptor);
392 static status_t query_read(struct io_context* context,
393 	struct file_descriptor* descriptor, struct dirent* buffer,
394 	size_t bufferSize, uint32* _count);
395 static status_t query_rewind(struct file_descriptor* descriptor);
396 static void query_free_fd(struct file_descriptor* descriptor);
397 static status_t query_close(struct file_descriptor* descriptor);
398 
// operations shared by several of the fd_ops tables below
399 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
400 	void* buffer, size_t length);
401 static status_t common_read_stat(struct file_descriptor* descriptor,
402 	struct stat* statData);
403 static status_t common_write_stat(struct file_descriptor* descriptor,
404 	const struct stat* statData, int statMask);
405 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
406 	struct stat* stat, bool kernel);
407 
// path resolution, vnode reference counting, and mount helpers
408 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
409 	bool traverseLeafLink, int count, bool kernel,
410 	struct vnode** _vnode, ino_t* _parentID);
411 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
412 	size_t bufferSize, bool kernel);
413 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
414 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
415 static void inc_vnode_ref_count(struct vnode* vnode);
416 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
417 	bool reenter);
418 static inline void put_vnode(struct vnode* vnode);
419 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
420 	bool kernel);
421 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
422 
423 
// The fd_ops tables below are initialized positionally, i.e. the order of the
// entries has to match the field order of struct fd_ops (declared elsewhere).
// The trailing comments name the slot for entries that are left NULL.

// Operations table wired to the file_*() hooks; the directory iteration
// slots are unused.
424 static struct fd_ops sFileOps = {
425 	file_read,
426 	file_write,
427 	file_seek,
428 	common_ioctl,
429 	NULL,		// set_flags
430 	file_select,
431 	file_deselect,
432 	NULL,		// read_dir()
433 	NULL,		// rewind_dir()
434 	common_read_stat,
435 	common_write_stat,
436 	file_close,
437 	file_free_fd
438 };
439 
// Operations table wired to the dir_*() hooks; no read/write/seek/select.
440 static struct fd_ops sDirectoryOps = {
441 	NULL,		// read()
442 	NULL,		// write()
443 	NULL,		// seek()
444 	common_ioctl,
445 	NULL,		// set_flags
446 	NULL,		// select()
447 	NULL,		// deselect()
448 	dir_read,
449 	dir_rewind,
450 	common_read_stat,
451 	common_write_stat,
452 	dir_close,
453 	dir_free_fd
454 };
455 
// Operations table wired to the attr_dir_*() hooks; stat and ioctl use the
// common_*() implementations.
456 static struct fd_ops sAttributeDirectoryOps = {
457 	NULL,		// read()
458 	NULL,		// write()
459 	NULL,		// seek()
460 	common_ioctl,
461 	NULL,		// set_flags
462 	NULL,		// select()
463 	NULL,		// deselect()
464 	attr_dir_read,
465 	attr_dir_rewind,
466 	common_read_stat,
467 	common_write_stat,
468 	attr_dir_close,
469 	attr_dir_free_fd
470 };
471 
// Operations table wired to the attr_*() hooks, including attribute specific
// stat functions.
472 static struct fd_ops sAttributeOps = {
473 	attr_read,
474 	attr_write,
475 	attr_seek,
476 	common_ioctl,
477 	NULL,		// set_flags
478 	NULL,		// select()
479 	NULL,		// deselect()
480 	NULL,		// read_dir()
481 	NULL,		// rewind_dir()
482 	attr_read_stat,
483 	attr_write_stat,
484 	attr_close,
485 	attr_free_fd
486 };
487 
// Operations table wired to the index_dir_*() hooks; only iteration, close,
// and free are provided.
488 static struct fd_ops sIndexDirectoryOps = {
489 	NULL,		// read()
490 	NULL,		// write()
491 	NULL,		// seek()
492 	NULL,		// ioctl()
493 	NULL,		// set_flags
494 	NULL,		// select()
495 	NULL,		// deselect()
496 	index_dir_read,
497 	index_dir_rewind,
498 	NULL,		// read_stat()
499 	NULL,		// write_stat()
500 	index_dir_close,
501 	index_dir_free_fd
502 };
503 
// Compiled out. Note that index_read_stat() has no prototype among the
// forward declarations above.
504 #if 0
505 static struct fd_ops sIndexOps = {
506 	NULL,		// read()
507 	NULL,		// write()
508 	NULL,		// seek()
509 	NULL,		// ioctl()
510 	NULL,		// set_flags
511 	NULL,		// select()
512 	NULL,		// deselect()
513 	NULL,		// dir_read()
514 	NULL,		// dir_rewind()
515 	index_read_stat,	// read_stat()
516 	NULL,		// write_stat()
517 	NULL,		// dir_close()
518 	NULL		// free_fd()
519 };
520 #endif
521 
// Operations table wired to the query_*() hooks; only iteration, close, and
// free are provided.
522 static struct fd_ops sQueryOps = {
523 	NULL,		// read()
524 	NULL,		// write()
525 	NULL,		// seek()
526 	NULL,		// ioctl()
527 	NULL,		// set_flags
528 	NULL,		// select()
529 	NULL,		// deselect()
530 	query_read,
531 	query_rewind,
532 	NULL,		// read_stat()
533 	NULL,		// write_stat()
534 	query_close,
535 	query_free_fd
536 };
537 
538 
539 namespace {
540 
541 class VNodePutter {
542 public:
543 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
544 
545 	~VNodePutter()
546 	{
547 		Put();
548 	}
549 
550 	void SetTo(struct vnode* vnode)
551 	{
552 		Put();
553 		fVNode = vnode;
554 	}
555 
556 	void Put()
557 	{
558 		if (fVNode) {
559 			put_vnode(fVNode);
560 			fVNode = NULL;
561 		}
562 	}
563 
564 	struct vnode* Detach()
565 	{
566 		struct vnode* vnode = fVNode;
567 		fVNode = NULL;
568 		return vnode;
569 	}
570 
571 private:
572 	struct vnode* fVNode;
573 };
574 
575 
576 class FDCloser {
577 public:
578 	FDCloser() : fFD(-1), fKernel(true) {}
579 
580 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
581 
582 	~FDCloser()
583 	{
584 		Close();
585 	}
586 
587 	void SetTo(int fd, bool kernel)
588 	{
589 		Close();
590 		fFD = fd;
591 		fKernel = kernel;
592 	}
593 
594 	void Close()
595 	{
596 		if (fFD >= 0) {
597 			if (fKernel)
598 				_kern_close(fFD);
599 			else
600 				_user_close(fFD);
601 			fFD = -1;
602 		}
603 	}
604 
605 	int Detach()
606 	{
607 		int fd = fFD;
608 		fFD = -1;
609 		return fd;
610 	}
611 
612 private:
613 	int		fFD;
614 	bool	fKernel;
615 };
616 
617 } // namespace
618 
619 
#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

/*!	Common base of the page I/O trace entries. It records the parameters and
	the outcome of one request and keeps a copy of the I/O vectors made via
	alloc_tracing_buffer_memcpy().
*/
class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		const size_t vecsSize = sizeof(generic_io_vec) * count;
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, vecsSize,
			false);
	}

	/*!	Prints the entry; \a mode is the label of the transfer direction
		supplied by the subclass.
	*/
	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		// The vector copy is skipped when it is not available (fVecs NULL).
		for (uint32 index = 0; fVecs != NULL && index < fCount; index++) {
			if (index != 0)
				out.Print(", ");

			const generic_io_vec& vec = fVecs[index];
			out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)vec.base,
				(uint64)vec.length);
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


//!	Trace entry that is dumped with the "read" label.
class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


//!	Trace entry that is dumped with the "write" label.
class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

	// TPIO(x) creates the given trace entry; without VFS_PAGES_IO_TRACING it
	// collapses to an empty statement.
#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING
727 
728 
729 /*! Finds the mounted device (the fs_mount structure) with the given ID.
730 	Note, you must hold the gMountMutex lock when you call this function.
731 */
732 static struct fs_mount*
733 find_mount(dev_t id)
734 {
735 	ASSERT_LOCKED_MUTEX(&sMountMutex);
736 
737 	return sMountsTable->Lookup(id);
738 }
739 
740 
741 static status_t
742 get_mount(dev_t id, struct fs_mount** _mount)
743 {
744 	struct fs_mount* mount;
745 
746 	ReadLocker nodeLocker(sVnodeLock);
747 	MutexLocker mountLocker(sMountMutex);
748 
749 	mount = find_mount(id);
750 	if (mount == NULL)
751 		return B_BAD_VALUE;
752 
753 	struct vnode* rootNode = mount->root_vnode;
754 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
755 		|| rootNode->ref_count == 0) {
756 		// might have been called during a mount/unmount operation
757 		return B_BUSY;
758 	}
759 
760 	inc_vnode_ref_count(rootNode);
761 	*_mount = mount;
762 	return B_OK;
763 }
764 
765 
766 static void
767 put_mount(struct fs_mount* mount)
768 {
769 	if (mount)
770 		put_vnode(mount->root_vnode);
771 }
772 
773 
774 /*!	Tries to open the specified file system module.
775 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
776 	Returns a pointer to file system module interface, or NULL if it
777 	could not open the module.
778 */
779 static file_system_module_info*
780 get_file_system(const char* fsName)
781 {
782 	char name[B_FILE_NAME_LENGTH];
783 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
784 		// construct module name if we didn't get one
785 		// (we currently support only one API)
786 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
787 		fsName = NULL;
788 	}
789 
790 	file_system_module_info* info;
791 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
792 		return NULL;
793 
794 	return info;
795 }
796 
797 
798 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
799 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
800 	The name is allocated for you, and you have to free() it when you're
801 	done with it.
802 	Returns NULL if the required memory is not available.
803 */
804 static char*
805 get_file_system_name(const char* fsName)
806 {
807 	const size_t length = strlen("file_systems/");
808 
809 	if (strncmp(fsName, "file_systems/", length)) {
810 		// the name already seems to be the module's file name
811 		return strdup(fsName);
812 	}
813 
814 	fsName += length;
815 	const char* end = strchr(fsName, '/');
816 	if (end == NULL) {
817 		// this doesn't seem to be a valid name, but well...
818 		return strdup(fsName);
819 	}
820 
821 	// cut off the trailing /v1
822 
823 	char* name = (char*)malloc(end + 1 - fsName);
824 	if (name == NULL)
825 		return NULL;
826 
827 	strlcpy(name, fsName, end + 1 - fsName);
828 	return name;
829 }
830 
831 
832 /*!	Accepts a list of file system names separated by a colon, one for each
833 	layer and returns the file system name for the specified layer.
834 	The name is allocated for you, and you have to free() it when you're
835 	done with it.
836 	Returns NULL if the required memory is not available or if there is no
837 	name for the specified layer.
838 */
839 static char*
840 get_file_system_name_for_layer(const char* fsNames, int32 layer)
841 {
842 	while (layer >= 0) {
843 		const char* end = strchr(fsNames, ':');
844 		if (end == NULL) {
845 			if (layer == 0)
846 				return strdup(fsNames);
847 			return NULL;
848 		}
849 
850 		if (layer == 0) {
851 			size_t length = end - fsNames + 1;
852 			char* result = (char*)malloc(length);
853 			strlcpy(result, fsNames, length);
854 			return result;
855 		}
856 
857 		fsNames = end + 1;
858 		layer--;
859 	}
860 
861 	return NULL;
862 }
863 
864 
865 static void
866 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
867 {
868 	RecursiveLocker _(mount->rlock);
869 	mount->vnodes.Add(vnode);
870 }
871 
872 
873 static void
874 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
875 {
876 	RecursiveLocker _(mount->rlock);
877 	mount->vnodes.Remove(vnode);
878 }
879 
880 
881 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
882 
883 	The caller must hold the sVnodeLock (read lock at least).
884 
885 	\param mountID the mount ID.
886 	\param vnodeID the node ID.
887 
888 	\return The vnode structure, if it was found in the hash table, \c NULL
889 			otherwise.
890 */
891 static struct vnode*
892 lookup_vnode(dev_t mountID, ino_t vnodeID)
893 {
894 	struct vnode_hash_key key;
895 
896 	key.device = mountID;
897 	key.vnode = vnodeID;
898 
899 	return sVnodeTable->Lookup(key);
900 }
901 
902 
903 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
904 
905 	This will also wait for BUSY_VNODE_DELAY before returning if one should
906 	still wait for the vnode becoming unbusy.
907 
908 	\return \c true if one should retry, \c false if not.
909 */
910 static bool
911 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
912 {
913 	if (--tries < 0) {
914 		// vnode doesn't seem to become unbusy
915 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
916 			" is not becoming unbusy!\n", mountID, vnodeID);
917 		return false;
918 	}
919 	snooze(BUSY_VNODE_DELAY);
920 	return true;
921 }
922 
923 
924 /*!	Creates a new vnode with the given mount and node ID.
925 	If the node already exists, it is returned instead and no new node is
926 	created. In either case -- but not, if an error occurs -- the function write
927 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
928 	error the lock is not held on return.
929 
930 	\param mountID The mount ID.
931 	\param vnodeID The vnode ID.
932 	\param _vnode Will be set to the new vnode on success.
933 	\param _nodeCreated Will be set to \c true when the returned vnode has
934 		been newly created, \c false when it already existed. Will not be
935 		changed on error.
936 	\return \c B_OK, when the vnode was successfully created and inserted or
937 		a node with the given ID was found, \c B_NO_MEMORY or
938 		\c B_ENTRY_NOT_FOUND on error.
939 */
940 static status_t
941 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
942 	bool& _nodeCreated)
943 {
944 	FUNCTION(("create_new_vnode_and_lock()\n"));
945 
946 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
947 	if (vnode == NULL)
948 		return B_NO_MEMORY;
949 
950 	// initialize basic values
951 	memset(vnode, 0, sizeof(struct vnode));
952 	vnode->device = mountID;
953 	vnode->id = vnodeID;
954 	vnode->ref_count = 1;
955 	vnode->SetBusy(true);
956 
957 	// look up the node -- it might have been added by someone else in the
958 	// meantime
959 	rw_lock_write_lock(&sVnodeLock);
960 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
961 	if (existingVnode != NULL) {
962 		free(vnode);
963 		_vnode = existingVnode;
964 		_nodeCreated = false;
965 		return B_OK;
966 	}
967 
968 	// get the mount structure
969 	mutex_lock(&sMountMutex);
970 	vnode->mount = find_mount(mountID);
971 	if (!vnode->mount || vnode->mount->unmounting) {
972 		mutex_unlock(&sMountMutex);
973 		rw_lock_write_unlock(&sVnodeLock);
974 		free(vnode);
975 		return B_ENTRY_NOT_FOUND;
976 	}
977 
978 	// add the vnode to the mount's node list and the hash table
979 	sVnodeTable->Insert(vnode);
980 	add_vnode_to_mount_list(vnode, vnode->mount);
981 
982 	mutex_unlock(&sMountMutex);
983 
984 	_vnode = vnode;
985 	_nodeCreated = true;
986 
987 	// keep the vnode lock locked
988 	return B_OK;
989 }
990 
991 
992 /*!	Frees the vnode and all resources it has acquired, and removes
993 	it from the vnode hash as well as from its mount structure.
994 	Will also make sure that any cache modifications are written back.
995 */
996 static void
997 free_vnode(struct vnode* vnode, bool reenter)
998 {
999 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1000 		vnode);
1001 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1002 
1003 	// write back any changes in this vnode's cache -- but only
1004 	// if the vnode won't be deleted, in which case the changes
1005 	// will be discarded
1006 
1007 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1008 		FS_CALL_NO_PARAMS(vnode, fsync);
1009 
1010 	// Note: If this vnode has a cache attached, there will still be two
1011 	// references to that cache at this point. The last one belongs to the vnode
1012 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1013 	// cache. Each but the last reference to a cache also includes a reference
1014 	// to the vnode. The file cache, however, released its reference (cf.
1015 	// file_cache_create()), so that this vnode's ref count has the chance to
1016 	// ever drop to 0. Deleting the file cache now, will cause the next to last
1017 	// cache reference to be released, which will also release a (no longer
1018 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1019 	// count, so that it will neither become negative nor 0.
1020 	vnode->ref_count = 2;
1021 
1022 	if (!vnode->IsUnpublished()) {
1023 		if (vnode->IsRemoved())
1024 			FS_CALL(vnode, remove_vnode, reenter);
1025 		else
1026 			FS_CALL(vnode, put_vnode, reenter);
1027 	}
1028 
1029 	// If the vnode has a VMCache attached, make sure that it won't try to get
1030 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1031 	// long as the vnode is busy and in the hash, that won't happen, but as
1032 	// soon as we've removed it from the hash, it could reload the vnode -- with
1033 	// a new cache attached!
1034 	if (vnode->cache != NULL)
1035 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1036 
1037 	// The file system has removed the resources of the vnode now, so we can
1038 	// make it available again (by removing the busy vnode from the hash).
1039 	rw_lock_write_lock(&sVnodeLock);
1040 	sVnodeTable->Remove(vnode);
1041 	rw_lock_write_unlock(&sVnodeLock);
1042 
1043 	// if we have a VMCache attached, remove it
1044 	if (vnode->cache)
1045 		vnode->cache->ReleaseRef();
1046 
1047 	vnode->cache = NULL;
1048 
1049 	remove_vnode_from_mount_list(vnode, vnode->mount);
1050 
1051 	free(vnode);
1052 }
1053 
1054 
1055 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1056 	if the counter dropped to 0.
1057 
1058 	The caller must, of course, own a reference to the vnode to call this
1059 	function.
1060 	The caller must not hold the sVnodeLock or the sMountMutex.
1061 
1062 	\param vnode the vnode.
1063 	\param alwaysFree don't move this vnode into the unused list, but really
1064 		   delete it if possible.
1065 	\param reenter \c true, if this function is called (indirectly) from within
1066 		   a file system. This will be passed to file system hooks only.
1067 	\return \c B_OK, if everything went fine, an error code otherwise.
1068 */
1069 static status_t
1070 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1071 {
1072 	ReadLocker locker(sVnodeLock);
1073 	AutoLocker<Vnode> nodeLocker(vnode);
1074 
1075 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1076 
1077 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1078 
1079 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1080 		vnode->ref_count));
1081 
1082 	if (oldRefCount != 1)
1083 		return B_OK;
1084 
1085 	if (vnode->IsBusy())
1086 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1087 
1088 	bool freeNode = false;
1089 	bool freeUnusedNodes = false;
1090 
1091 	// Just insert the vnode into an unused list if we don't need
1092 	// to delete it
1093 	if (vnode->IsRemoved() || alwaysFree) {
1094 		vnode_to_be_freed(vnode);
1095 		vnode->SetBusy(true);
1096 		freeNode = true;
1097 	} else
1098 		freeUnusedNodes = vnode_unused(vnode);
1099 
1100 	nodeLocker.Unlock();
1101 	locker.Unlock();
1102 
1103 	if (freeNode)
1104 		free_vnode(vnode, reenter);
1105 	else if (freeUnusedNodes)
1106 		free_unused_vnodes();
1107 
1108 	return B_OK;
1109 }
1110 
1111 
1112 /*!	\brief Increments the reference counter of the given vnode.
1113 
1114 	The caller must make sure that the node isn't deleted while this function
1115 	is called. This can be done either:
1116 	- by ensuring that a reference to the node exists and remains in existence,
1117 	  or
1118 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1119 	  or by holding sVnodeLock write locked.
1120 
1121 	In the second case the caller is responsible for dealing with the ref count
1122 	0 -> 1 transition. That is 1. this function must not be invoked when the
1123 	node is busy in the first place and 2. vnode_used() must be called for the
1124 	node.
1125 
1126 	\param vnode the vnode.
1127 */
1128 static void
1129 inc_vnode_ref_count(struct vnode* vnode)
1130 {
1131 	atomic_add(&vnode->ref_count, 1);
1132 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1133 		vnode->ref_count));
1134 }
1135 
1136 
/*!	Returns whether nodes of the given file \a type (a stat mode value) are
	"special" nodes. Currently that is the case for FIFOs only.
*/
static bool
is_special_node_type(int type)
{
	if (S_ISFIFO(type))
		return true;

	// no other special node types are supported at the moment
	return false;
}
1143 
1144 
1145 static status_t
1146 create_special_sub_node(struct vnode* vnode, uint32 flags)
1147 {
1148 	if (S_ISFIFO(vnode->Type()))
1149 		return create_fifo_vnode(vnode->mount->volume, vnode);
1150 
1151 	return B_BAD_VALUE;
1152 }
1153 
1154 
/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait If \c false, the function fails with \c B_BUSY right away
		   when the vnode is found busy. Otherwise retry_busy_vnode() decides
		   whether the lookup is attempted again.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	// Every pass through "restart" is entered with sVnodeLock read-locked.
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		// The node exists but is busy: drop all locks before failing or
		// retrying.
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		// The node is already known: just acquire a reference.
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			// No new node was created (presumably it appeared in the table in
			// the meantime). Trade the write lock for a read lock and go
			// through the regular lookup path again.
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		// Let the file system load the node; sVnodeLock is not held here.
		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;
				// the hook reported success without setting a private node

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			// Loading failed. If the file system had already handed out the
			// node, give it back before discarding our vnode structure.
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		// The node is fully set up: record the removed flag and clear the
		// busy flag under the node lock.
		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}
1275 
1276 
1277 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1278 	if the counter dropped to 0.
1279 
1280 	The caller must, of course, own a reference to the vnode to call this
1281 	function.
1282 	The caller must not hold the sVnodeLock or the sMountMutex.
1283 
1284 	\param vnode the vnode.
1285 */
1286 static inline void
1287 put_vnode(struct vnode* vnode)
1288 {
1289 	dec_vnode_ref_count(vnode, false, false);
1290 }
1291 
1292 
/*!	Frees a number of unused vnodes, depending on the low resource \a level.

	For \c B_NO_LOW_RESOURCE nothing is freed. Otherwise the number of vnodes
	to free is derived from the current count of unused vnodes: 1% for
	\c B_LOW_RESOURCE_NOTE, 10% for \c B_LOW_RESOURCE_WARNING, all of them for
	\c B_LOW_RESOURCE_CRITICAL, and a single one for any other level.

	\param level The low resource level to react to.
*/
static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		// never try to free more nodes than are currently unused
		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
				// the lockers release everything when leaving the iteration
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}
1373 
1374 
1375 /*!	Gets the vnode the given vnode is covering.
1376 
1377 	The caller must have \c sVnodeLock read-locked at least.
1378 
1379 	The function returns a reference to the retrieved vnode (if any), the caller
1380 	is responsible to free.
1381 
1382 	\param vnode The vnode whose covered node shall be returned.
1383 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1384 		vnode.
1385 */
1386 static inline Vnode*
1387 get_covered_vnode_locked(Vnode* vnode)
1388 {
1389 	if (Vnode* coveredNode = vnode->covers) {
1390 		while (coveredNode->covers != NULL)
1391 			coveredNode = coveredNode->covers;
1392 
1393 		inc_vnode_ref_count(coveredNode);
1394 		return coveredNode;
1395 	}
1396 
1397 	return NULL;
1398 }
1399 
1400 
1401 /*!	Gets the vnode the given vnode is covering.
1402 
1403 	The caller must not hold \c sVnodeLock. Note that this implies a race
1404 	condition, since the situation can change at any time.
1405 
1406 	The function returns a reference to the retrieved vnode (if any), the caller
1407 	is responsible to free.
1408 
1409 	\param vnode The vnode whose covered node shall be returned.
1410 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1411 		vnode.
1412 */
1413 static inline Vnode*
1414 get_covered_vnode(Vnode* vnode)
1415 {
1416 	if (!vnode->IsCovering())
1417 		return NULL;
1418 
1419 	ReadLocker vnodeReadLocker(sVnodeLock);
1420 	return get_covered_vnode_locked(vnode);
1421 }
1422 
1423 
1424 /*!	Gets the vnode the given vnode is covered by.
1425 
1426 	The caller must have \c sVnodeLock read-locked at least.
1427 
1428 	The function returns a reference to the retrieved vnode (if any), the caller
1429 	is responsible to free.
1430 
1431 	\param vnode The vnode whose covering node shall be returned.
1432 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1433 		any vnode.
1434 */
1435 static Vnode*
1436 get_covering_vnode_locked(Vnode* vnode)
1437 {
1438 	if (Vnode* coveringNode = vnode->covered_by) {
1439 		while (coveringNode->covered_by != NULL)
1440 			coveringNode = coveringNode->covered_by;
1441 
1442 		inc_vnode_ref_count(coveringNode);
1443 		return coveringNode;
1444 	}
1445 
1446 	return NULL;
1447 }
1448 
1449 
1450 /*!	Gets the vnode the given vnode is covered by.
1451 
1452 	The caller must not hold \c sVnodeLock. Note that this implies a race
1453 	condition, since the situation can change at any time.
1454 
1455 	The function returns a reference to the retrieved vnode (if any), the caller
1456 	is responsible to free.
1457 
1458 	\param vnode The vnode whose covering node shall be returned.
1459 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1460 		any vnode.
1461 */
1462 static inline Vnode*
1463 get_covering_vnode(Vnode* vnode)
1464 {
1465 	if (!vnode->IsCovered())
1466 		return NULL;
1467 
1468 	ReadLocker vnodeReadLocker(sVnodeLock);
1469 	return get_covering_vnode_locked(vnode);
1470 }
1471 
1472 
1473 static void
1474 free_unused_vnodes()
1475 {
1476 	free_unused_vnodes(
1477 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1478 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1479 }
1480 
1481 
1482 static void
1483 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1484 {
1485 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1486 
1487 	free_unused_vnodes(level);
1488 }
1489 
1490 
1491 static inline void
1492 put_advisory_locking(struct advisory_locking* locking)
1493 {
1494 	release_sem(locking->lock);
1495 }
1496 
1497 
1498 /*!	Returns the advisory_locking object of the \a vnode in case it
1499 	has one, and locks it.
1500 	You have to call put_advisory_locking() when you're done with
1501 	it.
1502 	Note, you must not have the vnode mutex locked when calling
1503 	this function.
1504 */
1505 static struct advisory_locking*
1506 get_advisory_locking(struct vnode* vnode)
1507 {
1508 	rw_lock_read_lock(&sVnodeLock);
1509 	vnode->Lock();
1510 
1511 	struct advisory_locking* locking = vnode->advisory_locking;
1512 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1513 
1514 	vnode->Unlock();
1515 	rw_lock_read_unlock(&sVnodeLock);
1516 
1517 	if (lock >= 0)
1518 		lock = acquire_sem(lock);
1519 	if (lock < 0) {
1520 		// This means the locking has been deleted in the mean time
1521 		// or had never existed in the first place - otherwise, we
1522 		// would get the lock at some point.
1523 		return NULL;
1524 	}
1525 
1526 	return locking;
1527 }
1528 
1529 
1530 /*!	Creates a locked advisory_locking object, and attaches it to the
1531 	given \a vnode.
1532 	Returns B_OK in case of success - also if the vnode got such an
1533 	object from someone else in the mean time, you'll still get this
1534 	one locked then.
1535 */
1536 static status_t
1537 create_advisory_locking(struct vnode* vnode)
1538 {
1539 	if (vnode == NULL)
1540 		return B_FILE_ERROR;
1541 
1542 	ObjectDeleter<advisory_locking> lockingDeleter;
1543 	struct advisory_locking* locking = NULL;
1544 
1545 	while (get_advisory_locking(vnode) == NULL) {
1546 		// no locking object set on the vnode yet, create one
1547 		if (locking == NULL) {
1548 			locking = new(std::nothrow) advisory_locking;
1549 			if (locking == NULL)
1550 				return B_NO_MEMORY;
1551 			lockingDeleter.SetTo(locking);
1552 
1553 			locking->wait_sem = create_sem(0, "advisory lock");
1554 			if (locking->wait_sem < 0)
1555 				return locking->wait_sem;
1556 
1557 			locking->lock = create_sem(0, "advisory locking");
1558 			if (locking->lock < 0)
1559 				return locking->lock;
1560 		}
1561 
1562 		// set our newly created locking object
1563 		ReadLocker _(sVnodeLock);
1564 		AutoLocker<Vnode> nodeLocker(vnode);
1565 		if (vnode->advisory_locking == NULL) {
1566 			vnode->advisory_locking = locking;
1567 			lockingDeleter.Detach();
1568 			return B_OK;
1569 		}
1570 	}
1571 
1572 	// The vnode already had a locking object. That's just as well.
1573 
1574 	return B_OK;
1575 }
1576 
1577 
1578 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1579 	with the advisory_lock \a lock.
1580 */
1581 static bool
1582 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1583 {
1584 	if (flock == NULL)
1585 		return true;
1586 
1587 	return lock->start <= flock->l_start - 1 + flock->l_len
1588 		&& lock->end >= flock->l_start;
1589 }
1590 
1591 
1592 /*!	Tests whether acquiring a lock would block.
1593 */
1594 static status_t
1595 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1596 {
1597 	flock->l_type = F_UNLCK;
1598 
1599 	struct advisory_locking* locking = get_advisory_locking(vnode);
1600 	if (locking == NULL)
1601 		return B_OK;
1602 
1603 	team_id team = team_get_current_team_id();
1604 
1605 	LockList::Iterator iterator = locking->locks.GetIterator();
1606 	while (iterator.HasNext()) {
1607 		struct advisory_lock* lock = iterator.Next();
1608 
1609 		 if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1610 			// locks do overlap
1611 			if (flock->l_type != F_RDLCK || !lock->shared) {
1612 				// collision
1613 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1614 				flock->l_whence = SEEK_SET;
1615 				flock->l_start = lock->start;
1616 				flock->l_len = lock->end - lock->start + 1;
1617 				flock->l_pid = lock->team;
1618 				break;
1619 			}
1620 		}
1621 	}
1622 
1623 	put_advisory_locking(locking);
1624 	return B_OK;
1625 }
1626 
1627 
1628 /*!	Removes the specified lock, or all locks of the calling team
1629 	if \a flock is NULL.
1630 */
1631 static status_t
1632 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1633 	struct file_descriptor* descriptor, struct flock* flock)
1634 {
1635 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1636 
1637 	struct advisory_locking* locking = get_advisory_locking(vnode);
1638 	if (locking == NULL)
1639 		return B_OK;
1640 
1641 	// find matching lock entries
1642 
1643 	LockList::Iterator iterator = locking->locks.GetIterator();
1644 	while (iterator.HasNext()) {
1645 		struct advisory_lock* lock = iterator.Next();
1646 		bool removeLock = false;
1647 
1648 		if (descriptor != NULL && lock->bound_to == descriptor) {
1649 			// Remove flock() locks
1650 			removeLock = true;
1651 		} else if (lock->bound_to == context
1652 				&& advisory_lock_intersects(lock, flock)) {
1653 			// Remove POSIX locks
1654 			bool endsBeyond = false;
1655 			bool startsBefore = false;
1656 			if (flock != NULL) {
1657 				startsBefore = lock->start < flock->l_start;
1658 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1659 			}
1660 
1661 			if (!startsBefore && !endsBeyond) {
1662 				// lock is completely contained in flock
1663 				removeLock = true;
1664 			} else if (startsBefore && !endsBeyond) {
1665 				// cut the end of the lock
1666 				lock->end = flock->l_start - 1;
1667 			} else if (!startsBefore && endsBeyond) {
1668 				// cut the start of the lock
1669 				lock->start = flock->l_start + flock->l_len;
1670 			} else {
1671 				// divide the lock into two locks
1672 				struct advisory_lock* secondLock = new advisory_lock;
1673 				if (secondLock == NULL) {
1674 					// TODO: we should probably revert the locks we already
1675 					// changed... (ie. allocate upfront)
1676 					put_advisory_locking(locking);
1677 					return B_NO_MEMORY;
1678 				}
1679 
1680 				lock->end = flock->l_start - 1;
1681 
1682 				secondLock->bound_to = context;
1683 				secondLock->team = lock->team;
1684 				secondLock->session = lock->session;
1685 				// values must already be normalized when getting here
1686 				secondLock->start = flock->l_start + flock->l_len;
1687 				secondLock->end = lock->end;
1688 				secondLock->shared = lock->shared;
1689 
1690 				locking->locks.Add(secondLock);
1691 			}
1692 		}
1693 
1694 		if (removeLock) {
1695 			// this lock is no longer used
1696 			iterator.Remove();
1697 			free(lock);
1698 		}
1699 	}
1700 
1701 	bool removeLocking = locking->locks.IsEmpty();
1702 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1703 
1704 	put_advisory_locking(locking);
1705 
1706 	if (removeLocking) {
1707 		// We can remove the whole advisory locking structure; it's no
1708 		// longer used
1709 		locking = get_advisory_locking(vnode);
1710 		if (locking != NULL) {
1711 			ReadLocker locker(sVnodeLock);
1712 			AutoLocker<Vnode> nodeLocker(vnode);
1713 
1714 			// the locking could have been changed in the mean time
1715 			if (locking->locks.IsEmpty()) {
1716 				vnode->advisory_locking = NULL;
1717 				nodeLocker.Unlock();
1718 				locker.Unlock();
1719 
1720 				// we've detached the locking from the vnode, so we can
1721 				// safely delete it
1722 				delete locking;
1723 			} else {
1724 				// the locking is in use again
1725 				nodeLocker.Unlock();
1726 				locker.Unlock();
1727 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1728 			}
1729 		}
1730 	}
1731 
1732 	return B_OK;
1733 }
1734 
1735 
1736 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1737 	will wait for the lock to become available, if there are any collisions
1738 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1739 
1740 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1741 	BSD flock() semantics are used, that is, all children can unlock the file
1742 	in question (we even allow parents to remove the lock, though, but that
1743 	seems to be in line to what the BSD's are doing).
1744 */
1745 static status_t
1746 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1747 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1748 {
1749 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1750 		vnode, flock, wait ? "yes" : "no"));
1751 	dprintf("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1752 		vnode, flock, wait ? "yes" : "no");
1753 
1754 	bool shared = flock->l_type == F_RDLCK;
1755 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1756 	status_t status = B_OK;
1757 
1758 	// TODO: do deadlock detection!
1759 
1760 	struct advisory_locking* locking;
1761 
1762 	while (true) {
1763 		// if this vnode has an advisory_locking structure attached,
1764 		// lock that one and search for any colliding file lock
1765 		status = create_advisory_locking(vnode);
1766 		if (status != B_OK)
1767 			return status;
1768 
1769 		locking = vnode->advisory_locking;
1770 		team_id team = team_get_current_team_id();
1771 		sem_id waitForLock = -1;
1772 
1773 		// test for collisions
1774 		LockList::Iterator iterator = locking->locks.GetIterator();
1775 		while (iterator.HasNext()) {
1776 			struct advisory_lock* lock = iterator.Next();
1777 
1778 			// TODO: locks from the same team might be joinable!
1779 			if ((lock->team != team || lock->bound_to != boundTo)
1780 					&& advisory_lock_intersects(lock, flock)) {
1781 				// locks do overlap
1782 				if (!shared || !lock->shared) {
1783 					// we need to wait
1784 					waitForLock = locking->wait_sem;
1785 					break;
1786 				}
1787 			}
1788 		}
1789 
1790 		if (waitForLock < 0)
1791 			break;
1792 
1793 		// We need to wait. Do that or fail now, if we've been asked not to.
1794 
1795 		if (!wait) {
1796 			put_advisory_locking(locking);
1797 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1798 		}
1799 
1800 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1801 			B_CAN_INTERRUPT, 0);
1802 		if (status != B_OK && status != B_BAD_SEM_ID)
1803 			return status;
1804 
1805 		// We have been notified, but we need to re-lock the locking object. So
1806 		// go another round...
1807 	}
1808 
1809 	// install new lock
1810 
1811 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1812 		sizeof(struct advisory_lock));
1813 	if (lock == NULL) {
1814 		put_advisory_locking(locking);
1815 		return B_NO_MEMORY;
1816 	}
1817 
1818 	lock->bound_to = boundTo;
1819 	lock->team = team_get_current_team_id();
1820 	lock->session = thread_get_current_thread()->team->session_id;
1821 	// values must already be normalized when getting here
1822 	lock->start = flock->l_start;
1823 	lock->end = flock->l_start - 1 + flock->l_len;
1824 	lock->shared = shared;
1825 
1826 	locking->locks.Add(lock);
1827 	put_advisory_locking(locking);
1828 
1829 	return status;
1830 }
1831 
1832 
1833 /*!	Normalizes the \a flock structure to make it easier to compare the
1834 	structure with others. The l_start and l_len fields are set to absolute
1835 	values according to the l_whence field.
1836 */
1837 static status_t
1838 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1839 {
1840 	switch (flock->l_whence) {
1841 		case SEEK_SET:
1842 			break;
1843 		case SEEK_CUR:
1844 			flock->l_start += descriptor->pos;
1845 			break;
1846 		case SEEK_END:
1847 		{
1848 			struct vnode* vnode = descriptor->u.vnode;
1849 			struct stat stat;
1850 			status_t status;
1851 
1852 			if (!HAS_FS_CALL(vnode, read_stat))
1853 				return B_UNSUPPORTED;
1854 
1855 			status = FS_CALL(vnode, read_stat, &stat);
1856 			if (status != B_OK)
1857 				return status;
1858 
1859 			flock->l_start += stat.st_size;
1860 			break;
1861 		}
1862 		default:
1863 			return B_BAD_VALUE;
1864 	}
1865 
1866 	if (flock->l_start < 0)
1867 		flock->l_start = 0;
1868 	if (flock->l_len == 0)
1869 		flock->l_len = OFF_MAX;
1870 
1871 	// don't let the offset and length overflow
1872 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1873 		flock->l_len = OFF_MAX - flock->l_start;
1874 
1875 	if (flock->l_len < 0) {
1876 		// a negative length reverses the region
1877 		flock->l_start += flock->l_len;
1878 		flock->l_len = -flock->l_len;
1879 	}
1880 
1881 	return B_OK;
1882 }
1883 
1884 
1885 static void
1886 replace_vnode_if_disconnected(struct fs_mount* mount,
1887 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1888 	struct vnode* fallBack, bool lockRootLock)
1889 {
1890 	struct vnode* givenVnode = vnode;
1891 	bool vnodeReplaced = false;
1892 
1893 	ReadLocker vnodeReadLocker(sVnodeLock);
1894 
1895 	if (lockRootLock)
1896 		mutex_lock(&sIOContextRootLock);
1897 
1898 	while (vnode != NULL && vnode->mount == mount
1899 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1900 		if (vnode->covers != NULL) {
1901 			// redirect the vnode to the covered vnode
1902 			vnode = vnode->covers;
1903 		} else
1904 			vnode = fallBack;
1905 
1906 		vnodeReplaced = true;
1907 	}
1908 
1909 	// If we've replaced the node, grab a reference for the new one.
1910 	if (vnodeReplaced && vnode != NULL)
1911 		inc_vnode_ref_count(vnode);
1912 
1913 	if (lockRootLock)
1914 		mutex_unlock(&sIOContextRootLock);
1915 
1916 	vnodeReadLocker.Unlock();
1917 
1918 	if (vnodeReplaced)
1919 		put_vnode(givenVnode);
1920 }
1921 
1922 
1923 /*!	Disconnects all file descriptors that are associated with the
1924 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1925 	\a mount object.
1926 
1927 	Note, after you've called this function, there might still be ongoing
1928 	accesses - they won't be interrupted if they already happened before.
1929 	However, any subsequent access will fail.
1930 
1931 	This is not a cheap function and should be used with care and rarely.
1932 	TODO: there is currently no means to stop a blocking read/write!
1933 */
1934 static void
1935 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1936 	struct vnode* vnodeToDisconnect)
1937 {
1938 	// iterate over all teams and peek into their file descriptors
1939 	TeamListIterator teamIterator;
1940 	while (Team* team = teamIterator.Next()) {
1941 		BReference<Team> teamReference(team, true);
1942 		TeamLocker teamLocker(team);
1943 
1944 		// lock the I/O context
1945 		io_context* context = team->io_context;
1946 		if (context == NULL)
1947 			continue;
1948 		MutexLocker contextLocker(context->io_mutex);
1949 
1950 		teamLocker.Unlock();
1951 
1952 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1953 			sRoot, true);
1954 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1955 			sRoot, false);
1956 
1957 		for (uint32 i = 0; i < context->table_size; i++) {
1958 			if (struct file_descriptor* descriptor = context->fds[i]) {
1959 				inc_fd_ref_count(descriptor);
1960 
1961 				// if this descriptor points at this mount, we
1962 				// need to disconnect it to be able to unmount
1963 				struct vnode* vnode = fd_vnode(descriptor);
1964 				if (vnodeToDisconnect != NULL) {
1965 					if (vnode == vnodeToDisconnect)
1966 						disconnect_fd(descriptor);
1967 				} else if ((vnode != NULL && vnode->mount == mount)
1968 					|| (vnode == NULL && descriptor->u.mount == mount))
1969 					disconnect_fd(descriptor);
1970 
1971 				put_fd(descriptor);
1972 			}
1973 		}
1974 	}
1975 }
1976 
1977 
1978 /*!	\brief Gets the root node of the current IO context.
1979 	If \a kernel is \c true, the kernel IO context will be used.
1980 	The caller obtains a reference to the returned node.
1981 */
1982 struct vnode*
1983 get_root_vnode(bool kernel)
1984 {
1985 	if (!kernel) {
1986 		// Get current working directory from io context
1987 		struct io_context* context = get_current_io_context(kernel);
1988 
1989 		mutex_lock(&sIOContextRootLock);
1990 
1991 		struct vnode* root = context->root;
1992 		if (root != NULL)
1993 			inc_vnode_ref_count(root);
1994 
1995 		mutex_unlock(&sIOContextRootLock);
1996 
1997 		if (root != NULL)
1998 			return root;
1999 
2000 		// That should never happen.
2001 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2002 			"have a root\n", team_get_current_team_id());
2003 	}
2004 
2005 	inc_vnode_ref_count(sRoot);
2006 	return sRoot;
2007 }
2008 
2009 
2010 /*!	\brief Gets the directory path and leaf name for a given path.
2011 
2012 	The supplied \a path is transformed to refer to the directory part of
2013 	the entry identified by the original path, and into the buffer \a filename
2014 	the leaf name of the original entry is written.
2015 	Neither the returned path nor the leaf name can be expected to be
2016 	canonical.
2017 
2018 	\param path The path to be analyzed. Must be able to store at least one
2019 		   additional character.
2020 	\param filename The buffer into which the leaf name will be written.
2021 		   Must be of size B_FILE_NAME_LENGTH at least.
2022 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2023 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2024 		   if the given path name is empty.
2025 */
2026 static status_t
2027 get_dir_path_and_leaf(char* path, char* filename)
2028 {
2029 	if (*path == '\0')
2030 		return B_ENTRY_NOT_FOUND;
2031 
2032 	char* last = strrchr(path, '/');
2033 		// '/' are not allowed in file names!
2034 
2035 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2036 
2037 	if (last == NULL) {
2038 		// this path is single segment with no '/' in it
2039 		// ex. "foo"
2040 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2041 			return B_NAME_TOO_LONG;
2042 
2043 		strcpy(path, ".");
2044 	} else {
2045 		last++;
2046 		if (last[0] == '\0') {
2047 			// special case: the path ends in one or more '/' - remove them
2048 			while (*--last == '/' && last != path);
2049 			last[1] = '\0';
2050 
2051 			if (last == path && last[0] == '/') {
2052 				// This path points to the root of the file system
2053 				strcpy(filename, ".");
2054 				return B_OK;
2055 			}
2056 			for (; last != path && *(last - 1) != '/'; last--);
2057 				// rewind to the start of the leaf before the '/'
2058 		}
2059 
2060 		// normal leaf: replace the leaf portion of the path with a '.'
2061 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2062 			return B_NAME_TOO_LONG;
2063 
2064 		last[0] = '.';
2065 		last[1] = '\0';
2066 	}
2067 	return B_OK;
2068 }
2069 
2070 
2071 static status_t
2072 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2073 	bool traverse, bool kernel, struct vnode** _vnode)
2074 {
2075 	char clonedName[B_FILE_NAME_LENGTH + 1];
2076 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2077 		return B_NAME_TOO_LONG;
2078 
2079 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2080 	struct vnode* directory;
2081 
2082 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2083 	if (status < 0)
2084 		return status;
2085 
2086 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2087 		_vnode, NULL);
2088 }
2089 
2090 
2091 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2092 	and returns the respective vnode.
2093 	On success a reference to the vnode is acquired for the caller.
2094 */
2095 static status_t
2096 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2097 {
2098 	ino_t id;
2099 	bool missing;
2100 
2101 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2102 		return missing ? B_ENTRY_NOT_FOUND
2103 			: get_vnode(dir->device, id, _vnode, true, false);
2104 	}
2105 
2106 	status_t status = FS_CALL(dir, lookup, name, &id);
2107 	if (status != B_OK)
2108 		return status;
2109 
2110 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2111 	// have a reference and just need to look the node up.
2112 	rw_lock_read_lock(&sVnodeLock);
2113 	*_vnode = lookup_vnode(dir->device, id);
2114 	rw_lock_read_unlock(&sVnodeLock);
2115 
2116 	if (*_vnode == NULL) {
2117 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2118 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2119 		return B_ENTRY_NOT_FOUND;
2120 	}
2121 
2122 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2123 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2124 //		(*_vnode)->mount->id, (*_vnode)->id);
2125 
2126 	return B_OK;
2127 }
2128 
2129 
2130 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2131 	\a path must not be NULL.
2132 	If it returns successfully, \a path contains the name of the last path
2133 	component. This function clobbers the buffer pointed to by \a path only
2134 	if it does contain more than one component.
2135 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2136 	it is successful or not!
2137 */
2138 static status_t
2139 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2140 	int count, struct io_context* ioContext, struct vnode** _vnode,
2141 	ino_t* _parentID)
2142 {
2143 	status_t status = B_OK;
2144 	ino_t lastParentID = vnode->id;
2145 
2146 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2147 
2148 	if (path == NULL) {
2149 		put_vnode(vnode);
2150 		return B_BAD_VALUE;
2151 	}
2152 
2153 	if (*path == '\0') {
2154 		put_vnode(vnode);
2155 		return B_ENTRY_NOT_FOUND;
2156 	}
2157 
2158 	while (true) {
2159 		struct vnode* nextVnode;
2160 		char* nextPath;
2161 
2162 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2163 			path));
2164 
2165 		// done?
2166 		if (path[0] == '\0')
2167 			break;
2168 
2169 		// walk to find the next path component ("path" will point to a single
2170 		// path component), and filter out multiple slashes
2171 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2172 				nextPath++);
2173 
2174 		if (*nextPath == '/') {
2175 			*nextPath = '\0';
2176 			do
2177 				nextPath++;
2178 			while (*nextPath == '/');
2179 		}
2180 
2181 		// See if the '..' is at a covering vnode move to the covered
2182 		// vnode so we pass the '..' path to the underlying filesystem.
2183 		// Also prevent breaking the root of the IO context.
2184 		if (strcmp("..", path) == 0) {
2185 			if (vnode == ioContext->root) {
2186 				// Attempted prison break! Keep it contained.
2187 				path = nextPath;
2188 				continue;
2189 			}
2190 
2191 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2192 				nextVnode = coveredVnode;
2193 				put_vnode(vnode);
2194 				vnode = nextVnode;
2195 			}
2196 		}
2197 
2198 		// check if vnode is really a directory
2199 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2200 			status = B_NOT_A_DIRECTORY;
2201 
2202 		// Check if we have the right to search the current directory vnode.
2203 		// If a file system doesn't have the access() function, we assume that
2204 		// searching a directory is always allowed
2205 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2206 			status = FS_CALL(vnode, access, X_OK);
2207 
2208 		// Tell the filesystem to get the vnode of this path component (if we
2209 		// got the permission from the call above)
2210 		if (status == B_OK)
2211 			status = lookup_dir_entry(vnode, path, &nextVnode);
2212 
2213 		if (status != B_OK) {
2214 			put_vnode(vnode);
2215 			return status;
2216 		}
2217 
2218 		// If the new node is a symbolic link, resolve it (if we've been told
2219 		// to do it)
2220 		if (S_ISLNK(nextVnode->Type())
2221 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2222 			size_t bufferSize;
2223 			char* buffer;
2224 
2225 			TRACE(("traverse link\n"));
2226 
2227 			// it's not exactly nice style using goto in this way, but hey,
2228 			// it works :-/
2229 			if (count + 1 > B_MAX_SYMLINKS) {
2230 				status = B_LINK_LIMIT;
2231 				goto resolve_link_error;
2232 			}
2233 
2234 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2235 			if (buffer == NULL) {
2236 				status = B_NO_MEMORY;
2237 				goto resolve_link_error;
2238 			}
2239 
2240 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2241 				bufferSize--;
2242 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2243 				// null-terminate
2244 				if (status >= 0)
2245 					buffer[bufferSize] = '\0';
2246 			} else
2247 				status = B_BAD_VALUE;
2248 
2249 			if (status != B_OK) {
2250 				free(buffer);
2251 
2252 		resolve_link_error:
2253 				put_vnode(vnode);
2254 				put_vnode(nextVnode);
2255 
2256 				return status;
2257 			}
2258 			put_vnode(nextVnode);
2259 
2260 			// Check if we start from the root directory or the current
2261 			// directory ("vnode" still points to that one).
2262 			// Cut off all leading slashes if it's the root directory
2263 			path = buffer;
2264 			bool absoluteSymlink = false;
2265 			if (path[0] == '/') {
2266 				// we don't need the old directory anymore
2267 				put_vnode(vnode);
2268 
2269 				while (*++path == '/')
2270 					;
2271 
2272 				mutex_lock(&sIOContextRootLock);
2273 				vnode = ioContext->root;
2274 				inc_vnode_ref_count(vnode);
2275 				mutex_unlock(&sIOContextRootLock);
2276 
2277 				absoluteSymlink = true;
2278 			}
2279 
2280 			inc_vnode_ref_count(vnode);
2281 				// balance the next recursion - we will decrement the
2282 				// ref_count of the vnode, no matter if we succeeded or not
2283 
2284 			if (absoluteSymlink && *path == '\0') {
2285 				// symlink was just "/"
2286 				nextVnode = vnode;
2287 			} else {
2288 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2289 					ioContext, &nextVnode, &lastParentID);
2290 			}
2291 
2292 			free(buffer);
2293 
2294 			if (status != B_OK) {
2295 				put_vnode(vnode);
2296 				return status;
2297 			}
2298 		} else
2299 			lastParentID = vnode->id;
2300 
2301 		// decrease the ref count on the old dir we just looked up into
2302 		put_vnode(vnode);
2303 
2304 		path = nextPath;
2305 		vnode = nextVnode;
2306 
2307 		// see if we hit a covered node
2308 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2309 			put_vnode(vnode);
2310 			vnode = coveringNode;
2311 		}
2312 	}
2313 
2314 	*_vnode = vnode;
2315 	if (_parentID)
2316 		*_parentID = lastParentID;
2317 
2318 	return B_OK;
2319 }
2320 
2321 
2322 static status_t
2323 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2324 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2325 {
2326 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2327 		get_current_io_context(kernel), _vnode, _parentID);
2328 }
2329 
2330 
2331 static status_t
2332 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2333 	ino_t* _parentID, bool kernel)
2334 {
2335 	struct vnode* start = NULL;
2336 
2337 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2338 
2339 	if (!path)
2340 		return B_BAD_VALUE;
2341 
2342 	if (*path == '\0')
2343 		return B_ENTRY_NOT_FOUND;
2344 
2345 	// figure out if we need to start at root or at cwd
2346 	if (*path == '/') {
2347 		if (sRoot == NULL) {
2348 			// we're a bit early, aren't we?
2349 			return B_ERROR;
2350 		}
2351 
2352 		while (*++path == '/')
2353 			;
2354 		start = get_root_vnode(kernel);
2355 
2356 		if (*path == '\0') {
2357 			*_vnode = start;
2358 			return B_OK;
2359 		}
2360 
2361 	} else {
2362 		struct io_context* context = get_current_io_context(kernel);
2363 
2364 		mutex_lock(&context->io_mutex);
2365 		start = context->cwd;
2366 		if (start != NULL)
2367 			inc_vnode_ref_count(start);
2368 		mutex_unlock(&context->io_mutex);
2369 
2370 		if (start == NULL)
2371 			return B_ERROR;
2372 	}
2373 
2374 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2375 		_parentID);
2376 }
2377 
2378 
2379 /*! Returns the vnode in the next to last segment of the path, and returns
2380 	the last portion in filename.
2381 	The path buffer must be able to store at least one additional character.
2382 */
2383 static status_t
2384 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2385 	bool kernel)
2386 {
2387 	status_t status = get_dir_path_and_leaf(path, filename);
2388 	if (status != B_OK)
2389 		return status;
2390 
2391 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2392 }
2393 
2394 
2395 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2396 		   to by a FD + path pair.
2397 
2398 	\a path must be given in either case. \a fd might be omitted, in which
2399 	case \a path is either an absolute path or one relative to the current
2400 	directory. If both a supplied and \a path is relative it is reckoned off
2401 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2402 	ignored.
2403 
2404 	The caller has the responsibility to call put_vnode() on the returned
2405 	directory vnode.
2406 
2407 	\param fd The FD. May be < 0.
2408 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2409 	       is modified by this function. It must have at least room for a
2410 	       string one character longer than the path it contains.
2411 	\param _vnode A pointer to a variable the directory vnode shall be written
2412 		   into.
2413 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2414 		   the leaf name of the specified entry will be written.
2415 	\param kernel \c true, if invoked from inside the kernel, \c false if
2416 		   invoked from userland.
2417 	\return \c B_OK, if everything went fine, another error code otherwise.
2418 */
2419 static status_t
2420 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2421 	char* filename, bool kernel)
2422 {
2423 	if (!path)
2424 		return B_BAD_VALUE;
2425 	if (*path == '\0')
2426 		return B_ENTRY_NOT_FOUND;
2427 	if (fd < 0)
2428 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2429 
2430 	status_t status = get_dir_path_and_leaf(path, filename);
2431 	if (status != B_OK)
2432 		return status;
2433 
2434 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2435 }
2436 
2437 
2438 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2439 		   to by a vnode + path pair.
2440 
2441 	\a path must be given in either case. \a vnode might be omitted, in which
2442 	case \a path is either an absolute path or one relative to the current
2443 	directory. If both a supplied and \a path is relative it is reckoned off
2444 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2445 	ignored.
2446 
2447 	The caller has the responsibility to call put_vnode() on the returned
2448 	directory vnode.
2449 
2450 	\param vnode The vnode. May be \c NULL.
2451 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2452 	       is modified by this function. It must have at least room for a
2453 	       string one character longer than the path it contains.
2454 	\param _vnode A pointer to a variable the directory vnode shall be written
2455 		   into.
2456 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2457 		   the leaf name of the specified entry will be written.
2458 	\param kernel \c true, if invoked from inside the kernel, \c false if
2459 		   invoked from userland.
2460 	\return \c B_OK, if everything went fine, another error code otherwise.
2461 */
2462 static status_t
2463 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2464 	struct vnode** _vnode, char* filename, bool kernel)
2465 {
2466 	if (!path)
2467 		return B_BAD_VALUE;
2468 	if (*path == '\0')
2469 		return B_ENTRY_NOT_FOUND;
2470 	if (vnode == NULL || path[0] == '/')
2471 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2472 
2473 	status_t status = get_dir_path_and_leaf(path, filename);
2474 	if (status != B_OK)
2475 		return status;
2476 
2477 	inc_vnode_ref_count(vnode);
2478 		// vnode_path_to_vnode() always decrements the ref count
2479 
2480 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2481 }
2482 
2483 
2484 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2485 */
2486 static status_t
2487 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2488 	size_t bufferSize, struct io_context* ioContext)
2489 {
2490 	if (bufferSize < sizeof(struct dirent))
2491 		return B_BAD_VALUE;
2492 
2493 	// See if the vnode is covering another vnode and move to the covered
2494 	// vnode so we get the underlying file system
2495 	VNodePutter vnodePutter;
2496 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2497 		vnode = coveredVnode;
2498 		vnodePutter.SetTo(vnode);
2499 	}
2500 
2501 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2502 		// The FS supports getting the name of a vnode.
2503 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2504 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2505 			return B_OK;
2506 	}
2507 
2508 	// The FS doesn't support getting the name of a vnode. So we search the
2509 	// parent directory for the vnode, if the caller let us.
2510 
2511 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2512 		return B_UNSUPPORTED;
2513 
2514 	void* cookie;
2515 
2516 	status_t status = FS_CALL(parent, open_dir, &cookie);
2517 	if (status >= B_OK) {
2518 		while (true) {
2519 			uint32 num = 1;
2520 			// We use the FS hook directly instead of dir_read(), since we don't
2521 			// want the entries to be fixed. We have already resolved vnode to
2522 			// the covered node.
2523 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2524 				&num);
2525 			if (status != B_OK)
2526 				break;
2527 			if (num == 0) {
2528 				status = B_ENTRY_NOT_FOUND;
2529 				break;
2530 			}
2531 
2532 			if (vnode->id == buffer->d_ino) {
2533 				// found correct entry!
2534 				break;
2535 			}
2536 		}
2537 
2538 		FS_CALL(parent, close_dir, cookie);
2539 		FS_CALL(parent, free_dir_cookie, cookie);
2540 	}
2541 	return status;
2542 }
2543 
2544 
2545 static status_t
2546 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2547 	size_t nameSize, bool kernel)
2548 {
2549 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2550 	struct dirent* dirent = (struct dirent*)buffer;
2551 
2552 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2553 		get_current_io_context(kernel));
2554 	if (status != B_OK)
2555 		return status;
2556 
2557 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2558 		return B_BUFFER_OVERFLOW;
2559 
2560 	return B_OK;
2561 }
2562 
2563 
2564 /*!	Gets the full path to a given directory vnode.
2565 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2566 	file system doesn't support this call, it will fall back to iterating
2567 	through the parent directory to get the name of the child.
2568 
2569 	To protect against circular loops, it supports a maximum tree depth
2570 	of 256 levels.
2571 
2572 	Note that the path may not be correct the time this function returns!
2573 	It doesn't use any locking to prevent returning the correct path, as
2574 	paths aren't safe anyway: the path to a file can change at any time.
2575 
2576 	It might be a good idea, though, to check if the returned path exists
2577 	in the calling function (it's not done here because of efficiency)
2578 */
2579 static status_t
2580 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2581 	bool kernel)
2582 {
2583 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2584 
2585 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2586 		return B_BAD_VALUE;
2587 
2588 	if (!S_ISDIR(vnode->Type()))
2589 		return B_NOT_A_DIRECTORY;
2590 
2591 	char* path = buffer;
2592 	int32 insert = bufferSize;
2593 	int32 maxLevel = 256;
2594 	int32 length;
2595 	status_t status = B_OK;
2596 	struct io_context* ioContext = get_current_io_context(kernel);
2597 
2598 	// we don't use get_vnode() here because this call is more
2599 	// efficient and does all we need from get_vnode()
2600 	inc_vnode_ref_count(vnode);
2601 
2602 	path[--insert] = '\0';
2603 		// the path is filled right to left
2604 
2605 	while (true) {
2606 		// If the node is the context's root, bail out. Otherwise resolve mount
2607 		// points.
2608 		if (vnode == ioContext->root)
2609 			break;
2610 
2611 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2612 			put_vnode(vnode);
2613 			vnode = coveredVnode;
2614 		}
2615 
2616 		// lookup the parent vnode
2617 		struct vnode* parentVnode;
2618 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2619 		if (status != B_OK)
2620 			goto out;
2621 
2622 		if (parentVnode == vnode) {
2623 			// The caller apparently got their hands on a node outside of their
2624 			// context's root. Now we've hit the global root.
2625 			put_vnode(parentVnode);
2626 			break;
2627 		}
2628 
2629 		// get the node's name
2630 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2631 			// also used for fs_read_dir()
2632 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2633 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2634 			sizeof(nameBuffer), ioContext);
2635 
2636 		// release the current vnode, we only need its parent from now on
2637 		put_vnode(vnode);
2638 		vnode = parentVnode;
2639 
2640 		if (status != B_OK)
2641 			goto out;
2642 
2643 		// TODO: add an explicit check for loops in about 10 levels to do
2644 		// real loop detection
2645 
2646 		// don't go deeper as 'maxLevel' to prevent circular loops
2647 		if (maxLevel-- < 0) {
2648 			status = B_LINK_LIMIT;
2649 			goto out;
2650 		}
2651 
2652 		// add the name in front of the current path
2653 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2654 		length = strlen(name);
2655 		insert -= length;
2656 		if (insert <= 0) {
2657 			status = B_RESULT_NOT_REPRESENTABLE;
2658 			goto out;
2659 		}
2660 		memcpy(path + insert, name, length);
2661 		path[--insert] = '/';
2662 	}
2663 
2664 	// the root dir will result in an empty path: fix it
2665 	if (path[insert] == '\0')
2666 		path[--insert] = '/';
2667 
2668 	TRACE(("  path is: %s\n", path + insert));
2669 
2670 	// move the path to the start of the buffer
2671 	length = bufferSize - insert;
2672 	memmove(buffer, path + insert, length);
2673 
2674 out:
2675 	put_vnode(vnode);
2676 	return status;
2677 }
2678 
2679 
2680 /*!	Checks the length of every path component, and adds a '.'
2681 	if the path ends in a slash.
2682 	The given path buffer must be able to store at least one
2683 	additional character.
2684 */
2685 static status_t
2686 check_path(char* to)
2687 {
2688 	int32 length = 0;
2689 
2690 	// check length of every path component
2691 
2692 	while (*to) {
2693 		char* begin;
2694 		if (*to == '/')
2695 			to++, length++;
2696 
2697 		begin = to;
2698 		while (*to != '/' && *to)
2699 			to++, length++;
2700 
2701 		if (to - begin > B_FILE_NAME_LENGTH)
2702 			return B_NAME_TOO_LONG;
2703 	}
2704 
2705 	if (length == 0)
2706 		return B_ENTRY_NOT_FOUND;
2707 
2708 	// complete path if there is a slash at the end
2709 
2710 	if (*(to - 1) == '/') {
2711 		if (length > B_PATH_NAME_LENGTH - 2)
2712 			return B_NAME_TOO_LONG;
2713 
2714 		to[0] = '.';
2715 		to[1] = '\0';
2716 	}
2717 
2718 	return B_OK;
2719 }
2720 
2721 
2722 static struct file_descriptor*
2723 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2724 {
2725 	struct file_descriptor* descriptor
2726 		= get_fd(get_current_io_context(kernel), fd);
2727 	if (descriptor == NULL)
2728 		return NULL;
2729 
2730 	struct vnode* vnode = fd_vnode(descriptor);
2731 	if (vnode == NULL) {
2732 		put_fd(descriptor);
2733 		return NULL;
2734 	}
2735 
2736 	// ToDo: when we can close a file descriptor at any point, investigate
2737 	//	if this is still valid to do (accessing the vnode without ref_count
2738 	//	or locking)
2739 	*_vnode = vnode;
2740 	return descriptor;
2741 }
2742 
2743 
2744 static struct vnode*
2745 get_vnode_from_fd(int fd, bool kernel)
2746 {
2747 	struct file_descriptor* descriptor;
2748 	struct vnode* vnode;
2749 
2750 	descriptor = get_fd(get_current_io_context(kernel), fd);
2751 	if (descriptor == NULL)
2752 		return NULL;
2753 
2754 	vnode = fd_vnode(descriptor);
2755 	if (vnode != NULL)
2756 		inc_vnode_ref_count(vnode);
2757 
2758 	put_fd(descriptor);
2759 	return vnode;
2760 }
2761 
2762 
2763 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2764 	only the path will be considered. In this case, the \a path must not be
2765 	NULL.
2766 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2767 	and should be NULL for files.
2768 */
2769 static status_t
2770 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2771 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2772 {
2773 	if (fd < 0 && !path)
2774 		return B_BAD_VALUE;
2775 
2776 	if (path != NULL && *path == '\0')
2777 		return B_ENTRY_NOT_FOUND;
2778 
2779 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2780 		// no FD or absolute path
2781 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2782 	}
2783 
2784 	// FD only, or FD + relative path
2785 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2786 	if (vnode == NULL)
2787 		return B_FILE_ERROR;
2788 
2789 	if (path != NULL) {
2790 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2791 			_vnode, _parentID);
2792 	}
2793 
2794 	// there is no relative path to take into account
2795 
2796 	*_vnode = vnode;
2797 	if (_parentID)
2798 		*_parentID = -1;
2799 
2800 	return B_OK;
2801 }
2802 
2803 
2804 static int
2805 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2806 	void* cookie, int openMode, bool kernel)
2807 {
2808 	struct file_descriptor* descriptor;
2809 	int fd;
2810 
2811 	// If the vnode is locked, we don't allow creating a new file/directory
2812 	// file_descriptor for it
2813 	if (vnode && vnode->mandatory_locked_by != NULL
2814 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2815 		return B_BUSY;
2816 
2817 	descriptor = alloc_fd();
2818 	if (!descriptor)
2819 		return B_NO_MEMORY;
2820 
2821 	if (vnode)
2822 		descriptor->u.vnode = vnode;
2823 	else
2824 		descriptor->u.mount = mount;
2825 	descriptor->cookie = cookie;
2826 
2827 	switch (type) {
2828 		// vnode types
2829 		case FDTYPE_FILE:
2830 			descriptor->ops = &sFileOps;
2831 			break;
2832 		case FDTYPE_DIR:
2833 			descriptor->ops = &sDirectoryOps;
2834 			break;
2835 		case FDTYPE_ATTR:
2836 			descriptor->ops = &sAttributeOps;
2837 			break;
2838 		case FDTYPE_ATTR_DIR:
2839 			descriptor->ops = &sAttributeDirectoryOps;
2840 			break;
2841 
2842 		// mount types
2843 		case FDTYPE_INDEX_DIR:
2844 			descriptor->ops = &sIndexDirectoryOps;
2845 			break;
2846 		case FDTYPE_QUERY:
2847 			descriptor->ops = &sQueryOps;
2848 			break;
2849 
2850 		default:
2851 			panic("get_new_fd() called with unknown type %d\n", type);
2852 			break;
2853 	}
2854 	descriptor->type = type;
2855 	descriptor->open_mode = openMode;
2856 
2857 	io_context* context = get_current_io_context(kernel);
2858 	fd = new_fd(context, descriptor);
2859 	if (fd < 0) {
2860 		free(descriptor);
2861 		return B_NO_MORE_FDS;
2862 	}
2863 
2864 	mutex_lock(&context->io_mutex);
2865 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2866 	mutex_unlock(&context->io_mutex);
2867 
2868 	return fd;
2869 }
2870 
2871 
2872 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2873 	vfs_normalize_path(). See there for more documentation.
2874 */
2875 static status_t
2876 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2877 {
2878 	VNodePutter dirPutter;
2879 	struct vnode* dir = NULL;
2880 	status_t error;
2881 
2882 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2883 		// get dir vnode + leaf name
2884 		struct vnode* nextDir;
2885 		char leaf[B_FILE_NAME_LENGTH];
2886 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2887 		if (error != B_OK)
2888 			return error;
2889 
2890 		dir = nextDir;
2891 		strcpy(path, leaf);
2892 		dirPutter.SetTo(dir);
2893 
2894 		// get file vnode, if we shall resolve links
2895 		bool fileExists = false;
2896 		struct vnode* fileVnode;
2897 		VNodePutter fileVnodePutter;
2898 		if (traverseLink) {
2899 			inc_vnode_ref_count(dir);
2900 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2901 					NULL) == B_OK) {
2902 				fileVnodePutter.SetTo(fileVnode);
2903 				fileExists = true;
2904 			}
2905 		}
2906 
2907 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2908 			// we're done -- construct the path
2909 			bool hasLeaf = true;
2910 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2911 				// special cases "." and ".." -- get the dir, forget the leaf
2912 				inc_vnode_ref_count(dir);
2913 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2914 					&nextDir, NULL);
2915 				if (error != B_OK)
2916 					return error;
2917 				dir = nextDir;
2918 				dirPutter.SetTo(dir);
2919 				hasLeaf = false;
2920 			}
2921 
2922 			// get the directory path
2923 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2924 			if (error != B_OK)
2925 				return error;
2926 
2927 			// append the leaf name
2928 			if (hasLeaf) {
2929 				// insert a directory separator if this is not the file system
2930 				// root
2931 				if ((strcmp(path, "/") != 0
2932 					&& strlcat(path, "/", pathSize) >= pathSize)
2933 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2934 					return B_NAME_TOO_LONG;
2935 				}
2936 			}
2937 
2938 			return B_OK;
2939 		}
2940 
2941 		// read link
2942 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2943 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2944 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2945 			if (error != B_OK)
2946 				return error;
2947 			path[bufferSize] = '\0';
2948 		} else
2949 			return B_BAD_VALUE;
2950 	}
2951 
2952 	return B_LINK_LIMIT;
2953 }
2954 
2955 
2956 static status_t
2957 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2958 	struct io_context* ioContext)
2959 {
2960 	// Make sure the IO context root is not bypassed.
2961 	if (parent == ioContext->root) {
2962 		*_device = parent->device;
2963 		*_node = parent->id;
2964 		return B_OK;
2965 	}
2966 
2967 	inc_vnode_ref_count(parent);
2968 		// vnode_path_to_vnode() puts the node
2969 
2970 	// ".." is guaranteed not to be clobbered by this call
2971 	struct vnode* vnode;
2972 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2973 		ioContext, &vnode, NULL);
2974 	if (status == B_OK) {
2975 		*_device = vnode->device;
2976 		*_node = vnode->id;
2977 		put_vnode(vnode);
2978 	}
2979 
2980 	return status;
2981 }
2982 
2983 
2984 #ifdef ADD_DEBUGGER_COMMANDS
2985 
2986 
2987 static void
2988 _dump_advisory_locking(advisory_locking* locking)
2989 {
2990 	if (locking == NULL)
2991 		return;
2992 
2993 	kprintf("   lock:        %" B_PRId32, locking->lock);
2994 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2995 
2996 	int32 index = 0;
2997 	LockList::Iterator iterator = locking->locks.GetIterator();
2998 	while (iterator.HasNext()) {
2999 		struct advisory_lock* lock = iterator.Next();
3000 
3001 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3002 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3003 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3004 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3005 	}
3006 }
3007 
3008 
3009 static void
3010 _dump_mount(struct fs_mount* mount)
3011 {
3012 	kprintf("MOUNT: %p\n", mount);
3013 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3014 	kprintf(" device_name:   %s\n", mount->device_name);
3015 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3016 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3017 	kprintf(" partition:     %p\n", mount->partition);
3018 	kprintf(" lock:          %p\n", &mount->rlock);
3019 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3020 		mount->owns_file_device ? " owns_file_device" : "");
3021 
3022 	fs_volume* volume = mount->volume;
3023 	while (volume != NULL) {
3024 		kprintf(" volume %p:\n", volume);
3025 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3026 		kprintf("  private_volume:   %p\n", volume->private_volume);
3027 		kprintf("  ops:              %p\n", volume->ops);
3028 		kprintf("  file_system:      %p\n", volume->file_system);
3029 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3030 		volume = volume->super_volume;
3031 	}
3032 
3033 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3034 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3035 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3036 	set_debug_variable("_partition", (addr_t)mount->partition);
3037 }
3038 
3039 
/*!	Prepends \a name to the path that is being built right to left in
	\a buffer. \a bufferSize is the index at which the current path starts;
	on success it is moved down to the new start of the path.
	A '/' is inserted between \a name and the existing path, unless the
	existing path is still empty.
	Returns \c false and leaves \a buffer and \a bufferSize untouched, if
	there is not enough room left.
*/
static bool
debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
	const char* name)
{
	const size_t nameLength = strlen(name);

	// a separator is only needed, if something follows the name
	const bool needsSeparator = buffer[bufferSize] != '\0';
	const size_t requiredSize = nameLength + (needsSeparator ? 1 : 0);
	if (requiredSize > bufferSize)
		return false;

	if (needsSeparator) {
		bufferSize--;
		buffer[bufferSize] = '/';
	}

	bufferSize -= nameLength;
	memcpy(buffer + bufferSize, name, nameLength);

	return true;
}
3058 
3059 
3060 static bool
3061 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3062 	ino_t nodeID)
3063 {
3064 	if (bufferSize == 0)
3065 		return false;
3066 
3067 	bool insertSlash = buffer[bufferSize] != '\0';
3068 	if (insertSlash)
3069 		buffer[--bufferSize] = '/';
3070 
3071 	size_t size = snprintf(buffer, bufferSize,
3072 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3073 	if (size > bufferSize) {
3074 		if (insertSlash)
3075 			bufferSize++;
3076 		return false;
3077 	}
3078 
3079 	if (size < bufferSize)
3080 		memmove(buffer + bufferSize - size, buffer, size);
3081 
3082 	bufferSize -= size;
3083 	return true;
3084 }
3085 
3086 
3087 static char*
3088 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3089 	bool& _truncated)
3090 {
3091 	// null-terminate the path
3092 	buffer[--bufferSize] = '\0';
3093 
3094 	while (true) {
3095 		while (vnode->covers != NULL)
3096 			vnode = vnode->covers;
3097 
3098 		if (vnode == sRoot) {
3099 			_truncated = bufferSize == 0;
3100 			if (!_truncated)
3101 				buffer[--bufferSize] = '/';
3102 			return buffer + bufferSize;
3103 		}
3104 
3105 		// resolve the name
3106 		ino_t dirID;
3107 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3108 			vnode->id, dirID);
3109 		if (name == NULL) {
3110 			// Failed to resolve the name -- prepend "<dev,node>/".
3111 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3112 				vnode->mount->id, vnode->id);
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		// prepend the name
3117 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3118 			_truncated = true;
3119 			return buffer + bufferSize;
3120 		}
3121 
3122 		// resolve the directory node
3123 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3124 		if (nextVnode == NULL) {
3125 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3126 				vnode->mount->id, dirID);
3127 			return buffer + bufferSize;
3128 		}
3129 
3130 		vnode = nextVnode;
3131 	}
3132 }
3133 
3134 
3135 static void
3136 _dump_vnode(struct vnode* vnode, bool printPath)
3137 {
3138 	kprintf("VNODE: %p\n", vnode);
3139 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3140 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3141 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3142 	kprintf(" private_node:  %p\n", vnode->private_node);
3143 	kprintf(" mount:         %p\n", vnode->mount);
3144 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3145 	kprintf(" covers:        %p\n", vnode->covers);
3146 	kprintf(" cache:         %p\n", vnode->cache);
3147 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3148 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3149 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3150 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3151 
3152 	_dump_advisory_locking(vnode->advisory_locking);
3153 
3154 	if (printPath) {
3155 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3156 		if (buffer != NULL) {
3157 			bool truncated;
3158 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3159 				B_PATH_NAME_LENGTH, truncated);
3160 			if (path != NULL) {
3161 				kprintf(" path:          ");
3162 				if (truncated)
3163 					kputs("<truncated>/");
3164 				kputs(path);
3165 				kputs("\n");
3166 			} else
3167 				kprintf("Failed to resolve vnode path.\n");
3168 
3169 			debug_free(buffer);
3170 		} else
3171 			kprintf("Failed to allocate memory for constructing the path.\n");
3172 	}
3173 
3174 	set_debug_variable("_node", (addr_t)vnode->private_node);
3175 	set_debug_variable("_mount", (addr_t)vnode->mount);
3176 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3177 	set_debug_variable("_covers", (addr_t)vnode->covers);
3178 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3179 }
3180 
3181 
3182 static int
3183 dump_mount(int argc, char** argv)
3184 {
3185 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3186 		kprintf("usage: %s [id|address]\n", argv[0]);
3187 		return 0;
3188 	}
3189 
3190 	ulong val = parse_expression(argv[1]);
3191 	uint32 id = val;
3192 
3193 	struct fs_mount* mount = sMountsTable->Lookup(id);
3194 	if (mount == NULL) {
3195 		if (IS_USER_ADDRESS(id)) {
3196 			kprintf("fs_mount not found\n");
3197 			return 0;
3198 		}
3199 		mount = (fs_mount*)val;
3200 	}
3201 
3202 	_dump_mount(mount);
3203 	return 0;
3204 }
3205 
3206 
3207 static int
3208 dump_mounts(int argc, char** argv)
3209 {
3210 	if (argc != 1) {
3211 		kprintf("usage: %s\n", argv[0]);
3212 		return 0;
3213 	}
3214 
3215 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3216 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3217 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3218 
3219 	struct fs_mount* mount;
3220 
3221 	MountTable::Iterator iterator(sMountsTable);
3222 	while (iterator.HasNext()) {
3223 		mount = iterator.Next();
3224 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3225 			mount->root_vnode->covers, mount->volume->private_volume,
3226 			mount->volume->file_system_name);
3227 
3228 		fs_volume* volume = mount->volume;
3229 		while (volume->super_volume != NULL) {
3230 			volume = volume->super_volume;
3231 			kprintf("                                     %p %s\n",
3232 				volume->private_volume, volume->file_system_name);
3233 		}
3234 	}
3235 
3236 	return 0;
3237 }
3238 
3239 
3240 static int
3241 dump_vnode(int argc, char** argv)
3242 {
3243 	bool printPath = false;
3244 	int argi = 1;
3245 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3246 		printPath = true;
3247 		argi++;
3248 	}
3249 
3250 	if (argi >= argc || argi + 2 < argc) {
3251 		print_debugger_command_usage(argv[0]);
3252 		return 0;
3253 	}
3254 
3255 	struct vnode* vnode = NULL;
3256 
3257 	if (argi + 1 == argc) {
3258 		vnode = (struct vnode*)parse_expression(argv[argi]);
3259 		if (IS_USER_ADDRESS(vnode)) {
3260 			kprintf("invalid vnode address\n");
3261 			return 0;
3262 		}
3263 		_dump_vnode(vnode, printPath);
3264 		return 0;
3265 	}
3266 
3267 	dev_t device = parse_expression(argv[argi]);
3268 	ino_t id = parse_expression(argv[argi + 1]);
3269 
3270 	VnodeTable::Iterator iterator(sVnodeTable);
3271 	while (iterator.HasNext()) {
3272 		vnode = iterator.Next();
3273 		if (vnode->id != id || vnode->device != device)
3274 			continue;
3275 
3276 		_dump_vnode(vnode, printPath);
3277 	}
3278 
3279 	return 0;
3280 }
3281 
3282 
3283 static int
3284 dump_vnodes(int argc, char** argv)
3285 {
3286 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3287 		kprintf("usage: %s [device]\n", argv[0]);
3288 		return 0;
3289 	}
3290 
3291 	// restrict dumped nodes to a certain device if requested
3292 	dev_t device = parse_expression(argv[1]);
3293 
3294 	struct vnode* vnode;
3295 
3296 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3297 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3298 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3299 
3300 	VnodeTable::Iterator iterator(sVnodeTable);
3301 	while (iterator.HasNext()) {
3302 		vnode = iterator.Next();
3303 		if (vnode->device != device)
3304 			continue;
3305 
3306 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3307 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3308 			vnode->private_node, vnode->advisory_locking,
3309 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3310 			vnode->IsUnpublished() ? "u" : "-");
3311 	}
3312 
3313 	return 0;
3314 }
3315 
3316 
3317 static int
3318 dump_vnode_caches(int argc, char** argv)
3319 {
3320 	struct vnode* vnode;
3321 
3322 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3323 		kprintf("usage: %s [device]\n", argv[0]);
3324 		return 0;
3325 	}
3326 
3327 	// restrict dumped nodes to a certain device if requested
3328 	dev_t device = -1;
3329 	if (argc > 1)
3330 		device = parse_expression(argv[1]);
3331 
3332 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3333 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3334 
3335 	VnodeTable::Iterator iterator(sVnodeTable);
3336 	while (iterator.HasNext()) {
3337 		vnode = iterator.Next();
3338 		if (vnode->cache == NULL)
3339 			continue;
3340 		if (device != -1 && vnode->device != device)
3341 			continue;
3342 
3343 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3344 			vnode, vnode->device, vnode->id, vnode->cache,
3345 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3346 			vnode->cache->page_count);
3347 	}
3348 
3349 	return 0;
3350 }
3351 
3352 
3353 int
3354 dump_io_context(int argc, char** argv)
3355 {
3356 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3357 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3358 		return 0;
3359 	}
3360 
3361 	struct io_context* context = NULL;
3362 
3363 	if (argc > 1) {
3364 		ulong num = parse_expression(argv[1]);
3365 		if (IS_KERNEL_ADDRESS(num))
3366 			context = (struct io_context*)num;
3367 		else {
3368 			Team* team = team_get_team_struct_locked(num);
3369 			if (team == NULL) {
3370 				kprintf("could not find team with ID %lu\n", num);
3371 				return 0;
3372 			}
3373 			context = (struct io_context*)team->io_context;
3374 		}
3375 	} else
3376 		context = get_current_io_context(true);
3377 
3378 	kprintf("I/O CONTEXT: %p\n", context);
3379 	kprintf(" root vnode:\t%p\n", context->root);
3380 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3381 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3382 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3383 
3384 	if (context->num_used_fds) {
3385 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3386 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3387 	}
3388 
3389 	for (uint32 i = 0; i < context->table_size; i++) {
3390 		struct file_descriptor* fd = context->fds[i];
3391 		if (fd == NULL)
3392 			continue;
3393 
3394 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3395 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3396 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3397 			fd->pos, fd->cookie,
3398 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3399 				? "mount" : "vnode",
3400 			fd->u.vnode);
3401 	}
3402 
3403 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3404 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3405 
3406 	set_debug_variable("_cwd", (addr_t)context->cwd);
3407 
3408 	return 0;
3409 }
3410 
3411 
3412 int
3413 dump_vnode_usage(int argc, char** argv)
3414 {
3415 	if (argc != 1) {
3416 		kprintf("usage: %s\n", argv[0]);
3417 		return 0;
3418 	}
3419 
3420 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3421 		sUnusedVnodes, kMaxUnusedVnodes);
3422 
3423 	uint32 count = sVnodeTable->CountElements();
3424 
3425 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3426 		count - sUnusedVnodes);
3427 	return 0;
3428 }
3429 
3430 #endif	// ADD_DEBUGGER_COMMANDS
3431 
3432 
/*!	Fills the first \a bytes bytes of the memory described by the iovec array
	with zeros.

	\param vecs The iovec array.
	\param vecCount The number of elements in \a vecs.
	\param bytes The maximum number of bytes to clear. Clearing stops early
		when the vecs are exhausted.
*/
static void
zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
{
	size_t remaining = bytes;
	size_t index = 0;

	while (index < vecCount && remaining > 0) {
		const iovec& vec = vecs[index++];

		// never clear more than the vec holds or than is still requested
		size_t chunk = vec.iov_len < remaining ? vec.iov_len : remaining;
		memset(vec.iov_base, 0, chunk);
		remaining -= chunk;
	}
}
3444 
3445 
3446 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3447 	and calls the file system hooks to read/write the request to disk.
3448 */
3449 static status_t
3450 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3451 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3452 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3453 	bool doWrite)
3454 {
3455 	if (fileVecCount == 0) {
3456 		// There are no file vecs at this offset, so we're obviously trying
3457 		// to access the file outside of its bounds
3458 		return B_BAD_VALUE;
3459 	}
3460 
3461 	size_t numBytes = *_numBytes;
3462 	uint32 fileVecIndex;
3463 	size_t vecOffset = *_vecOffset;
3464 	uint32 vecIndex = *_vecIndex;
3465 	status_t status;
3466 	size_t size;
3467 
3468 	if (!doWrite && vecOffset == 0) {
3469 		// now directly read the data from the device
3470 		// the first file_io_vec can be read directly
3471 
3472 		if (fileVecs[0].length < (off_t)numBytes)
3473 			size = fileVecs[0].length;
3474 		else
3475 			size = numBytes;
3476 
3477 		if (fileVecs[0].offset >= 0) {
3478 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3479 				&vecs[vecIndex], vecCount - vecIndex, &size);
3480 		} else {
3481 			// sparse read
3482 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3483 			status = B_OK;
3484 		}
3485 		if (status != B_OK)
3486 			return status;
3487 
3488 		// TODO: this is a work-around for buggy device drivers!
3489 		//	When our own drivers honour the length, we can:
3490 		//	a) also use this direct I/O for writes (otherwise, it would
3491 		//	   overwrite precious data)
3492 		//	b) panic if the term below is true (at least for writes)
3493 		if ((off_t)size > fileVecs[0].length) {
3494 			//dprintf("warning: device driver %p doesn't respect total length "
3495 			//	"in read_pages() call!\n", ref->device);
3496 			size = fileVecs[0].length;
3497 		}
3498 
3499 		ASSERT((off_t)size <= fileVecs[0].length);
3500 
3501 		// If the file portion was contiguous, we're already done now
3502 		if (size == numBytes)
3503 			return B_OK;
3504 
3505 		// if we reached the end of the file, we can return as well
3506 		if ((off_t)size != fileVecs[0].length) {
3507 			*_numBytes = size;
3508 			return B_OK;
3509 		}
3510 
3511 		fileVecIndex = 1;
3512 
3513 		// first, find out where we have to continue in our iovecs
3514 		for (; vecIndex < vecCount; vecIndex++) {
3515 			if (size < vecs[vecIndex].iov_len)
3516 				break;
3517 
3518 			size -= vecs[vecIndex].iov_len;
3519 		}
3520 
3521 		vecOffset = size;
3522 	} else {
3523 		fileVecIndex = 0;
3524 		size = 0;
3525 	}
3526 
3527 	// Too bad, let's process the rest of the file_io_vecs
3528 
3529 	size_t totalSize = size;
3530 	size_t bytesLeft = numBytes - size;
3531 
3532 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3533 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3534 		off_t fileOffset = fileVec.offset;
3535 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3536 
3537 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3538 			fileLeft));
3539 
3540 		// process the complete fileVec
3541 		while (fileLeft > 0) {
3542 			iovec tempVecs[MAX_TEMP_IO_VECS];
3543 			uint32 tempCount = 0;
3544 
3545 			// size tracks how much of what is left of the current fileVec
3546 			// (fileLeft) has been assigned to tempVecs
3547 			size = 0;
3548 
3549 			// assign what is left of the current fileVec to the tempVecs
3550 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3551 					&& tempCount < MAX_TEMP_IO_VECS;) {
3552 				// try to satisfy one iovec per iteration (or as much as
3553 				// possible)
3554 
3555 				// bytes left of the current iovec
3556 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3557 				if (vecLeft == 0) {
3558 					vecOffset = 0;
3559 					vecIndex++;
3560 					continue;
3561 				}
3562 
3563 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3564 					vecIndex, vecOffset, size));
3565 
3566 				// actually available bytes
3567 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3568 
3569 				tempVecs[tempCount].iov_base
3570 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3571 				tempVecs[tempCount].iov_len = tempVecSize;
3572 				tempCount++;
3573 
3574 				size += tempVecSize;
3575 				vecOffset += tempVecSize;
3576 			}
3577 
3578 			size_t bytes = size;
3579 
3580 			if (fileOffset == -1) {
3581 				if (doWrite) {
3582 					panic("sparse write attempt: vnode %p", vnode);
3583 					status = B_IO_ERROR;
3584 				} else {
3585 					// sparse read
3586 					zero_iovecs(tempVecs, tempCount, bytes);
3587 					status = B_OK;
3588 				}
3589 			} else if (doWrite) {
3590 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3591 					tempVecs, tempCount, &bytes);
3592 			} else {
3593 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3594 					tempVecs, tempCount, &bytes);
3595 			}
3596 			if (status != B_OK)
3597 				return status;
3598 
3599 			totalSize += bytes;
3600 			bytesLeft -= size;
3601 			if (fileOffset >= 0)
3602 				fileOffset += size;
3603 			fileLeft -= size;
3604 			//dprintf("-> file left = %Lu\n", fileLeft);
3605 
3606 			if (size != bytes || vecIndex >= vecCount) {
3607 				// there are no more bytes or iovecs, let's bail out
3608 				*_numBytes = totalSize;
3609 				return B_OK;
3610 			}
3611 		}
3612 	}
3613 
3614 	*_vecIndex = vecIndex;
3615 	*_vecOffset = vecOffset;
3616 	*_numBytes = totalSize;
3617 	return B_OK;
3618 }
3619 
3620 
/*!	Returns whether \a gid is the effective group ID (getegid()) or one of
	the group IDs reported by getgroups().
*/
static bool
is_user_in_group(gid_t gid)
{
	bool isMember = gid == getegid();

	if (!isMember) {
		gid_t supplementary[NGROUPS_MAX];
		int count = getgroups(NGROUPS_MAX, supplementary);

		for (int index = 0; !isMember && index < count; index++)
			isMember = supplementary[index] == gid;
	}

	return isMember;
}
3636 
3637 
3638 static status_t
3639 free_io_context(io_context* context)
3640 {
3641 	uint32 i;
3642 
3643 	TIOC(FreeIOContext(context));
3644 
3645 	if (context->root)
3646 		put_vnode(context->root);
3647 
3648 	if (context->cwd)
3649 		put_vnode(context->cwd);
3650 
3651 	mutex_lock(&context->io_mutex);
3652 
3653 	for (i = 0; i < context->table_size; i++) {
3654 		if (struct file_descriptor* descriptor = context->fds[i]) {
3655 			close_fd(context, descriptor);
3656 			put_fd(descriptor);
3657 		}
3658 	}
3659 
3660 	mutex_destroy(&context->io_mutex);
3661 
3662 	remove_node_monitors(context);
3663 	free(context->fds);
3664 	free(context);
3665 
3666 	return B_OK;
3667 }
3668 
3669 
3670 static status_t
3671 resize_monitor_table(struct io_context* context, const int newSize)
3672 {
3673 	int	status = B_OK;
3674 
3675 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3676 		return B_BAD_VALUE;
3677 
3678 	mutex_lock(&context->io_mutex);
3679 
3680 	if ((size_t)newSize < context->num_monitors) {
3681 		status = B_BUSY;
3682 		goto out;
3683 	}
3684 	context->max_monitors = newSize;
3685 
3686 out:
3687 	mutex_unlock(&context->io_mutex);
3688 	return status;
3689 }
3690 
3691 
3692 //	#pragma mark - public API for file systems
3693 
3694 
3695 extern "C" status_t
3696 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3697 	fs_vnode_ops* ops)
3698 {
3699 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3700 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3701 
3702 	if (privateNode == NULL)
3703 		return B_BAD_VALUE;
3704 
3705 	int32 tries = BUSY_VNODE_RETRIES;
3706 restart:
3707 	// create the node
3708 	bool nodeCreated;
3709 	struct vnode* vnode;
3710 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3711 		nodeCreated);
3712 	if (status != B_OK)
3713 		return status;
3714 
3715 	WriteLocker nodeLocker(sVnodeLock, true);
3716 		// create_new_vnode_and_lock() has locked for us
3717 
3718 	if (!nodeCreated && vnode->IsBusy()) {
3719 		nodeLocker.Unlock();
3720 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3721 			return B_BUSY;
3722 		goto restart;
3723 	}
3724 
3725 	// file system integrity check:
3726 	// test if the vnode already exists and bail out if this is the case!
3727 	if (!nodeCreated) {
3728 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3729 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3730 			vnode->private_node);
3731 		return B_ERROR;
3732 	}
3733 
3734 	vnode->private_node = privateNode;
3735 	vnode->ops = ops;
3736 	vnode->SetUnpublished(true);
3737 
3738 	TRACE(("returns: %s\n", strerror(status)));
3739 
3740 	return status;
3741 }
3742 
3743 
3744 extern "C" status_t
3745 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3746 	fs_vnode_ops* ops, int type, uint32 flags)
3747 {
3748 	FUNCTION(("publish_vnode()\n"));
3749 
3750 	int32 tries = BUSY_VNODE_RETRIES;
3751 restart:
3752 	WriteLocker locker(sVnodeLock);
3753 
3754 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3755 
3756 	bool nodeCreated = false;
3757 	if (vnode == NULL) {
3758 		if (privateNode == NULL)
3759 			return B_BAD_VALUE;
3760 
3761 		// create the node
3762 		locker.Unlock();
3763 			// create_new_vnode_and_lock() will re-lock for us on success
3764 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3765 			nodeCreated);
3766 		if (status != B_OK)
3767 			return status;
3768 
3769 		locker.SetTo(sVnodeLock, true);
3770 	}
3771 
3772 	if (nodeCreated) {
3773 		vnode->private_node = privateNode;
3774 		vnode->ops = ops;
3775 		vnode->SetUnpublished(true);
3776 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3777 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3778 		// already known, but not published
3779 	} else if (vnode->IsBusy()) {
3780 		locker.Unlock();
3781 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3782 			return B_BUSY;
3783 		goto restart;
3784 	} else
3785 		return B_BAD_VALUE;
3786 
3787 	bool publishSpecialSubNode = false;
3788 
3789 	vnode->SetType(type);
3790 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3791 	publishSpecialSubNode = is_special_node_type(type)
3792 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3793 
3794 	status_t status = B_OK;
3795 
3796 	// create sub vnodes, if necessary
3797 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3798 		locker.Unlock();
3799 
3800 		fs_volume* subVolume = volume;
3801 		if (volume->sub_volume != NULL) {
3802 			while (status == B_OK && subVolume->sub_volume != NULL) {
3803 				subVolume = subVolume->sub_volume;
3804 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3805 					vnode);
3806 			}
3807 		}
3808 
3809 		if (status == B_OK && publishSpecialSubNode)
3810 			status = create_special_sub_node(vnode, flags);
3811 
3812 		if (status != B_OK) {
3813 			// error -- clean up the created sub vnodes
3814 			while (subVolume->super_volume != volume) {
3815 				subVolume = subVolume->super_volume;
3816 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3817 			}
3818 		}
3819 
3820 		if (status == B_OK) {
3821 			ReadLocker vnodesReadLocker(sVnodeLock);
3822 			AutoLocker<Vnode> nodeLocker(vnode);
3823 			vnode->SetBusy(false);
3824 			vnode->SetUnpublished(false);
3825 		} else {
3826 			locker.Lock();
3827 			sVnodeTable->Remove(vnode);
3828 			remove_vnode_from_mount_list(vnode, vnode->mount);
3829 			free(vnode);
3830 		}
3831 	} else {
3832 		// we still hold the write lock -- mark the node unbusy and published
3833 		vnode->SetBusy(false);
3834 		vnode->SetUnpublished(false);
3835 	}
3836 
3837 	TRACE(("returns: %s\n", strerror(status)));
3838 
3839 	return status;
3840 }
3841 
3842 
3843 extern "C" status_t
3844 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3845 {
3846 	struct vnode* vnode;
3847 
3848 	if (volume == NULL)
3849 		return B_BAD_VALUE;
3850 
3851 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3852 	if (status != B_OK)
3853 		return status;
3854 
3855 	// If this is a layered FS, we need to get the node cookie for the requested
3856 	// layer.
3857 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3858 		fs_vnode resolvedNode;
3859 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3860 			&resolvedNode);
3861 		if (status != B_OK) {
3862 			panic("get_vnode(): Failed to get super node for vnode %p, "
3863 				"volume: %p", vnode, volume);
3864 			put_vnode(vnode);
3865 			return status;
3866 		}
3867 
3868 		if (_privateNode != NULL)
3869 			*_privateNode = resolvedNode.private_node;
3870 	} else if (_privateNode != NULL)
3871 		*_privateNode = vnode->private_node;
3872 
3873 	return B_OK;
3874 }
3875 
3876 
3877 extern "C" status_t
3878 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3879 {
3880 	struct vnode* vnode;
3881 
3882 	rw_lock_read_lock(&sVnodeLock);
3883 	vnode = lookup_vnode(volume->id, vnodeID);
3884 	rw_lock_read_unlock(&sVnodeLock);
3885 
3886 	if (vnode == NULL)
3887 		return B_BAD_VALUE;
3888 
3889 	inc_vnode_ref_count(vnode);
3890 	return B_OK;
3891 }
3892 
3893 
3894 extern "C" status_t
3895 put_vnode(fs_volume* volume, ino_t vnodeID)
3896 {
3897 	struct vnode* vnode;
3898 
3899 	rw_lock_read_lock(&sVnodeLock);
3900 	vnode = lookup_vnode(volume->id, vnodeID);
3901 	rw_lock_read_unlock(&sVnodeLock);
3902 
3903 	if (vnode == NULL)
3904 		return B_BAD_VALUE;
3905 
3906 	dec_vnode_ref_count(vnode, false, true);
3907 	return B_OK;
3908 }
3909 
3910 
3911 extern "C" status_t
3912 remove_vnode(fs_volume* volume, ino_t vnodeID)
3913 {
3914 	ReadLocker locker(sVnodeLock);
3915 
3916 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3917 	if (vnode == NULL)
3918 		return B_ENTRY_NOT_FOUND;
3919 
3920 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3921 		// this vnode is in use
3922 		return B_BUSY;
3923 	}
3924 
3925 	vnode->Lock();
3926 
3927 	vnode->SetRemoved(true);
3928 	bool removeUnpublished = false;
3929 
3930 	if (vnode->IsUnpublished()) {
3931 		// prepare the vnode for deletion
3932 		removeUnpublished = true;
3933 		vnode->SetBusy(true);
3934 	}
3935 
3936 	vnode->Unlock();
3937 	locker.Unlock();
3938 
3939 	if (removeUnpublished) {
3940 		// If the vnode hasn't been published yet, we delete it here
3941 		atomic_add(&vnode->ref_count, -1);
3942 		free_vnode(vnode, true);
3943 	}
3944 
3945 	return B_OK;
3946 }
3947 
3948 
3949 extern "C" status_t
3950 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3951 {
3952 	struct vnode* vnode;
3953 
3954 	rw_lock_read_lock(&sVnodeLock);
3955 
3956 	vnode = lookup_vnode(volume->id, vnodeID);
3957 	if (vnode) {
3958 		AutoLocker<Vnode> nodeLocker(vnode);
3959 		vnode->SetRemoved(false);
3960 	}
3961 
3962 	rw_lock_read_unlock(&sVnodeLock);
3963 	return B_OK;
3964 }
3965 
3966 
3967 extern "C" status_t
3968 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3969 {
3970 	ReadLocker _(sVnodeLock);
3971 
3972 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3973 		if (_removed != NULL)
3974 			*_removed = vnode->IsRemoved();
3975 		return B_OK;
3976 	}
3977 
3978 	return B_BAD_VALUE;
3979 }
3980 
3981 
3982 extern "C" fs_volume*
3983 volume_for_vnode(fs_vnode* _vnode)
3984 {
3985 	if (_vnode == NULL)
3986 		return NULL;
3987 
3988 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3989 	return vnode->mount->volume;
3990 }
3991 
3992 
3993 extern "C" status_t
3994 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3995 	uid_t nodeUserID)
3996 {
3997 	// get node permissions
3998 	int userPermissions = (mode & S_IRWXU) >> 6;
3999 	int groupPermissions = (mode & S_IRWXG) >> 3;
4000 	int otherPermissions = mode & S_IRWXO;
4001 
4002 	// get the node permissions for this uid/gid
4003 	int permissions = 0;
4004 	uid_t uid = geteuid();
4005 
4006 	if (uid == 0) {
4007 		// user is root
4008 		// root has always read/write permission, but at least one of the
4009 		// X bits must be set for execute permission
4010 		permissions = userPermissions | groupPermissions | otherPermissions
4011 			| S_IROTH | S_IWOTH;
4012 		if (S_ISDIR(mode))
4013 			permissions |= S_IXOTH;
4014 	} else if (uid == nodeUserID) {
4015 		// user is node owner
4016 		permissions = userPermissions;
4017 	} else if (is_user_in_group(nodeGroupID)) {
4018 		// user is in owning group
4019 		permissions = groupPermissions;
4020 	} else {
4021 		// user is one of the others
4022 		permissions = otherPermissions;
4023 	}
4024 
4025 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4026 }
4027 
4028 
4029 #if 0
4030 extern "C" status_t
4031 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4032 	size_t* _numBytes)
4033 {
4034 	struct file_descriptor* descriptor;
4035 	struct vnode* vnode;
4036 
4037 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4038 	if (descriptor == NULL)
4039 		return B_FILE_ERROR;
4040 
4041 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4042 		count, 0, _numBytes);
4043 
4044 	put_fd(descriptor);
4045 	return status;
4046 }
4047 
4048 
4049 extern "C" status_t
4050 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4051 	size_t* _numBytes)
4052 {
4053 	struct file_descriptor* descriptor;
4054 	struct vnode* vnode;
4055 
4056 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4057 	if (descriptor == NULL)
4058 		return B_FILE_ERROR;
4059 
4060 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4061 		count, 0, _numBytes);
4062 
4063 	put_fd(descriptor);
4064 	return status;
4065 }
4066 #endif
4067 
4068 
4069 extern "C" status_t
4070 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4071 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4072 	size_t* _bytes)
4073 {
4074 	struct file_descriptor* descriptor;
4075 	struct vnode* vnode;
4076 
4077 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4078 	if (descriptor == NULL)
4079 		return B_FILE_ERROR;
4080 
4081 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4082 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4083 		false);
4084 
4085 	put_fd(descriptor);
4086 	return status;
4087 }
4088 
4089 
4090 extern "C" status_t
4091 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4092 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4093 	size_t* _bytes)
4094 {
4095 	struct file_descriptor* descriptor;
4096 	struct vnode* vnode;
4097 
4098 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4099 	if (descriptor == NULL)
4100 		return B_FILE_ERROR;
4101 
4102 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4103 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4104 		true);
4105 
4106 	put_fd(descriptor);
4107 	return status;
4108 }
4109 
4110 
4111 extern "C" status_t
4112 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4113 {
4114 	// lookup mount -- the caller is required to make sure that the mount
4115 	// won't go away
4116 	MutexLocker locker(sMountMutex);
4117 	struct fs_mount* mount = find_mount(mountID);
4118 	if (mount == NULL)
4119 		return B_BAD_VALUE;
4120 	locker.Unlock();
4121 
4122 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4123 }
4124 
4125 
4126 extern "C" status_t
4127 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4128 {
4129 	// lookup mount -- the caller is required to make sure that the mount
4130 	// won't go away
4131 	MutexLocker locker(sMountMutex);
4132 	struct fs_mount* mount = find_mount(mountID);
4133 	if (mount == NULL)
4134 		return B_BAD_VALUE;
4135 	locker.Unlock();
4136 
4137 	return mount->entry_cache.Add(dirID, name, -1, true);
4138 }
4139 
4140 
4141 extern "C" status_t
4142 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4143 {
4144 	// lookup mount -- the caller is required to make sure that the mount
4145 	// won't go away
4146 	MutexLocker locker(sMountMutex);
4147 	struct fs_mount* mount = find_mount(mountID);
4148 	if (mount == NULL)
4149 		return B_BAD_VALUE;
4150 	locker.Unlock();
4151 
4152 	return mount->entry_cache.Remove(dirID, name);
4153 }
4154 
4155 
4156 //	#pragma mark - private VFS API
4157 //	Functions the VFS exports for other parts of the kernel
4158 
4159 
4160 /*! Acquires another reference to the vnode that has to be released
4161 	by calling vfs_put_vnode().
4162 */
4163 void
4164 vfs_acquire_vnode(struct vnode* vnode)
4165 {
4166 	inc_vnode_ref_count(vnode);
4167 }
4168 
4169 
4170 /*! This is currently called from file_cache_create() only.
4171 	It's probably a temporary solution as long as devfs requires that
4172 	fs_read_pages()/fs_write_pages() are called with the standard
4173 	open cookie and not with a device cookie.
4174 	If that's done differently, remove this call; it has no other
4175 	purpose.
4176 */
4177 extern "C" status_t
4178 vfs_get_cookie_from_fd(int fd, void** _cookie)
4179 {
4180 	struct file_descriptor* descriptor;
4181 
4182 	descriptor = get_fd(get_current_io_context(true), fd);
4183 	if (descriptor == NULL)
4184 		return B_FILE_ERROR;
4185 
4186 	*_cookie = descriptor->cookie;
4187 	return B_OK;
4188 }
4189 
4190 
4191 extern "C" status_t
4192 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4193 {
4194 	*vnode = get_vnode_from_fd(fd, kernel);
4195 
4196 	if (*vnode == NULL)
4197 		return B_FILE_ERROR;
4198 
4199 	return B_NO_ERROR;
4200 }
4201 
4202 
4203 extern "C" status_t
4204 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4205 {
4206 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4207 		path, kernel));
4208 
4209 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4210 	if (pathBuffer.InitCheck() != B_OK)
4211 		return B_NO_MEMORY;
4212 
4213 	char* buffer = pathBuffer.LockBuffer();
4214 	strlcpy(buffer, path, pathBuffer.BufferSize());
4215 
4216 	struct vnode* vnode;
4217 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4218 	if (status != B_OK)
4219 		return status;
4220 
4221 	*_vnode = vnode;
4222 	return B_OK;
4223 }
4224 
4225 
4226 extern "C" status_t
4227 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4228 {
4229 	struct vnode* vnode = NULL;
4230 
4231 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4232 	if (status != B_OK)
4233 		return status;
4234 
4235 	*_vnode = vnode;
4236 	return B_OK;
4237 }
4238 
4239 
4240 extern "C" status_t
4241 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4242 	const char* name, struct vnode** _vnode)
4243 {
4244 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4245 }
4246 
4247 
4248 extern "C" void
4249 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4250 {
4251 	*_mountID = vnode->device;
4252 	*_vnodeID = vnode->id;
4253 }
4254 
4255 
4256 /*!
4257 	Helper function abstracting the process of "converting" a given
4258 	vnode-pointer to a fs_vnode-pointer.
4259 	Currently only used in bindfs.
4260 */
4261 extern "C" fs_vnode*
4262 vfs_fsnode_for_vnode(struct vnode* vnode)
4263 {
4264 	return vnode;
4265 }
4266 
4267 
4268 /*!
4269 	Calls fs_open() on the given vnode and returns a new
4270 	file descriptor for it
4271 */
4272 int
4273 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4274 {
4275 	return open_vnode(vnode, openMode, kernel);
4276 }
4277 
4278 
4279 /*!	Looks up a vnode with the given mount and vnode ID.
4280 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4281 	to the node.
4282 	It's currently only be used by file_cache_create().
4283 */
4284 extern "C" status_t
4285 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4286 {
4287 	rw_lock_read_lock(&sVnodeLock);
4288 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4289 	rw_lock_read_unlock(&sVnodeLock);
4290 
4291 	if (vnode == NULL)
4292 		return B_ERROR;
4293 
4294 	*_vnode = vnode;
4295 	return B_OK;
4296 }
4297 
4298 
4299 extern "C" status_t
4300 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4301 	bool traverseLeafLink, bool kernel, void** _node)
4302 {
4303 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4304 		volume, path, kernel));
4305 
4306 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4307 	if (pathBuffer.InitCheck() != B_OK)
4308 		return B_NO_MEMORY;
4309 
4310 	fs_mount* mount;
4311 	status_t status = get_mount(volume->id, &mount);
4312 	if (status != B_OK)
4313 		return status;
4314 
4315 	char* buffer = pathBuffer.LockBuffer();
4316 	strlcpy(buffer, path, pathBuffer.BufferSize());
4317 
4318 	struct vnode* vnode = mount->root_vnode;
4319 
4320 	if (buffer[0] == '/')
4321 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4322 	else {
4323 		inc_vnode_ref_count(vnode);
4324 			// vnode_path_to_vnode() releases a reference to the starting vnode
4325 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4326 			kernel, &vnode, NULL);
4327 	}
4328 
4329 	put_mount(mount);
4330 
4331 	if (status != B_OK)
4332 		return status;
4333 
4334 	if (vnode->device != volume->id) {
4335 		// wrong mount ID - must not gain access on foreign file system nodes
4336 		put_vnode(vnode);
4337 		return B_BAD_VALUE;
4338 	}
4339 
4340 	// Use get_vnode() to resolve the cookie for the right layer.
4341 	status = get_vnode(volume, vnode->id, _node);
4342 	put_vnode(vnode);
4343 
4344 	return status;
4345 }
4346 
4347 
4348 status_t
4349 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4350 	struct stat* stat, bool kernel)
4351 {
4352 	status_t status;
4353 
4354 	if (path != NULL) {
4355 		// path given: get the stat of the node referred to by (fd, path)
4356 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4357 		if (pathBuffer.InitCheck() != B_OK)
4358 			return B_NO_MEMORY;
4359 
4360 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4361 			traverseLeafLink, stat, kernel);
4362 	} else {
4363 		// no path given: get the FD and use the FD operation
4364 		struct file_descriptor* descriptor
4365 			= get_fd(get_current_io_context(kernel), fd);
4366 		if (descriptor == NULL)
4367 			return B_FILE_ERROR;
4368 
4369 		if (descriptor->ops->fd_read_stat)
4370 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4371 		else
4372 			status = B_UNSUPPORTED;
4373 
4374 		put_fd(descriptor);
4375 	}
4376 
4377 	return status;
4378 }
4379 
4380 
4381 /*!	Finds the full path to the file that contains the module \a moduleName,
4382 	puts it into \a pathBuffer, and returns B_OK for success.
4383 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4384 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4385 	\a pathBuffer is clobbered in any case and must not be relied on if this
4386 	functions returns unsuccessfully.
4387 	\a basePath and \a pathBuffer must not point to the same space.
4388 */
4389 status_t
4390 vfs_get_module_path(const char* basePath, const char* moduleName,
4391 	char* pathBuffer, size_t bufferSize)
4392 {
4393 	struct vnode* dir;
4394 	struct vnode* file;
4395 	status_t status;
4396 	size_t length;
4397 	char* path;
4398 
4399 	if (bufferSize == 0
4400 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4401 		return B_BUFFER_OVERFLOW;
4402 
4403 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4404 	if (status != B_OK)
4405 		return status;
4406 
4407 	// the path buffer had been clobbered by the above call
4408 	length = strlcpy(pathBuffer, basePath, bufferSize);
4409 	if (pathBuffer[length - 1] != '/')
4410 		pathBuffer[length++] = '/';
4411 
4412 	path = pathBuffer + length;
4413 	bufferSize -= length;
4414 
4415 	while (moduleName) {
4416 		char* nextPath = strchr(moduleName, '/');
4417 		if (nextPath == NULL)
4418 			length = strlen(moduleName);
4419 		else {
4420 			length = nextPath - moduleName;
4421 			nextPath++;
4422 		}
4423 
4424 		if (length + 1 >= bufferSize) {
4425 			status = B_BUFFER_OVERFLOW;
4426 			goto err;
4427 		}
4428 
4429 		memcpy(path, moduleName, length);
4430 		path[length] = '\0';
4431 		moduleName = nextPath;
4432 
4433 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4434 		if (status != B_OK) {
4435 			// vnode_path_to_vnode() has already released the reference to dir
4436 			return status;
4437 		}
4438 
4439 		if (S_ISDIR(file->Type())) {
4440 			// goto the next directory
4441 			path[length] = '/';
4442 			path[length + 1] = '\0';
4443 			path += length + 1;
4444 			bufferSize -= length + 1;
4445 
4446 			dir = file;
4447 		} else if (S_ISREG(file->Type())) {
4448 			// it's a file so it should be what we've searched for
4449 			put_vnode(file);
4450 
4451 			return B_OK;
4452 		} else {
4453 			TRACE(("vfs_get_module_path(): something is strange here: "
4454 				"0x%08" B_PRIx32 "...\n", file->Type()));
4455 			status = B_ERROR;
4456 			dir = file;
4457 			goto err;
4458 		}
4459 	}
4460 
4461 	// if we got here, the moduleName just pointed to a directory, not to
4462 	// a real module - what should we do in this case?
4463 	status = B_ENTRY_NOT_FOUND;
4464 
4465 err:
4466 	put_vnode(dir);
4467 	return status;
4468 }
4469 
4470 
4471 /*!	\brief Normalizes a given path.
4472 
4473 	The path must refer to an existing or non-existing entry in an existing
4474 	directory, that is chopping off the leaf component the remaining path must
4475 	refer to an existing directory.
4476 
4477 	The returned will be canonical in that it will be absolute, will not
4478 	contain any "." or ".." components or duplicate occurrences of '/'s,
4479 	and none of the directory components will by symbolic links.
4480 
4481 	Any two paths referring to the same entry, will result in the same
4482 	normalized path (well, that is pretty much the definition of `normalized',
4483 	isn't it :-).
4484 
4485 	\param path The path to be normalized.
4486 	\param buffer The buffer into which the normalized path will be written.
4487 		   May be the same one as \a path.
4488 	\param bufferSize The size of \a buffer.
4489 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4490 	\param kernel \c true, if the IO context of the kernel shall be used,
4491 		   otherwise that of the team this thread belongs to. Only relevant,
4492 		   if the path is relative (to get the CWD).
4493 	\return \c B_OK if everything went fine, another error code otherwise.
4494 */
4495 status_t
4496 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4497 	bool traverseLink, bool kernel)
4498 {
4499 	if (!path || !buffer || bufferSize < 1)
4500 		return B_BAD_VALUE;
4501 
4502 	if (path != buffer) {
4503 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4504 			return B_BUFFER_OVERFLOW;
4505 	}
4506 
4507 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4508 }
4509 
4510 
4511 /*!	\brief Gets the parent of the passed in node.
4512 
4513 	Gets the parent of the passed in node, and correctly resolves covered
4514 	nodes.
4515 */
4516 extern "C" status_t
4517 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4518 {
4519 	return resolve_covered_parent(parent, device, node,
4520 		get_current_io_context(true));
4521 }
4522 
4523 
4524 /*!	\brief Creates a special node in the file system.
4525 
4526 	The caller gets a reference to the newly created node (which is passed
4527 	back through \a _createdVnode) and is responsible for releasing it.
4528 
4529 	\param path The path where to create the entry for the node. Can be \c NULL,
4530 		in which case the node is created without an entry in the root FS -- it
4531 		will automatically be deleted when the last reference has been released.
4532 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4533 		the target file system will just create the node with its standard
4534 		operations. Depending on the type of the node a subnode might be created
4535 		automatically, though.
4536 	\param mode The type and permissions for the node to be created.
4537 	\param flags Flags to be passed to the creating FS.
4538 	\param kernel \c true, if called in the kernel context (relevant only if
4539 		\a path is not \c NULL and not absolute).
4540 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4541 		file system creating the node, with the private data pointer and
4542 		operations for the super node. Can be \c NULL.
4543 	\param _createVnode Pointer to pre-allocated storage where to store the
4544 		pointer to the newly created node.
4545 	\return \c B_OK, if everything went fine, another error code otherwise.
4546 */
4547 status_t
4548 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4549 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4550 	struct vnode** _createdVnode)
4551 {
4552 	struct vnode* dirNode;
4553 	char _leaf[B_FILE_NAME_LENGTH];
4554 	char* leaf = NULL;
4555 
4556 	if (path) {
4557 		// We've got a path. Get the dir vnode and the leaf name.
4558 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4559 		if (tmpPathBuffer.InitCheck() != B_OK)
4560 			return B_NO_MEMORY;
4561 
4562 		char* tmpPath = tmpPathBuffer.LockBuffer();
4563 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4564 			return B_NAME_TOO_LONG;
4565 
4566 		// get the dir vnode and the leaf name
4567 		leaf = _leaf;
4568 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4569 		if (error != B_OK)
4570 			return error;
4571 	} else {
4572 		// No path. Create the node in the root FS.
4573 		dirNode = sRoot;
4574 		inc_vnode_ref_count(dirNode);
4575 	}
4576 
4577 	VNodePutter _(dirNode);
4578 
4579 	// check support for creating special nodes
4580 	if (!HAS_FS_CALL(dirNode, create_special_node))
4581 		return B_UNSUPPORTED;
4582 
4583 	// create the node
4584 	fs_vnode superVnode;
4585 	ino_t nodeID;
4586 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4587 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4588 	if (status != B_OK)
4589 		return status;
4590 
4591 	// lookup the node
4592 	rw_lock_read_lock(&sVnodeLock);
4593 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4594 	rw_lock_read_unlock(&sVnodeLock);
4595 
4596 	if (*_createdVnode == NULL) {
4597 		panic("vfs_create_special_node(): lookup of node failed");
4598 		return B_ERROR;
4599 	}
4600 
4601 	return B_OK;
4602 }
4603 
4604 
4605 extern "C" void
4606 vfs_put_vnode(struct vnode* vnode)
4607 {
4608 	put_vnode(vnode);
4609 }
4610 
4611 
4612 extern "C" status_t
4613 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4614 {
4615 	// Get current working directory from io context
4616 	struct io_context* context = get_current_io_context(false);
4617 	status_t status = B_OK;
4618 
4619 	mutex_lock(&context->io_mutex);
4620 
4621 	if (context->cwd != NULL) {
4622 		*_mountID = context->cwd->device;
4623 		*_vnodeID = context->cwd->id;
4624 	} else
4625 		status = B_ERROR;
4626 
4627 	mutex_unlock(&context->io_mutex);
4628 	return status;
4629 }
4630 
4631 
4632 status_t
4633 vfs_unmount(dev_t mountID, uint32 flags)
4634 {
4635 	return fs_unmount(NULL, mountID, flags, true);
4636 }
4637 
4638 
4639 extern "C" status_t
4640 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4641 {
4642 	struct vnode* vnode;
4643 
4644 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4645 	if (status != B_OK)
4646 		return status;
4647 
4648 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4649 	put_vnode(vnode);
4650 	return B_OK;
4651 }
4652 
4653 
4654 extern "C" void
4655 vfs_free_unused_vnodes(int32 level)
4656 {
4657 	vnode_low_resource_handler(NULL,
4658 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4659 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4660 		level);
4661 }
4662 
4663 
4664 extern "C" bool
4665 vfs_can_page(struct vnode* vnode, void* cookie)
4666 {
4667 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4668 
4669 	if (HAS_FS_CALL(vnode, can_page))
4670 		return FS_CALL(vnode, can_page, cookie);
4671 	return false;
4672 }
4673 
4674 
4675 extern "C" status_t
4676 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4677 	const generic_io_vec* vecs, size_t count, uint32 flags,
4678 	generic_size_t* _numBytes)
4679 {
4680 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4681 		vecs, pos));
4682 
4683 #if VFS_PAGES_IO_TRACING
4684 	generic_size_t bytesRequested = *_numBytes;
4685 #endif
4686 
4687 	IORequest request;
4688 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4689 	if (status == B_OK) {
4690 		status = vfs_vnode_io(vnode, cookie, &request);
4691 		if (status == B_OK)
4692 			status = request.Wait();
4693 		*_numBytes = request.TransferredBytes();
4694 	}
4695 
4696 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4697 		status, *_numBytes));
4698 
4699 	return status;
4700 }
4701 
4702 
4703 extern "C" status_t
4704 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4705 	const generic_io_vec* vecs, size_t count, uint32 flags,
4706 	generic_size_t* _numBytes)
4707 {
4708 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4709 		vecs, pos));
4710 
4711 #if VFS_PAGES_IO_TRACING
4712 	generic_size_t bytesRequested = *_numBytes;
4713 #endif
4714 
4715 	IORequest request;
4716 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4717 	if (status == B_OK) {
4718 		status = vfs_vnode_io(vnode, cookie, &request);
4719 		if (status == B_OK)
4720 			status = request.Wait();
4721 		*_numBytes = request.TransferredBytes();
4722 	}
4723 
4724 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4725 		status, *_numBytes));
4726 
4727 	return status;
4728 }
4729 
4730 
4731 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4732 	created if \a allocate is \c true.
4733 	In case it's successful, it will also grab a reference to the cache
4734 	it returns.
4735 */
4736 extern "C" status_t
4737 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4738 {
4739 	if (vnode->cache != NULL) {
4740 		vnode->cache->AcquireRef();
4741 		*_cache = vnode->cache;
4742 		return B_OK;
4743 	}
4744 
4745 	rw_lock_read_lock(&sVnodeLock);
4746 	vnode->Lock();
4747 
4748 	status_t status = B_OK;
4749 
4750 	// The cache could have been created in the meantime
4751 	if (vnode->cache == NULL) {
4752 		if (allocate) {
4753 			// TODO: actually the vnode needs to be busy already here, or
4754 			//	else this won't work...
4755 			bool wasBusy = vnode->IsBusy();
4756 			vnode->SetBusy(true);
4757 
4758 			vnode->Unlock();
4759 			rw_lock_read_unlock(&sVnodeLock);
4760 
4761 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4762 
4763 			rw_lock_read_lock(&sVnodeLock);
4764 			vnode->Lock();
4765 			vnode->SetBusy(wasBusy);
4766 		} else
4767 			status = B_BAD_VALUE;
4768 	}
4769 
4770 	vnode->Unlock();
4771 	rw_lock_read_unlock(&sVnodeLock);
4772 
4773 	if (status == B_OK) {
4774 		vnode->cache->AcquireRef();
4775 		*_cache = vnode->cache;
4776 	}
4777 
4778 	return status;
4779 }
4780 
4781 
4782 status_t
4783 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4784 	file_io_vec* vecs, size_t* _count)
4785 {
4786 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4787 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4788 
4789 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4790 }
4791 
4792 
4793 status_t
4794 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4795 {
4796 	status_t status = FS_CALL(vnode, read_stat, stat);
4797 
4798 	// fill in the st_dev and st_ino fields
4799 	if (status == B_OK) {
4800 		stat->st_dev = vnode->device;
4801 		stat->st_ino = vnode->id;
4802 		// the rdev field must stay unset for non-special files
4803 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4804 			stat->st_rdev = -1;
4805 	}
4806 
4807 	return status;
4808 }
4809 
4810 
4811 status_t
4812 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4813 {
4814 	struct vnode* vnode;
4815 	status_t status = get_vnode(device, inode, &vnode, true, false);
4816 	if (status != B_OK)
4817 		return status;
4818 
4819 	status = vfs_stat_vnode(vnode, stat);
4820 
4821 	put_vnode(vnode);
4822 	return status;
4823 }
4824 
4825 
4826 status_t
4827 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4828 {
4829 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4830 }
4831 
4832 
4833 status_t
4834 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4835 	bool kernel, char* path, size_t pathLength)
4836 {
4837 	struct vnode* vnode;
4838 	status_t status;
4839 
4840 	// filter invalid leaf names
4841 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4842 		return B_BAD_VALUE;
4843 
4844 	// get the vnode matching the dir's node_ref
4845 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4846 		// special cases "." and "..": we can directly get the vnode of the
4847 		// referenced directory
4848 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4849 		leaf = NULL;
4850 	} else
4851 		status = get_vnode(device, inode, &vnode, true, false);
4852 	if (status != B_OK)
4853 		return status;
4854 
4855 	// get the directory path
4856 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4857 	put_vnode(vnode);
4858 		// we don't need the vnode anymore
4859 	if (status != B_OK)
4860 		return status;
4861 
4862 	// append the leaf name
4863 	if (leaf) {
4864 		// insert a directory separator if this is not the file system root
4865 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4866 				>= pathLength)
4867 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4868 			return B_NAME_TOO_LONG;
4869 		}
4870 	}
4871 
4872 	return B_OK;
4873 }
4874 
4875 
4876 /*!	If the given descriptor locked its vnode, that lock will be released. */
4877 void
4878 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4879 {
4880 	struct vnode* vnode = fd_vnode(descriptor);
4881 
4882 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4883 		vnode->mandatory_locked_by = NULL;
4884 }
4885 
4886 
4887 /*!	Releases any POSIX locks on the file descriptor. */
4888 status_t
4889 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4890 {
4891 	struct vnode* vnode = descriptor->u.vnode;
4892 
4893 	if (HAS_FS_CALL(vnode, release_lock))
4894 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4895 
4896 	return release_advisory_lock(vnode, context, NULL, NULL);
4897 }
4898 
4899 
4900 /*!	Closes all file descriptors of the specified I/O context that
4901 	have the O_CLOEXEC flag set.
4902 */
4903 void
4904 vfs_exec_io_context(io_context* context)
4905 {
4906 	uint32 i;
4907 
4908 	for (i = 0; i < context->table_size; i++) {
4909 		mutex_lock(&context->io_mutex);
4910 
4911 		struct file_descriptor* descriptor = context->fds[i];
4912 		bool remove = false;
4913 
4914 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4915 			context->fds[i] = NULL;
4916 			context->num_used_fds--;
4917 
4918 			remove = true;
4919 		}
4920 
4921 		mutex_unlock(&context->io_mutex);
4922 
4923 		if (remove) {
4924 			close_fd(context, descriptor);
4925 			put_fd(descriptor);
4926 		}
4927 	}
4928 }
4929 
4930 
4931 /*! Sets up a new io_control structure, and inherits the properties
4932 	of the parent io_control if it is given.
4933 */
4934 io_context*
4935 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4936 {
4937 	io_context* context = (io_context*)malloc(sizeof(io_context));
4938 	if (context == NULL)
4939 		return NULL;
4940 
4941 	TIOC(NewIOContext(context, parentContext));
4942 
4943 	memset(context, 0, sizeof(io_context));
4944 	context->ref_count = 1;
4945 
4946 	MutexLocker parentLocker;
4947 
4948 	size_t tableSize;
4949 	if (parentContext != NULL) {
4950 		parentLocker.SetTo(parentContext->io_mutex, false);
4951 		tableSize = parentContext->table_size;
4952 	} else
4953 		tableSize = DEFAULT_FD_TABLE_SIZE;
4954 
4955 	// allocate space for FDs and their close-on-exec flag
4956 	context->fds = (file_descriptor**)malloc(
4957 		sizeof(struct file_descriptor*) * tableSize
4958 		+ sizeof(struct select_sync*) * tableSize
4959 		+ (tableSize + 7) / 8);
4960 	if (context->fds == NULL) {
4961 		free(context);
4962 		return NULL;
4963 	}
4964 
4965 	context->select_infos = (select_info**)(context->fds + tableSize);
4966 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4967 
4968 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4969 		+ sizeof(struct select_sync*) * tableSize
4970 		+ (tableSize + 7) / 8);
4971 
4972 	mutex_init(&context->io_mutex, "I/O context");
4973 
4974 	// Copy all parent file descriptors
4975 
4976 	if (parentContext != NULL) {
4977 		size_t i;
4978 
4979 		mutex_lock(&sIOContextRootLock);
4980 		context->root = parentContext->root;
4981 		if (context->root)
4982 			inc_vnode_ref_count(context->root);
4983 		mutex_unlock(&sIOContextRootLock);
4984 
4985 		context->cwd = parentContext->cwd;
4986 		if (context->cwd)
4987 			inc_vnode_ref_count(context->cwd);
4988 
4989 		if (parentContext->inherit_fds) {
4990 			for (i = 0; i < tableSize; i++) {
4991 				struct file_descriptor* descriptor = parentContext->fds[i];
4992 
4993 				if (descriptor != NULL
4994 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4995 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4996 					if (closeOnExec && purgeCloseOnExec)
4997 						continue;
4998 
4999 					TFD(InheritFD(context, i, descriptor, parentContext));
5000 
5001 					context->fds[i] = descriptor;
5002 					context->num_used_fds++;
5003 					atomic_add(&descriptor->ref_count, 1);
5004 					atomic_add(&descriptor->open_count, 1);
5005 
5006 					if (closeOnExec)
5007 						fd_set_close_on_exec(context, i, true);
5008 				}
5009 			}
5010 		}
5011 
5012 		parentLocker.Unlock();
5013 	} else {
5014 		context->root = sRoot;
5015 		context->cwd = sRoot;
5016 
5017 		if (context->root)
5018 			inc_vnode_ref_count(context->root);
5019 
5020 		if (context->cwd)
5021 			inc_vnode_ref_count(context->cwd);
5022 	}
5023 
5024 	context->table_size = tableSize;
5025 	context->inherit_fds = parentContext != NULL;
5026 
5027 	list_init(&context->node_monitors);
5028 	context->max_monitors = DEFAULT_NODE_MONITORS;
5029 
5030 	return context;
5031 }
5032 
5033 
5034 void
5035 vfs_get_io_context(io_context* context)
5036 {
5037 	atomic_add(&context->ref_count, 1);
5038 }
5039 
5040 
5041 void
5042 vfs_put_io_context(io_context* context)
5043 {
5044 	if (atomic_add(&context->ref_count, -1) == 1)
5045 		free_io_context(context);
5046 }
5047 
5048 
5049 status_t
5050 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5051 {
5052 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5053 		return B_BAD_VALUE;
5054 
5055 	TIOC(ResizeIOContext(context, newSize));
5056 
5057 	MutexLocker _(context->io_mutex);
5058 
5059 	uint32 oldSize = context->table_size;
5060 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5061 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5062 
5063 	// If the tables shrink, make sure none of the fds being dropped are in use.
5064 	if (newSize < oldSize) {
5065 		for (uint32 i = oldSize; i-- > newSize;) {
5066 			if (context->fds[i])
5067 				return B_BUSY;
5068 		}
5069 	}
5070 
5071 	// store pointers to the old tables
5072 	file_descriptor** oldFDs = context->fds;
5073 	select_info** oldSelectInfos = context->select_infos;
5074 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5075 
5076 	// allocate new tables
5077 	file_descriptor** newFDs = (file_descriptor**)malloc(
5078 		sizeof(struct file_descriptor*) * newSize
5079 		+ sizeof(struct select_sync*) * newSize
5080 		+ newCloseOnExitBitmapSize);
5081 	if (newFDs == NULL)
5082 		return B_NO_MEMORY;
5083 
5084 	context->fds = newFDs;
5085 	context->select_infos = (select_info**)(context->fds + newSize);
5086 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5087 	context->table_size = newSize;
5088 
5089 	// copy entries from old tables
5090 	uint32 toCopy = min_c(oldSize, newSize);
5091 
5092 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5093 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5094 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5095 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5096 
5097 	// clear additional entries, if the tables grow
5098 	if (newSize > oldSize) {
5099 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5100 		memset(context->select_infos + oldSize, 0,
5101 			sizeof(void*) * (newSize - oldSize));
5102 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5103 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5104 	}
5105 
5106 	free(oldFDs);
5107 
5108 	return B_OK;
5109 }
5110 
5111 
5112 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5113 
5114 	Given an arbitrary vnode (identified by mount and node ID), the function
5115 	checks, whether the vnode is covered by another vnode. If it is, the
5116 	function returns the mount and node ID of the covering vnode. Otherwise
5117 	it simply returns the supplied mount and node ID.
5118 
5119 	In case of error (e.g. the supplied node could not be found) the variables
5120 	for storing the resolved mount and node ID remain untouched and an error
5121 	code is returned.
5122 
5123 	\param mountID The mount ID of the vnode in question.
5124 	\param nodeID The node ID of the vnode in question.
5125 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5126 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5127 	\return
5128 	- \c B_OK, if everything went fine,
5129 	- another error code, if something went wrong.
5130 */
5131 status_t
5132 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5133 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5134 {
5135 	// get the node
5136 	struct vnode* node;
5137 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5138 	if (error != B_OK)
5139 		return error;
5140 
5141 	// resolve the node
5142 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5143 		put_vnode(node);
5144 		node = coveringNode;
5145 	}
5146 
5147 	// set the return values
5148 	*resolvedMountID = node->device;
5149 	*resolvedNodeID = node->id;
5150 
5151 	put_vnode(node);
5152 
5153 	return B_OK;
5154 }
5155 
5156 
5157 status_t
5158 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5159 	ino_t* _mountPointNodeID)
5160 {
5161 	ReadLocker nodeLocker(sVnodeLock);
5162 	MutexLocker mountLocker(sMountMutex);
5163 
5164 	struct fs_mount* mount = find_mount(mountID);
5165 	if (mount == NULL)
5166 		return B_BAD_VALUE;
5167 
5168 	Vnode* mountPoint = mount->covers_vnode;
5169 
5170 	*_mountPointMountID = mountPoint->device;
5171 	*_mountPointNodeID = mountPoint->id;
5172 
5173 	return B_OK;
5174 }
5175 
5176 
5177 status_t
5178 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5179 	ino_t coveredNodeID)
5180 {
5181 	// get the vnodes
5182 	Vnode* vnode;
5183 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5184 	if (error != B_OK)
5185 		return B_BAD_VALUE;
5186 	VNodePutter vnodePutter(vnode);
5187 
5188 	Vnode* coveredVnode;
5189 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5190 		false);
5191 	if (error != B_OK)
5192 		return B_BAD_VALUE;
5193 	VNodePutter coveredVnodePutter(coveredVnode);
5194 
5195 	// establish the covered/covering links
5196 	WriteLocker locker(sVnodeLock);
5197 
5198 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5199 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5200 		return B_BUSY;
5201 	}
5202 
5203 	vnode->covers = coveredVnode;
5204 	vnode->SetCovering(true);
5205 
5206 	coveredVnode->covered_by = vnode;
5207 	coveredVnode->SetCovered(true);
5208 
5209 	// the vnodes do now reference each other
5210 	inc_vnode_ref_count(vnode);
5211 	inc_vnode_ref_count(coveredVnode);
5212 
5213 	return B_OK;
5214 }
5215 
5216 
5217 int
5218 vfs_getrlimit(int resource, struct rlimit* rlp)
5219 {
5220 	if (!rlp)
5221 		return B_BAD_ADDRESS;
5222 
5223 	switch (resource) {
5224 		case RLIMIT_NOFILE:
5225 		{
5226 			struct io_context* context = get_current_io_context(false);
5227 			MutexLocker _(context->io_mutex);
5228 
5229 			rlp->rlim_cur = context->table_size;
5230 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5231 			return 0;
5232 		}
5233 
5234 		case RLIMIT_NOVMON:
5235 		{
5236 			struct io_context* context = get_current_io_context(false);
5237 			MutexLocker _(context->io_mutex);
5238 
5239 			rlp->rlim_cur = context->max_monitors;
5240 			rlp->rlim_max = MAX_NODE_MONITORS;
5241 			return 0;
5242 		}
5243 
5244 		default:
5245 			return B_BAD_VALUE;
5246 	}
5247 }
5248 
5249 
5250 int
5251 vfs_setrlimit(int resource, const struct rlimit* rlp)
5252 {
5253 	if (!rlp)
5254 		return B_BAD_ADDRESS;
5255 
5256 	switch (resource) {
5257 		case RLIMIT_NOFILE:
5258 			/* TODO: check getuid() */
5259 			if (rlp->rlim_max != RLIM_SAVED_MAX
5260 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5261 				return B_NOT_ALLOWED;
5262 
5263 			return vfs_resize_fd_table(get_current_io_context(false),
5264 				rlp->rlim_cur);
5265 
5266 		case RLIMIT_NOVMON:
5267 			/* TODO: check getuid() */
5268 			if (rlp->rlim_max != RLIM_SAVED_MAX
5269 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5270 				return B_NOT_ALLOWED;
5271 
5272 			return resize_monitor_table(get_current_io_context(false),
5273 				rlp->rlim_cur);
5274 
5275 		default:
5276 			return B_BAD_VALUE;
5277 	}
5278 }
5279 
5280 
5281 status_t
5282 vfs_init(kernel_args* args)
5283 {
5284 	vnode::StaticInit();
5285 
5286 	sVnodeTable = new(std::nothrow) VnodeTable();
5287 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5288 		panic("vfs_init: error creating vnode hash table\n");
5289 
5290 	struct vnode dummy_vnode;
5291 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5292 
5293 	struct fs_mount dummyMount;
5294 	sMountsTable = new(std::nothrow) MountTable();
5295 	if (sMountsTable == NULL
5296 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5297 		panic("vfs_init: error creating mounts hash table\n");
5298 
5299 	node_monitor_init();
5300 
5301 	sRoot = NULL;
5302 
5303 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5304 
5305 	if (block_cache_init() != B_OK)
5306 		return B_ERROR;
5307 
5308 #ifdef ADD_DEBUGGER_COMMANDS
5309 	// add some debugger commands
5310 	add_debugger_command_etc("vnode", &dump_vnode,
5311 		"Print info about the specified vnode",
5312 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5313 		"Prints information about the vnode specified by address <vnode> or\n"
5314 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5315 		"constructed and printed. It might not be possible to construct a\n"
5316 		"complete path, though.\n",
5317 		0);
5318 	add_debugger_command("vnodes", &dump_vnodes,
5319 		"list all vnodes (from the specified device)");
5320 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5321 		"list all vnode caches");
5322 	add_debugger_command("mount", &dump_mount,
5323 		"info about the specified fs_mount");
5324 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5325 	add_debugger_command("io_context", &dump_io_context,
5326 		"info about the I/O context");
5327 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5328 		"info about vnode usage");
5329 #endif
5330 
5331 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5332 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5333 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5334 		0);
5335 
5336 	fifo_init();
5337 	file_map_init();
5338 
5339 	return file_cache_init();
5340 }
5341 
5342 
5343 //	#pragma mark - fd_ops implementations
5344 
5345 
5346 /*!
5347 	Calls fs_open() on the given vnode and returns a new
5348 	file descriptor for it
5349 */
5350 static int
5351 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5352 {
5353 	void* cookie;
5354 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5355 	if (status != B_OK)
5356 		return status;
5357 
5358 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5359 	if (fd < 0) {
5360 		FS_CALL(vnode, close, cookie);
5361 		FS_CALL(vnode, free_cookie, cookie);
5362 	}
5363 	return fd;
5364 }
5365 
5366 
5367 /*!
5368 	Calls fs_open() on the given vnode and returns a new
5369 	file descriptor for it
5370 */
5371 static int
5372 create_vnode(struct vnode* directory, const char* name, int openMode,
5373 	int perms, bool kernel)
5374 {
5375 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5376 	status_t status = B_ERROR;
5377 	struct vnode* vnode;
5378 	void* cookie;
5379 	ino_t newID;
5380 
5381 	// This is somewhat tricky: If the entry already exists, the FS responsible
5382 	// for the directory might not necessarily also be the one responsible for
5383 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5384 	// we can actually never call the create() hook without O_EXCL. Instead we
5385 	// try to look the entry up first. If it already exists, we just open the
5386 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5387 	// introduces a race condition, since someone else might have created the
5388 	// entry in the meantime. We hope the respective FS returns the correct
5389 	// error code and retry (up to 3 times) again.
5390 
5391 	for (int i = 0; i < 3 && status != B_OK; i++) {
5392 		// look the node up
5393 		status = lookup_dir_entry(directory, name, &vnode);
5394 		if (status == B_OK) {
5395 			VNodePutter putter(vnode);
5396 
5397 			if ((openMode & O_EXCL) != 0)
5398 				return B_FILE_EXISTS;
5399 
5400 			// If the node is a symlink, we have to follow it, unless
5401 			// O_NOTRAVERSE is set.
5402 			if (S_ISLNK(vnode->Type()) && traverse) {
5403 				putter.Put();
5404 				char clonedName[B_FILE_NAME_LENGTH + 1];
5405 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5406 						>= B_FILE_NAME_LENGTH) {
5407 					return B_NAME_TOO_LONG;
5408 				}
5409 
5410 				inc_vnode_ref_count(directory);
5411 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5412 					kernel, &vnode, NULL);
5413 				if (status != B_OK)
5414 					return status;
5415 
5416 				putter.SetTo(vnode);
5417 			}
5418 
5419 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5420 				return B_LINK_LIMIT;
5421 
5422 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5423 			// on success keep the vnode reference for the FD
5424 			if (fd >= 0)
5425 				putter.Detach();
5426 
5427 			return fd;
5428 		}
5429 
5430 		// it doesn't exist yet -- try to create it
5431 
5432 		if (!HAS_FS_CALL(directory, create))
5433 			return B_READ_ONLY_DEVICE;
5434 
5435 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5436 			&cookie, &newID);
5437 		if (status != B_OK
5438 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5439 			return status;
5440 		}
5441 	}
5442 
5443 	if (status != B_OK)
5444 		return status;
5445 
5446 	// the node has been created successfully
5447 
5448 	rw_lock_read_lock(&sVnodeLock);
5449 	vnode = lookup_vnode(directory->device, newID);
5450 	rw_lock_read_unlock(&sVnodeLock);
5451 
5452 	if (vnode == NULL) {
5453 		panic("vfs: fs_create() returned success but there is no vnode, "
5454 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5455 		return B_BAD_VALUE;
5456 	}
5457 
5458 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5459 	if (fd >= 0)
5460 		return fd;
5461 
5462 	status = fd;
5463 
5464 	// something went wrong, clean up
5465 
5466 	FS_CALL(vnode, close, cookie);
5467 	FS_CALL(vnode, free_cookie, cookie);
5468 	put_vnode(vnode);
5469 
5470 	FS_CALL(directory, unlink, name);
5471 
5472 	return status;
5473 }
5474 
5475 
5476 /*! Calls fs open_dir() on the given vnode and returns a new
5477 	file descriptor for it
5478 */
5479 static int
5480 open_dir_vnode(struct vnode* vnode, bool kernel)
5481 {
5482 	void* cookie;
5483 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5484 	if (status != B_OK)
5485 		return status;
5486 
5487 	// directory is opened, create a fd
5488 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5489 	if (status >= 0)
5490 		return status;
5491 
5492 	FS_CALL(vnode, close_dir, cookie);
5493 	FS_CALL(vnode, free_dir_cookie, cookie);
5494 
5495 	return status;
5496 }
5497 
5498 
5499 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5500 	file descriptor for it.
5501 	Used by attr_dir_open(), and attr_dir_open_fd().
5502 */
5503 static int
5504 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5505 {
5506 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5507 		return B_UNSUPPORTED;
5508 
5509 	void* cookie;
5510 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5511 	if (status != B_OK)
5512 		return status;
5513 
5514 	// directory is opened, create a fd
5515 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5516 		kernel);
5517 	if (status >= 0)
5518 		return status;
5519 
5520 	FS_CALL(vnode, close_attr_dir, cookie);
5521 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5522 
5523 	return status;
5524 }
5525 
5526 
5527 static int
5528 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5529 	int openMode, int perms, bool kernel)
5530 {
5531 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5532 		"kernel %d\n", name, openMode, perms, kernel));
5533 
5534 	// get directory to put the new file in
5535 	struct vnode* directory;
5536 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5537 	if (status != B_OK)
5538 		return status;
5539 
5540 	status = create_vnode(directory, name, openMode, perms, kernel);
5541 	put_vnode(directory);
5542 
5543 	return status;
5544 }
5545 
5546 
5547 static int
5548 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5549 {
5550 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5551 		openMode, perms, kernel));
5552 
5553 	// get directory to put the new file in
5554 	char name[B_FILE_NAME_LENGTH];
5555 	struct vnode* directory;
5556 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5557 		kernel);
5558 	if (status < 0)
5559 		return status;
5560 
5561 	status = create_vnode(directory, name, openMode, perms, kernel);
5562 
5563 	put_vnode(directory);
5564 	return status;
5565 }
5566 
5567 
5568 static int
5569 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5570 	int openMode, bool kernel)
5571 {
5572 	if (name == NULL || *name == '\0')
5573 		return B_BAD_VALUE;
5574 
5575 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5576 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5577 
5578 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5579 
5580 	// get the vnode matching the entry_ref
5581 	struct vnode* vnode;
5582 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5583 		kernel, &vnode);
5584 	if (status != B_OK)
5585 		return status;
5586 
5587 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5588 		put_vnode(vnode);
5589 		return B_LINK_LIMIT;
5590 	}
5591 
5592 	int newFD = open_vnode(vnode, openMode, kernel);
5593 	if (newFD >= 0) {
5594 		// The vnode reference has been transferred to the FD
5595 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5596 			directoryID, vnode->id, name);
5597 	} else
5598 		put_vnode(vnode);
5599 
5600 	return newFD;
5601 }
5602 
5603 
5604 static int
5605 file_open(int fd, char* path, int openMode, bool kernel)
5606 {
5607 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5608 
5609 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5610 		fd, path, openMode, kernel));
5611 
5612 	// get the vnode matching the vnode + path combination
5613 	struct vnode* vnode;
5614 	ino_t parentID;
5615 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5616 		&parentID, kernel);
5617 	if (status != B_OK)
5618 		return status;
5619 
5620 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5621 		put_vnode(vnode);
5622 		return B_LINK_LIMIT;
5623 	}
5624 
5625 	// open the vnode
5626 	int newFD = open_vnode(vnode, openMode, kernel);
5627 	if (newFD >= 0) {
5628 		// The vnode reference has been transferred to the FD
5629 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5630 			vnode->device, parentID, vnode->id, NULL);
5631 	} else
5632 		put_vnode(vnode);
5633 
5634 	return newFD;
5635 }
5636 
5637 
5638 static status_t
5639 file_close(struct file_descriptor* descriptor)
5640 {
5641 	struct vnode* vnode = descriptor->u.vnode;
5642 	status_t status = B_OK;
5643 
5644 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5645 
5646 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5647 		vnode->id);
5648 	if (HAS_FS_CALL(vnode, close)) {
5649 		status = FS_CALL(vnode, close, descriptor->cookie);
5650 	}
5651 
5652 	if (status == B_OK) {
5653 		// remove all outstanding locks for this team
5654 		if (HAS_FS_CALL(vnode, release_lock))
5655 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5656 		else
5657 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5658 	}
5659 	return status;
5660 }
5661 
5662 
5663 static void
5664 file_free_fd(struct file_descriptor* descriptor)
5665 {
5666 	struct vnode* vnode = descriptor->u.vnode;
5667 
5668 	if (vnode != NULL) {
5669 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5670 		put_vnode(vnode);
5671 	}
5672 }
5673 
5674 
5675 static status_t
5676 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5677 	size_t* length)
5678 {
5679 	struct vnode* vnode = descriptor->u.vnode;
5680 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5681 		pos, length, *length));
5682 
5683 	if (S_ISDIR(vnode->Type()))
5684 		return B_IS_A_DIRECTORY;
5685 
5686 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5687 }
5688 
5689 
5690 static status_t
5691 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5692 	size_t* length)
5693 {
5694 	struct vnode* vnode = descriptor->u.vnode;
5695 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5696 		length));
5697 
5698 	if (S_ISDIR(vnode->Type()))
5699 		return B_IS_A_DIRECTORY;
5700 	if (!HAS_FS_CALL(vnode, write))
5701 		return B_READ_ONLY_DEVICE;
5702 
5703 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5704 }
5705 
5706 
5707 static off_t
5708 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5709 {
5710 	struct vnode* vnode = descriptor->u.vnode;
5711 	off_t offset;
5712 	bool isDevice = false;
5713 
5714 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5715 		seekType));
5716 
5717 	// some kinds of files are not seekable
5718 	switch (vnode->Type() & S_IFMT) {
5719 		case S_IFIFO:
5720 		case S_IFSOCK:
5721 			return ESPIPE;
5722 
5723 		// drivers publish block devices as chr, so pick both
5724 		case S_IFBLK:
5725 		case S_IFCHR:
5726 			isDevice = true;
5727 			break;
5728 		// The Open Group Base Specs don't mention any file types besides pipes,
5729 		// fifos, and sockets specially, so we allow seeking them.
5730 		case S_IFREG:
5731 		case S_IFDIR:
5732 		case S_IFLNK:
5733 			break;
5734 	}
5735 
5736 	switch (seekType) {
5737 		case SEEK_SET:
5738 			offset = 0;
5739 			break;
5740 		case SEEK_CUR:
5741 			offset = descriptor->pos;
5742 			break;
5743 		case SEEK_END:
5744 		{
5745 			// stat() the node
5746 			if (!HAS_FS_CALL(vnode, read_stat))
5747 				return B_UNSUPPORTED;
5748 
5749 			struct stat stat;
5750 			status_t status = FS_CALL(vnode, read_stat, &stat);
5751 			if (status != B_OK)
5752 				return status;
5753 
5754 			offset = stat.st_size;
5755 
5756 			if (offset == 0 && isDevice) {
5757 				// stat() on regular drivers doesn't report size
5758 				device_geometry geometry;
5759 
5760 				if (HAS_FS_CALL(vnode, ioctl)) {
5761 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5762 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5763 					if (status == B_OK)
5764 						offset = (off_t)geometry.bytes_per_sector
5765 							* geometry.sectors_per_track
5766 							* geometry.cylinder_count
5767 							* geometry.head_count;
5768 				}
5769 			}
5770 
5771 			break;
5772 		}
5773 		default:
5774 			return B_BAD_VALUE;
5775 	}
5776 
5777 	// assumes off_t is 64 bits wide
5778 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5779 		return B_BUFFER_OVERFLOW;
5780 
5781 	pos += offset;
5782 	if (pos < 0)
5783 		return B_BAD_VALUE;
5784 
5785 	return descriptor->pos = pos;
5786 }
5787 
5788 
5789 static status_t
5790 file_select(struct file_descriptor* descriptor, uint8 event,
5791 	struct selectsync* sync)
5792 {
5793 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5794 
5795 	struct vnode* vnode = descriptor->u.vnode;
5796 
5797 	// If the FS has no select() hook, notify select() now.
5798 	if (!HAS_FS_CALL(vnode, select)) {
5799 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5800 			return notify_select_event(sync, event);
5801 		else
5802 			return B_OK;
5803 	}
5804 
5805 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5806 }
5807 
5808 
5809 static status_t
5810 file_deselect(struct file_descriptor* descriptor, uint8 event,
5811 	struct selectsync* sync)
5812 {
5813 	struct vnode* vnode = descriptor->u.vnode;
5814 
5815 	if (!HAS_FS_CALL(vnode, deselect))
5816 		return B_OK;
5817 
5818 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5819 }
5820 
5821 
5822 static status_t
5823 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5824 	bool kernel)
5825 {
5826 	struct vnode* vnode;
5827 	status_t status;
5828 
5829 	if (name == NULL || *name == '\0')
5830 		return B_BAD_VALUE;
5831 
5832 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5833 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5834 
5835 	status = get_vnode(mountID, parentID, &vnode, true, false);
5836 	if (status != B_OK)
5837 		return status;
5838 
5839 	if (HAS_FS_CALL(vnode, create_dir))
5840 		status = FS_CALL(vnode, create_dir, name, perms);
5841 	else
5842 		status = B_READ_ONLY_DEVICE;
5843 
5844 	put_vnode(vnode);
5845 	return status;
5846 }
5847 
5848 
5849 static status_t
5850 dir_create(int fd, char* path, int perms, bool kernel)
5851 {
5852 	char filename[B_FILE_NAME_LENGTH];
5853 	struct vnode* vnode;
5854 	status_t status;
5855 
5856 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5857 		kernel));
5858 
5859 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5860 	if (status < 0)
5861 		return status;
5862 
5863 	if (HAS_FS_CALL(vnode, create_dir)) {
5864 		status = FS_CALL(vnode, create_dir, filename, perms);
5865 	} else
5866 		status = B_READ_ONLY_DEVICE;
5867 
5868 	put_vnode(vnode);
5869 	return status;
5870 }
5871 
5872 
5873 static int
5874 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5875 {
5876 	FUNCTION(("dir_open_entry_ref()\n"));
5877 
5878 	if (name && name[0] == '\0')
5879 		return B_BAD_VALUE;
5880 
5881 	// get the vnode matching the entry_ref/node_ref
5882 	struct vnode* vnode;
5883 	status_t status;
5884 	if (name) {
5885 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5886 			&vnode);
5887 	} else
5888 		status = get_vnode(mountID, parentID, &vnode, true, false);
5889 	if (status != B_OK)
5890 		return status;
5891 
5892 	int newFD = open_dir_vnode(vnode, kernel);
5893 	if (newFD >= 0) {
5894 		// The vnode reference has been transferred to the FD
5895 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5896 			vnode->id, name);
5897 	} else
5898 		put_vnode(vnode);
5899 
5900 	return newFD;
5901 }
5902 
5903 
5904 static int
5905 dir_open(int fd, char* path, bool kernel)
5906 {
5907 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5908 		kernel));
5909 
5910 	// get the vnode matching the vnode + path combination
5911 	struct vnode* vnode = NULL;
5912 	ino_t parentID;
5913 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5914 		kernel);
5915 	if (status != B_OK)
5916 		return status;
5917 
5918 	// open the dir
5919 	int newFD = open_dir_vnode(vnode, kernel);
5920 	if (newFD >= 0) {
5921 		// The vnode reference has been transferred to the FD
5922 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5923 			parentID, vnode->id, NULL);
5924 	} else
5925 		put_vnode(vnode);
5926 
5927 	return newFD;
5928 }
5929 
5930 
5931 static status_t
5932 dir_close(struct file_descriptor* descriptor)
5933 {
5934 	struct vnode* vnode = descriptor->u.vnode;
5935 
5936 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5937 
5938 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5939 		vnode->id);
5940 	if (HAS_FS_CALL(vnode, close_dir))
5941 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5942 
5943 	return B_OK;
5944 }
5945 
5946 
5947 static void
5948 dir_free_fd(struct file_descriptor* descriptor)
5949 {
5950 	struct vnode* vnode = descriptor->u.vnode;
5951 
5952 	if (vnode != NULL) {
5953 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5954 		put_vnode(vnode);
5955 	}
5956 }
5957 
5958 
5959 static status_t
5960 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5961 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5962 {
5963 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5964 		bufferSize, _count);
5965 }
5966 
5967 
5968 static status_t
5969 fix_dirent(struct vnode* parent, struct dirent* entry,
5970 	struct io_context* ioContext)
5971 {
5972 	// set d_pdev and d_pino
5973 	entry->d_pdev = parent->device;
5974 	entry->d_pino = parent->id;
5975 
5976 	// If this is the ".." entry and the directory covering another vnode,
5977 	// we need to replace d_dev and d_ino with the actual values.
5978 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5979 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
5980 			ioContext);
5981 	}
5982 
5983 	// resolve covered vnodes
5984 	ReadLocker _(&sVnodeLock);
5985 
5986 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5987 	if (vnode != NULL && vnode->covered_by != NULL) {
5988 		do {
5989 			vnode = vnode->covered_by;
5990 		} while (vnode->covered_by != NULL);
5991 
5992 		entry->d_dev = vnode->device;
5993 		entry->d_ino = vnode->id;
5994 	}
5995 
5996 	return B_OK;
5997 }
5998 
5999 
6000 static status_t
6001 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6002 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6003 {
6004 	if (!HAS_FS_CALL(vnode, read_dir))
6005 		return B_UNSUPPORTED;
6006 
6007 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6008 		_count);
6009 	if (error != B_OK)
6010 		return error;
6011 
6012 	// we need to adjust the read dirents
6013 	uint32 count = *_count;
6014 	for (uint32 i = 0; i < count; i++) {
6015 		error = fix_dirent(vnode, buffer, ioContext);
6016 		if (error != B_OK)
6017 			return error;
6018 
6019 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6020 	}
6021 
6022 	return error;
6023 }
6024 
6025 
6026 static status_t
6027 dir_rewind(struct file_descriptor* descriptor)
6028 {
6029 	struct vnode* vnode = descriptor->u.vnode;
6030 
6031 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6032 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6033 	}
6034 
6035 	return B_UNSUPPORTED;
6036 }
6037 
6038 
6039 static status_t
6040 dir_remove(int fd, char* path, bool kernel)
6041 {
6042 	char name[B_FILE_NAME_LENGTH];
6043 	struct vnode* directory;
6044 	status_t status;
6045 
6046 	if (path != NULL) {
6047 		// we need to make sure our path name doesn't stop with "/", ".",
6048 		// or ".."
6049 		char* lastSlash;
6050 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6051 			char* leaf = lastSlash + 1;
6052 			if (!strcmp(leaf, ".."))
6053 				return B_NOT_ALLOWED;
6054 
6055 			// omit multiple slashes
6056 			while (lastSlash > path && lastSlash[-1] == '/')
6057 				lastSlash--;
6058 
6059 			if (leaf[0]
6060 				&& strcmp(leaf, ".")) {
6061 				break;
6062 			}
6063 			// "name/" -> "name", or "name/." -> "name"
6064 			lastSlash[0] = '\0';
6065 		}
6066 
6067 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6068 			return B_NOT_ALLOWED;
6069 	}
6070 
6071 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6072 	if (status != B_OK)
6073 		return status;
6074 
6075 	if (HAS_FS_CALL(directory, remove_dir))
6076 		status = FS_CALL(directory, remove_dir, name);
6077 	else
6078 		status = B_READ_ONLY_DEVICE;
6079 
6080 	put_vnode(directory);
6081 	return status;
6082 }
6083 
6084 
6085 static status_t
6086 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6087 	size_t length)
6088 {
6089 	struct vnode* vnode = descriptor->u.vnode;
6090 
6091 	if (HAS_FS_CALL(vnode, ioctl))
6092 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6093 
6094 	return B_DEV_INVALID_IOCTL;
6095 }
6096 
6097 
6098 static status_t
6099 common_fcntl(int fd, int op, size_t argument, bool kernel)
6100 {
6101 	struct flock flock;
6102 
6103 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6104 		fd, op, argument, kernel ? "kernel" : "user"));
6105 
6106 	struct io_context* context = get_current_io_context(kernel);
6107 
6108 	struct file_descriptor* descriptor = get_fd(context, fd);
6109 	if (descriptor == NULL)
6110 		return B_FILE_ERROR;
6111 
6112 	struct vnode* vnode = fd_vnode(descriptor);
6113 
6114 	status_t status = B_OK;
6115 
6116 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6117 		if (descriptor->type != FDTYPE_FILE)
6118 			status = B_BAD_VALUE;
6119 		else if (kernel)
6120 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6121 		else if (user_memcpy(&flock, (struct flock*)argument,
6122 				sizeof(struct flock)) != B_OK)
6123 			status = B_BAD_ADDRESS;
6124 		if (status != B_OK) {
6125 			put_fd(descriptor);
6126 			return status;
6127 		}
6128 	}
6129 
6130 	switch (op) {
6131 		case F_SETFD:
6132 		{
6133 			// Set file descriptor flags
6134 
6135 			// O_CLOEXEC is the only flag available at this time
6136 			mutex_lock(&context->io_mutex);
6137 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6138 			mutex_unlock(&context->io_mutex);
6139 
6140 			status = B_OK;
6141 			break;
6142 		}
6143 
6144 		case F_GETFD:
6145 		{
6146 			// Get file descriptor flags
6147 			mutex_lock(&context->io_mutex);
6148 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6149 			mutex_unlock(&context->io_mutex);
6150 			break;
6151 		}
6152 
6153 		case F_SETFL:
6154 			// Set file descriptor open mode
6155 
6156 			// we only accept changes to O_APPEND and O_NONBLOCK
6157 			argument &= O_APPEND | O_NONBLOCK;
6158 			if (descriptor->ops->fd_set_flags != NULL) {
6159 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6160 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6161 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6162 					(int)argument);
6163 			} else
6164 				status = B_UNSUPPORTED;
6165 
6166 			if (status == B_OK) {
6167 				// update this descriptor's open_mode field
6168 				descriptor->open_mode = (descriptor->open_mode
6169 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6170 			}
6171 
6172 			break;
6173 
6174 		case F_GETFL:
6175 			// Get file descriptor open mode
6176 			status = descriptor->open_mode;
6177 			break;
6178 
6179 		case F_DUPFD:
6180 		case F_DUPFD_CLOEXEC:
6181 		{
6182 			status = new_fd_etc(context, descriptor, (int)argument);
6183 			if (status >= 0) {
6184 				mutex_lock(&context->io_mutex);
6185 				fd_set_close_on_exec(context, fd, op == F_DUPFD_CLOEXEC);
6186 				mutex_unlock(&context->io_mutex);
6187 
6188 				atomic_add(&descriptor->ref_count, 1);
6189 			}
6190 			break;
6191 		}
6192 
6193 		case F_GETLK:
6194 			if (vnode != NULL) {
6195 				struct flock normalizedLock;
6196 
6197 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6198 				status = normalize_flock(descriptor, &normalizedLock);
6199 				if (status != B_OK)
6200 					break;
6201 
6202 				if (HAS_FS_CALL(vnode, test_lock)) {
6203 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6204 						&normalizedLock);
6205 				} else
6206 					status = test_advisory_lock(vnode, &normalizedLock);
6207 				if (status == B_OK) {
6208 					if (normalizedLock.l_type == F_UNLCK) {
6209 						// no conflicting lock found, copy back the same struct
6210 						// we were given except change type to F_UNLCK
6211 						flock.l_type = F_UNLCK;
6212 						if (kernel) {
6213 							memcpy((struct flock*)argument, &flock,
6214 								sizeof(struct flock));
6215 						} else {
6216 							status = user_memcpy((struct flock*)argument,
6217 								&flock, sizeof(struct flock));
6218 						}
6219 					} else {
6220 						// a conflicting lock was found, copy back its range and
6221 						// type
6222 						if (normalizedLock.l_len == OFF_MAX)
6223 							normalizedLock.l_len = 0;
6224 
6225 						if (kernel) {
6226 							memcpy((struct flock*)argument,
6227 								&normalizedLock, sizeof(struct flock));
6228 						} else {
6229 							status = user_memcpy((struct flock*)argument,
6230 								&normalizedLock, sizeof(struct flock));
6231 						}
6232 					}
6233 				}
6234 			} else
6235 				status = B_BAD_VALUE;
6236 			break;
6237 
6238 		case F_SETLK:
6239 		case F_SETLKW:
6240 			status = normalize_flock(descriptor, &flock);
6241 			if (status != B_OK)
6242 				break;
6243 
6244 			if (vnode == NULL) {
6245 				status = B_BAD_VALUE;
6246 			} else if (flock.l_type == F_UNLCK) {
6247 				if (HAS_FS_CALL(vnode, release_lock)) {
6248 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6249 						&flock);
6250 				} else {
6251 					status = release_advisory_lock(vnode, context, NULL,
6252 						&flock);
6253 				}
6254 			} else {
6255 				// the open mode must match the lock type
6256 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6257 						&& flock.l_type == F_WRLCK)
6258 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6259 						&& flock.l_type == F_RDLCK))
6260 					status = B_FILE_ERROR;
6261 				else {
6262 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6263 						status = FS_CALL(vnode, acquire_lock,
6264 							descriptor->cookie, &flock, op == F_SETLKW);
6265 					} else {
6266 						status = acquire_advisory_lock(vnode, context, NULL,
6267 							&flock, op == F_SETLKW);
6268 					}
6269 				}
6270 			}
6271 			break;
6272 
6273 		// ToDo: add support for more ops?
6274 
6275 		default:
6276 			status = B_BAD_VALUE;
6277 	}
6278 
6279 	put_fd(descriptor);
6280 	return status;
6281 }
6282 
6283 
6284 static status_t
6285 common_sync(int fd, bool kernel)
6286 {
6287 	struct file_descriptor* descriptor;
6288 	struct vnode* vnode;
6289 	status_t status;
6290 
6291 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6292 
6293 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6294 	if (descriptor == NULL)
6295 		return B_FILE_ERROR;
6296 
6297 	if (HAS_FS_CALL(vnode, fsync))
6298 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6299 	else
6300 		status = B_UNSUPPORTED;
6301 
6302 	put_fd(descriptor);
6303 	return status;
6304 }
6305 
6306 
6307 static status_t
6308 common_lock_node(int fd, bool kernel)
6309 {
6310 	struct file_descriptor* descriptor;
6311 	struct vnode* vnode;
6312 
6313 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6314 	if (descriptor == NULL)
6315 		return B_FILE_ERROR;
6316 
6317 	status_t status = B_OK;
6318 
6319 	// We need to set the locking atomically - someone
6320 	// else might set one at the same time
6321 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6322 			(file_descriptor*)NULL) != NULL)
6323 		status = B_BUSY;
6324 
6325 	put_fd(descriptor);
6326 	return status;
6327 }
6328 
6329 
6330 static status_t
6331 common_unlock_node(int fd, bool kernel)
6332 {
6333 	struct file_descriptor* descriptor;
6334 	struct vnode* vnode;
6335 
6336 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6337 	if (descriptor == NULL)
6338 		return B_FILE_ERROR;
6339 
6340 	status_t status = B_OK;
6341 
6342 	// We need to set the locking atomically - someone
6343 	// else might set one at the same time
6344 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6345 			(file_descriptor*)NULL, descriptor) != descriptor)
6346 		status = B_BAD_VALUE;
6347 
6348 	put_fd(descriptor);
6349 	return status;
6350 }
6351 
6352 
6353 static status_t
6354 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6355 	bool kernel)
6356 {
6357 	struct vnode* vnode;
6358 	status_t status;
6359 
6360 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6361 	if (status != B_OK)
6362 		return status;
6363 
6364 	if (HAS_FS_CALL(vnode, read_symlink)) {
6365 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6366 	} else
6367 		status = B_BAD_VALUE;
6368 
6369 	put_vnode(vnode);
6370 	return status;
6371 }
6372 
6373 
6374 static status_t
6375 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6376 	bool kernel)
6377 {
6378 	// path validity checks have to be in the calling function!
6379 	char name[B_FILE_NAME_LENGTH];
6380 	struct vnode* vnode;
6381 	status_t status;
6382 
6383 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6384 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6385 
6386 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6387 	if (status != B_OK)
6388 		return status;
6389 
6390 	if (HAS_FS_CALL(vnode, create_symlink))
6391 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6392 	else {
6393 		status = HAS_FS_CALL(vnode, write)
6394 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6395 	}
6396 
6397 	put_vnode(vnode);
6398 
6399 	return status;
6400 }
6401 
6402 
6403 static status_t
6404 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6405 	bool traverseLeafLink, bool kernel)
6406 {
6407 	// path validity checks have to be in the calling function!
6408 
6409 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6410 		toPath, kernel));
6411 
6412 	char name[B_FILE_NAME_LENGTH];
6413 	struct vnode* directory;
6414 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6415 		kernel);
6416 	if (status != B_OK)
6417 		return status;
6418 
6419 	struct vnode* vnode;
6420 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6421 		kernel);
6422 	if (status != B_OK)
6423 		goto err;
6424 
6425 	if (directory->mount != vnode->mount) {
6426 		status = B_CROSS_DEVICE_LINK;
6427 		goto err1;
6428 	}
6429 
6430 	if (HAS_FS_CALL(directory, link))
6431 		status = FS_CALL(directory, link, name, vnode);
6432 	else
6433 		status = B_READ_ONLY_DEVICE;
6434 
6435 err1:
6436 	put_vnode(vnode);
6437 err:
6438 	put_vnode(directory);
6439 
6440 	return status;
6441 }
6442 
6443 
6444 static status_t
6445 common_unlink(int fd, char* path, bool kernel)
6446 {
6447 	char filename[B_FILE_NAME_LENGTH];
6448 	struct vnode* vnode;
6449 	status_t status;
6450 
6451 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6452 		kernel));
6453 
6454 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6455 	if (status < 0)
6456 		return status;
6457 
6458 	if (HAS_FS_CALL(vnode, unlink))
6459 		status = FS_CALL(vnode, unlink, filename);
6460 	else
6461 		status = B_READ_ONLY_DEVICE;
6462 
6463 	put_vnode(vnode);
6464 
6465 	return status;
6466 }
6467 
6468 
6469 static status_t
6470 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6471 {
6472 	struct vnode* vnode;
6473 	status_t status;
6474 
6475 	// TODO: honor effectiveUserGroup argument
6476 
6477 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6478 	if (status != B_OK)
6479 		return status;
6480 
6481 	if (HAS_FS_CALL(vnode, access))
6482 		status = FS_CALL(vnode, access, mode);
6483 	else
6484 		status = B_OK;
6485 
6486 	put_vnode(vnode);
6487 
6488 	return status;
6489 }
6490 
6491 
6492 static status_t
6493 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6494 {
6495 	struct vnode* fromVnode;
6496 	struct vnode* toVnode;
6497 	char fromName[B_FILE_NAME_LENGTH];
6498 	char toName[B_FILE_NAME_LENGTH];
6499 	status_t status;
6500 
6501 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6502 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6503 
6504 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6505 	if (status != B_OK)
6506 		return status;
6507 
6508 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6509 	if (status != B_OK)
6510 		goto err1;
6511 
6512 	if (fromVnode->device != toVnode->device) {
6513 		status = B_CROSS_DEVICE_LINK;
6514 		goto err2;
6515 	}
6516 
6517 	if (fromName[0] == '\0' || toName[0] == '\0'
6518 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6519 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6520 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6521 		status = B_BAD_VALUE;
6522 		goto err2;
6523 	}
6524 
6525 	if (HAS_FS_CALL(fromVnode, rename))
6526 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6527 	else
6528 		status = B_READ_ONLY_DEVICE;
6529 
6530 err2:
6531 	put_vnode(toVnode);
6532 err1:
6533 	put_vnode(fromVnode);
6534 
6535 	return status;
6536 }
6537 
6538 
6539 static status_t
6540 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6541 {
6542 	struct vnode* vnode = descriptor->u.vnode;
6543 
6544 	FUNCTION(("common_read_stat: stat %p\n", stat));
6545 
6546 	// TODO: remove this once all file systems properly set them!
6547 	stat->st_crtim.tv_nsec = 0;
6548 	stat->st_ctim.tv_nsec = 0;
6549 	stat->st_mtim.tv_nsec = 0;
6550 	stat->st_atim.tv_nsec = 0;
6551 
6552 	return vfs_stat_vnode(vnode, stat);
6553 }
6554 
6555 
6556 static status_t
6557 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6558 	int statMask)
6559 {
6560 	struct vnode* vnode = descriptor->u.vnode;
6561 
6562 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6563 		vnode, stat, statMask));
6564 
6565 	if (!HAS_FS_CALL(vnode, write_stat))
6566 		return B_READ_ONLY_DEVICE;
6567 
6568 	return FS_CALL(vnode, write_stat, stat, statMask);
6569 }
6570 
6571 
6572 static status_t
6573 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6574 	struct stat* stat, bool kernel)
6575 {
6576 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6577 		stat));
6578 
6579 	struct vnode* vnode;
6580 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6581 		NULL, kernel);
6582 	if (status != B_OK)
6583 		return status;
6584 
6585 	status = vfs_stat_vnode(vnode, stat);
6586 
6587 	put_vnode(vnode);
6588 	return status;
6589 }
6590 
6591 
6592 static status_t
6593 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6594 	const struct stat* stat, int statMask, bool kernel)
6595 {
6596 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6597 		"kernel %d\n", fd, path, stat, statMask, kernel));
6598 
6599 	struct vnode* vnode;
6600 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6601 		NULL, kernel);
6602 	if (status != B_OK)
6603 		return status;
6604 
6605 	if (HAS_FS_CALL(vnode, write_stat))
6606 		status = FS_CALL(vnode, write_stat, stat, statMask);
6607 	else
6608 		status = B_READ_ONLY_DEVICE;
6609 
6610 	put_vnode(vnode);
6611 
6612 	return status;
6613 }
6614 
6615 
6616 static int
6617 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6618 {
6619 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6620 		kernel));
6621 
6622 	struct vnode* vnode;
6623 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6624 		NULL, kernel);
6625 	if (status != B_OK)
6626 		return status;
6627 
6628 	status = open_attr_dir_vnode(vnode, kernel);
6629 	if (status < 0)
6630 		put_vnode(vnode);
6631 
6632 	return status;
6633 }
6634 
6635 
6636 static status_t
6637 attr_dir_close(struct file_descriptor* descriptor)
6638 {
6639 	struct vnode* vnode = descriptor->u.vnode;
6640 
6641 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6642 
6643 	if (HAS_FS_CALL(vnode, close_attr_dir))
6644 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6645 
6646 	return B_OK;
6647 }
6648 
6649 
6650 static void
6651 attr_dir_free_fd(struct file_descriptor* descriptor)
6652 {
6653 	struct vnode* vnode = descriptor->u.vnode;
6654 
6655 	if (vnode != NULL) {
6656 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6657 		put_vnode(vnode);
6658 	}
6659 }
6660 
6661 
6662 static status_t
6663 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6664 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6665 {
6666 	struct vnode* vnode = descriptor->u.vnode;
6667 
6668 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6669 
6670 	if (HAS_FS_CALL(vnode, read_attr_dir))
6671 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6672 			bufferSize, _count);
6673 
6674 	return B_UNSUPPORTED;
6675 }
6676 
6677 
6678 static status_t
6679 attr_dir_rewind(struct file_descriptor* descriptor)
6680 {
6681 	struct vnode* vnode = descriptor->u.vnode;
6682 
6683 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6684 
6685 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6686 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6687 
6688 	return B_UNSUPPORTED;
6689 }
6690 
6691 
6692 static int
6693 attr_create(int fd, char* path, const char* name, uint32 type,
6694 	int openMode, bool kernel)
6695 {
6696 	if (name == NULL || *name == '\0')
6697 		return B_BAD_VALUE;
6698 
6699 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6700 	struct vnode* vnode;
6701 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6702 		kernel);
6703 	if (status != B_OK)
6704 		return status;
6705 
6706 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6707 		status = B_LINK_LIMIT;
6708 		goto err;
6709 	}
6710 
6711 	if (!HAS_FS_CALL(vnode, create_attr)) {
6712 		status = B_READ_ONLY_DEVICE;
6713 		goto err;
6714 	}
6715 
6716 	void* cookie;
6717 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6718 	if (status != B_OK)
6719 		goto err;
6720 
6721 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6722 	if (fd >= 0)
6723 		return fd;
6724 
6725 	status = fd;
6726 
6727 	FS_CALL(vnode, close_attr, cookie);
6728 	FS_CALL(vnode, free_attr_cookie, cookie);
6729 
6730 	FS_CALL(vnode, remove_attr, name);
6731 
6732 err:
6733 	put_vnode(vnode);
6734 
6735 	return status;
6736 }
6737 
6738 
6739 static int
6740 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6741 {
6742 	if (name == NULL || *name == '\0')
6743 		return B_BAD_VALUE;
6744 
6745 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6746 	struct vnode* vnode;
6747 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6748 		kernel);
6749 	if (status != B_OK)
6750 		return status;
6751 
6752 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6753 		status = B_LINK_LIMIT;
6754 		goto err;
6755 	}
6756 
6757 	if (!HAS_FS_CALL(vnode, open_attr)) {
6758 		status = B_UNSUPPORTED;
6759 		goto err;
6760 	}
6761 
6762 	void* cookie;
6763 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6764 	if (status != B_OK)
6765 		goto err;
6766 
6767 	// now we only need a file descriptor for this attribute and we're done
6768 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6769 	if (fd >= 0)
6770 		return fd;
6771 
6772 	status = fd;
6773 
6774 	FS_CALL(vnode, close_attr, cookie);
6775 	FS_CALL(vnode, free_attr_cookie, cookie);
6776 
6777 err:
6778 	put_vnode(vnode);
6779 
6780 	return status;
6781 }
6782 
6783 
6784 static status_t
6785 attr_close(struct file_descriptor* descriptor)
6786 {
6787 	struct vnode* vnode = descriptor->u.vnode;
6788 
6789 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6790 
6791 	if (HAS_FS_CALL(vnode, close_attr))
6792 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6793 
6794 	return B_OK;
6795 }
6796 
6797 
6798 static void
6799 attr_free_fd(struct file_descriptor* descriptor)
6800 {
6801 	struct vnode* vnode = descriptor->u.vnode;
6802 
6803 	if (vnode != NULL) {
6804 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6805 		put_vnode(vnode);
6806 	}
6807 }
6808 
6809 
6810 static status_t
6811 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6812 	size_t* length)
6813 {
6814 	struct vnode* vnode = descriptor->u.vnode;
6815 
6816 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6817 		pos, length, *length));
6818 
6819 	if (!HAS_FS_CALL(vnode, read_attr))
6820 		return B_UNSUPPORTED;
6821 
6822 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6823 }
6824 
6825 
6826 static status_t
6827 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6828 	size_t* length)
6829 {
6830 	struct vnode* vnode = descriptor->u.vnode;
6831 
6832 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6833 		length));
6834 
6835 	if (!HAS_FS_CALL(vnode, write_attr))
6836 		return B_UNSUPPORTED;
6837 
6838 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6839 }
6840 
6841 
6842 static off_t
6843 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6844 {
6845 	off_t offset;
6846 
6847 	switch (seekType) {
6848 		case SEEK_SET:
6849 			offset = 0;
6850 			break;
6851 		case SEEK_CUR:
6852 			offset = descriptor->pos;
6853 			break;
6854 		case SEEK_END:
6855 		{
6856 			struct vnode* vnode = descriptor->u.vnode;
6857 			if (!HAS_FS_CALL(vnode, read_stat))
6858 				return B_UNSUPPORTED;
6859 
6860 			struct stat stat;
6861 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6862 				&stat);
6863 			if (status != B_OK)
6864 				return status;
6865 
6866 			offset = stat.st_size;
6867 			break;
6868 		}
6869 		default:
6870 			return B_BAD_VALUE;
6871 	}
6872 
6873 	// assumes off_t is 64 bits wide
6874 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6875 		return B_BUFFER_OVERFLOW;
6876 
6877 	pos += offset;
6878 	if (pos < 0)
6879 		return B_BAD_VALUE;
6880 
6881 	return descriptor->pos = pos;
6882 }
6883 
6884 
6885 static status_t
6886 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6887 {
6888 	struct vnode* vnode = descriptor->u.vnode;
6889 
6890 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6891 
6892 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6893 		return B_UNSUPPORTED;
6894 
6895 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6896 }
6897 
6898 
6899 static status_t
6900 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6901 	int statMask)
6902 {
6903 	struct vnode* vnode = descriptor->u.vnode;
6904 
6905 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6906 
6907 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6908 		return B_READ_ONLY_DEVICE;
6909 
6910 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6911 }
6912 
6913 
6914 static status_t
6915 attr_remove(int fd, const char* name, bool kernel)
6916 {
6917 	struct file_descriptor* descriptor;
6918 	struct vnode* vnode;
6919 	status_t status;
6920 
6921 	if (name == NULL || *name == '\0')
6922 		return B_BAD_VALUE;
6923 
6924 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6925 		kernel));
6926 
6927 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6928 	if (descriptor == NULL)
6929 		return B_FILE_ERROR;
6930 
6931 	if (HAS_FS_CALL(vnode, remove_attr))
6932 		status = FS_CALL(vnode, remove_attr, name);
6933 	else
6934 		status = B_READ_ONLY_DEVICE;
6935 
6936 	put_fd(descriptor);
6937 
6938 	return status;
6939 }
6940 
6941 
6942 static status_t
6943 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6944 	bool kernel)
6945 {
6946 	struct file_descriptor* fromDescriptor;
6947 	struct file_descriptor* toDescriptor;
6948 	struct vnode* fromVnode;
6949 	struct vnode* toVnode;
6950 	status_t status;
6951 
6952 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6953 		|| *toName == '\0')
6954 		return B_BAD_VALUE;
6955 
6956 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6957 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6958 
6959 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6960 	if (fromDescriptor == NULL)
6961 		return B_FILE_ERROR;
6962 
6963 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6964 	if (toDescriptor == NULL) {
6965 		status = B_FILE_ERROR;
6966 		goto err;
6967 	}
6968 
6969 	// are the files on the same volume?
6970 	if (fromVnode->device != toVnode->device) {
6971 		status = B_CROSS_DEVICE_LINK;
6972 		goto err1;
6973 	}
6974 
6975 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6976 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6977 	} else
6978 		status = B_READ_ONLY_DEVICE;
6979 
6980 err1:
6981 	put_fd(toDescriptor);
6982 err:
6983 	put_fd(fromDescriptor);
6984 
6985 	return status;
6986 }
6987 
6988 
6989 static int
6990 index_dir_open(dev_t mountID, bool kernel)
6991 {
6992 	struct fs_mount* mount;
6993 	void* cookie;
6994 
6995 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6996 		kernel));
6997 
6998 	status_t status = get_mount(mountID, &mount);
6999 	if (status != B_OK)
7000 		return status;
7001 
7002 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7003 		status = B_UNSUPPORTED;
7004 		goto error;
7005 	}
7006 
7007 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7008 	if (status != B_OK)
7009 		goto error;
7010 
7011 	// get fd for the index directory
7012 	int fd;
7013 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7014 	if (fd >= 0)
7015 		return fd;
7016 
7017 	// something went wrong
7018 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7019 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7020 
7021 	status = fd;
7022 
7023 error:
7024 	put_mount(mount);
7025 	return status;
7026 }
7027 
7028 
7029 static status_t
7030 index_dir_close(struct file_descriptor* descriptor)
7031 {
7032 	struct fs_mount* mount = descriptor->u.mount;
7033 
7034 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7035 
7036 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7037 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7038 
7039 	return B_OK;
7040 }
7041 
7042 
7043 static void
7044 index_dir_free_fd(struct file_descriptor* descriptor)
7045 {
7046 	struct fs_mount* mount = descriptor->u.mount;
7047 
7048 	if (mount != NULL) {
7049 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7050 		put_mount(mount);
7051 	}
7052 }
7053 
7054 
7055 static status_t
7056 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7057 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7058 {
7059 	struct fs_mount* mount = descriptor->u.mount;
7060 
7061 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7062 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7063 			bufferSize, _count);
7064 	}
7065 
7066 	return B_UNSUPPORTED;
7067 }
7068 
7069 
7070 static status_t
7071 index_dir_rewind(struct file_descriptor* descriptor)
7072 {
7073 	struct fs_mount* mount = descriptor->u.mount;
7074 
7075 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7076 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7077 
7078 	return B_UNSUPPORTED;
7079 }
7080 
7081 
7082 static status_t
7083 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7084 	bool kernel)
7085 {
7086 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7087 		mountID, name, kernel));
7088 
7089 	struct fs_mount* mount;
7090 	status_t status = get_mount(mountID, &mount);
7091 	if (status != B_OK)
7092 		return status;
7093 
7094 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7095 		status = B_READ_ONLY_DEVICE;
7096 		goto out;
7097 	}
7098 
7099 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7100 
7101 out:
7102 	put_mount(mount);
7103 	return status;
7104 }
7105 
7106 
#if 0
// NOTE: Both functions below are compiled out and currently unused.

/*!	Placeholder for a descriptor based index stat. Always returns
	\c B_UNSUPPORTED; the actual hook invocation is commented out.
*/
static status_t
index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
{
	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));

	struct vnode* node = descriptor->u.vnode;
	if (!HAS_FS_CALL(node, read_index_stat))
		return B_UNSUPPORTED;

	return B_UNSUPPORTED;
	//return FS_CALL(node, read_index_stat, descriptor->cookie, stat);
}


/*!	Frees the index cookie of the \a descriptor and releases the
	descriptor's vnode reference. Does nothing if the descriptor has no
	vnode.
*/
static void
index_free_fd(struct file_descriptor* descriptor)
{
	struct vnode* node = descriptor->u.vnode;
	if (node == NULL)
		return;

	FS_CALL(node, free_index_cookie, descriptor->cookie);
	put_vnode(node);
}
#endif
7134 
7135 
7136 static status_t
7137 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7138 	bool kernel)
7139 {
7140 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7141 		mountID, name, kernel));
7142 
7143 	struct fs_mount* mount;
7144 	status_t status = get_mount(mountID, &mount);
7145 	if (status != B_OK)
7146 		return status;
7147 
7148 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7149 		status = B_UNSUPPORTED;
7150 		goto out;
7151 	}
7152 
7153 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7154 
7155 out:
7156 	put_mount(mount);
7157 	return status;
7158 }
7159 
7160 
7161 static status_t
7162 index_remove(dev_t mountID, const char* name, bool kernel)
7163 {
7164 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7165 		mountID, name, kernel));
7166 
7167 	struct fs_mount* mount;
7168 	status_t status = get_mount(mountID, &mount);
7169 	if (status != B_OK)
7170 		return status;
7171 
7172 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7173 		status = B_READ_ONLY_DEVICE;
7174 		goto out;
7175 	}
7176 
7177 	status = FS_MOUNT_CALL(mount, remove_index, name);
7178 
7179 out:
7180 	put_mount(mount);
7181 	return status;
7182 }
7183 
7184 
7185 /*!	TODO: the query FS API is still the pretty much the same as in R5.
7186 		It would be nice if the FS would find some more kernel support
7187 		for them.
7188 		For example, query parsing should be moved into the kernel.
7189 */
7190 static int
7191 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7192 	int32 token, bool kernel)
7193 {
7194 	struct fs_mount* mount;
7195 	void* cookie;
7196 
7197 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7198 		device, query, kernel));
7199 
7200 	status_t status = get_mount(device, &mount);
7201 	if (status != B_OK)
7202 		return status;
7203 
7204 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7205 		status = B_UNSUPPORTED;
7206 		goto error;
7207 	}
7208 
7209 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7210 		&cookie);
7211 	if (status != B_OK)
7212 		goto error;
7213 
7214 	// get fd for the index directory
7215 	int fd;
7216 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7217 	if (fd >= 0)
7218 		return fd;
7219 
7220 	status = fd;
7221 
7222 	// something went wrong
7223 	FS_MOUNT_CALL(mount, close_query, cookie);
7224 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7225 
7226 error:
7227 	put_mount(mount);
7228 	return status;
7229 }
7230 
7231 
7232 static status_t
7233 query_close(struct file_descriptor* descriptor)
7234 {
7235 	struct fs_mount* mount = descriptor->u.mount;
7236 
7237 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7238 
7239 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7240 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7241 
7242 	return B_OK;
7243 }
7244 
7245 
7246 static void
7247 query_free_fd(struct file_descriptor* descriptor)
7248 {
7249 	struct fs_mount* mount = descriptor->u.mount;
7250 
7251 	if (mount != NULL) {
7252 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7253 		put_mount(mount);
7254 	}
7255 }
7256 
7257 
7258 static status_t
7259 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7260 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7261 {
7262 	struct fs_mount* mount = descriptor->u.mount;
7263 
7264 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7265 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7266 			bufferSize, _count);
7267 	}
7268 
7269 	return B_UNSUPPORTED;
7270 }
7271 
7272 
7273 static status_t
7274 query_rewind(struct file_descriptor* descriptor)
7275 {
7276 	struct fs_mount* mount = descriptor->u.mount;
7277 
7278 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7279 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7280 
7281 	return B_UNSUPPORTED;
7282 }
7283 
7284 
7285 //	#pragma mark - General File System functions
7286 
7287 
7288 static dev_t
7289 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7290 	const char* args, bool kernel)
7291 {
7292 	struct ::fs_mount* mount;
7293 	status_t status = B_OK;
7294 	fs_volume* volume = NULL;
7295 	int32 layer = 0;
7296 	Vnode* coveredNode = NULL;
7297 
7298 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7299 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7300 
7301 	// The path is always safe, we just have to make sure that fsName is
7302 	// almost valid - we can't make any assumptions about args, though.
7303 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7304 	// We'll get it from the DDM later.
7305 	if (fsName == NULL) {
7306 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7307 			return B_BAD_VALUE;
7308 	} else if (fsName[0] == '\0')
7309 		return B_BAD_VALUE;
7310 
7311 	RecursiveLocker mountOpLocker(sMountOpLock);
7312 
7313 	// Helper to delete a newly created file device on failure.
7314 	// Not exactly beautiful, but helps to keep the code below cleaner.
7315 	struct FileDeviceDeleter {
7316 		FileDeviceDeleter() : id(-1) {}
7317 		~FileDeviceDeleter()
7318 		{
7319 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7320 		}
7321 
7322 		partition_id id;
7323 	} fileDeviceDeleter;
7324 
7325 	// If the file system is not a "virtual" one, the device argument should
7326 	// point to a real file/device (if given at all).
7327 	// get the partition
7328 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7329 	KPartition* partition = NULL;
7330 	KPath normalizedDevice;
7331 	bool newlyCreatedFileDevice = false;
7332 
7333 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7334 		// normalize the device path
7335 		status = normalizedDevice.SetTo(device, true);
7336 		if (status != B_OK)
7337 			return status;
7338 
7339 		// get a corresponding partition from the DDM
7340 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7341 		if (partition == NULL) {
7342 			// Partition not found: This either means, the user supplied
7343 			// an invalid path, or the path refers to an image file. We try
7344 			// to let the DDM create a file device for the path.
7345 			partition_id deviceID = ddm->CreateFileDevice(
7346 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7347 			if (deviceID >= 0) {
7348 				partition = ddm->RegisterPartition(deviceID);
7349 				if (newlyCreatedFileDevice)
7350 					fileDeviceDeleter.id = deviceID;
7351 			}
7352 		}
7353 
7354 		if (!partition) {
7355 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7356 				normalizedDevice.Path()));
7357 			return B_ENTRY_NOT_FOUND;
7358 		}
7359 
7360 		device = normalizedDevice.Path();
7361 			// correct path to file device
7362 	}
7363 	PartitionRegistrar partitionRegistrar(partition, true);
7364 
7365 	// Write lock the partition's device. For the time being, we keep the lock
7366 	// until we're done mounting -- not nice, but ensure, that no-one is
7367 	// interfering.
7368 	// TODO: Just mark the partition busy while mounting!
7369 	KDiskDevice* diskDevice = NULL;
7370 	if (partition) {
7371 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7372 		if (!diskDevice) {
7373 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7374 			return B_ERROR;
7375 		}
7376 	}
7377 
7378 	DeviceWriteLocker writeLocker(diskDevice, true);
7379 		// this takes over the write lock acquired before
7380 
7381 	if (partition != NULL) {
7382 		// make sure, that the partition is not busy
7383 		if (partition->IsBusy()) {
7384 			TRACE(("fs_mount(): Partition is busy.\n"));
7385 			return B_BUSY;
7386 		}
7387 
7388 		// if no FS name had been supplied, we get it from the partition
7389 		if (fsName == NULL) {
7390 			KDiskSystem* diskSystem = partition->DiskSystem();
7391 			if (!diskSystem) {
7392 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7393 					"recognize it.\n"));
7394 				return B_BAD_VALUE;
7395 			}
7396 
7397 			if (!diskSystem->IsFileSystem()) {
7398 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7399 					"partitioning system.\n"));
7400 				return B_BAD_VALUE;
7401 			}
7402 
7403 			// The disk system name will not change, and the KDiskSystem
7404 			// object will not go away while the disk device is locked (and
7405 			// the partition has a reference to it), so this is safe.
7406 			fsName = diskSystem->Name();
7407 		}
7408 	}
7409 
7410 	mount = new(std::nothrow) (struct ::fs_mount);
7411 	if (mount == NULL)
7412 		return B_NO_MEMORY;
7413 
7414 	mount->device_name = strdup(device);
7415 		// "device" can be NULL
7416 
7417 	status = mount->entry_cache.Init();
7418 	if (status != B_OK)
7419 		goto err1;
7420 
7421 	// initialize structure
7422 	mount->id = sNextMountID++;
7423 	mount->partition = NULL;
7424 	mount->root_vnode = NULL;
7425 	mount->covers_vnode = NULL;
7426 	mount->unmounting = false;
7427 	mount->owns_file_device = false;
7428 	mount->volume = NULL;
7429 
7430 	// build up the volume(s)
7431 	while (true) {
7432 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7433 		if (layerFSName == NULL) {
7434 			if (layer == 0) {
7435 				status = B_NO_MEMORY;
7436 				goto err1;
7437 			}
7438 
7439 			break;
7440 		}
7441 		MemoryDeleter layerFSNameDeleter(layerFSName);
7442 
7443 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7444 		if (volume == NULL) {
7445 			status = B_NO_MEMORY;
7446 			goto err1;
7447 		}
7448 
7449 		volume->id = mount->id;
7450 		volume->partition = partition != NULL ? partition->ID() : -1;
7451 		volume->layer = layer++;
7452 		volume->private_volume = NULL;
7453 		volume->ops = NULL;
7454 		volume->sub_volume = NULL;
7455 		volume->super_volume = NULL;
7456 		volume->file_system = NULL;
7457 		volume->file_system_name = NULL;
7458 
7459 		volume->file_system_name = get_file_system_name(layerFSName);
7460 		if (volume->file_system_name == NULL) {
7461 			status = B_NO_MEMORY;
7462 			free(volume);
7463 			goto err1;
7464 		}
7465 
7466 		volume->file_system = get_file_system(layerFSName);
7467 		if (volume->file_system == NULL) {
7468 			status = B_DEVICE_NOT_FOUND;
7469 			free(volume->file_system_name);
7470 			free(volume);
7471 			goto err1;
7472 		}
7473 
7474 		if (mount->volume == NULL)
7475 			mount->volume = volume;
7476 		else {
7477 			volume->super_volume = mount->volume;
7478 			mount->volume->sub_volume = volume;
7479 			mount->volume = volume;
7480 		}
7481 	}
7482 
7483 	// insert mount struct into list before we call FS's mount() function
7484 	// so that vnodes can be created for this mount
7485 	mutex_lock(&sMountMutex);
7486 	sMountsTable->Insert(mount);
7487 	mutex_unlock(&sMountMutex);
7488 
7489 	ino_t rootID;
7490 
7491 	if (!sRoot) {
7492 		// we haven't mounted anything yet
7493 		if (strcmp(path, "/") != 0) {
7494 			status = B_ERROR;
7495 			goto err2;
7496 		}
7497 
7498 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7499 			args, &rootID);
7500 		if (status != B_OK || mount->volume->ops == NULL)
7501 			goto err2;
7502 	} else {
7503 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7504 		if (status != B_OK)
7505 			goto err2;
7506 
7507 		mount->covers_vnode = coveredNode;
7508 
7509 		// make sure covered_vnode is a directory
7510 		if (!S_ISDIR(coveredNode->Type())) {
7511 			status = B_NOT_A_DIRECTORY;
7512 			goto err3;
7513 		}
7514 
7515 		if (coveredNode->IsCovered()) {
7516 			// this is already a covered vnode
7517 			status = B_BUSY;
7518 			goto err3;
7519 		}
7520 
7521 		// mount it/them
7522 		fs_volume* volume = mount->volume;
7523 		while (volume) {
7524 			status = volume->file_system->mount(volume, device, flags, args,
7525 				&rootID);
7526 			if (status != B_OK || volume->ops == NULL) {
7527 				if (volume->sub_volume)
7528 					goto err4;
7529 				goto err3;
7530 			}
7531 
7532 			volume = volume->super_volume;
7533 		}
7534 
7535 		volume = mount->volume;
7536 		while (volume) {
7537 			if (volume->ops->all_layers_mounted != NULL)
7538 				volume->ops->all_layers_mounted(volume);
7539 			volume = volume->super_volume;
7540 		}
7541 	}
7542 
7543 	// the root node is supposed to be owned by the file system - it must
7544 	// exist at this point
7545 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7546 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7547 		panic("fs_mount: file system does not own its root node!\n");
7548 		status = B_ERROR;
7549 		goto err4;
7550 	}
7551 
7552 	// set up the links between the root vnode and the vnode it covers
7553 	rw_lock_write_lock(&sVnodeLock);
7554 	if (coveredNode != NULL) {
7555 		if (coveredNode->IsCovered()) {
7556 			// the vnode is covered now
7557 			status = B_BUSY;
7558 			rw_lock_write_unlock(&sVnodeLock);
7559 			goto err4;
7560 		}
7561 
7562 		mount->root_vnode->covers = coveredNode;
7563 		mount->root_vnode->SetCovering(true);
7564 
7565 		coveredNode->covered_by = mount->root_vnode;
7566 		coveredNode->SetCovered(true);
7567 	}
7568 	rw_lock_write_unlock(&sVnodeLock);
7569 
7570 	if (!sRoot) {
7571 		sRoot = mount->root_vnode;
7572 		mutex_lock(&sIOContextRootLock);
7573 		get_current_io_context(true)->root = sRoot;
7574 		mutex_unlock(&sIOContextRootLock);
7575 		inc_vnode_ref_count(sRoot);
7576 	}
7577 
7578 	// supply the partition (if any) with the mount cookie and mark it mounted
7579 	if (partition) {
7580 		partition->SetMountCookie(mount->volume->private_volume);
7581 		partition->SetVolumeID(mount->id);
7582 
7583 		// keep a partition reference as long as the partition is mounted
7584 		partitionRegistrar.Detach();
7585 		mount->partition = partition;
7586 		mount->owns_file_device = newlyCreatedFileDevice;
7587 		fileDeviceDeleter.id = -1;
7588 	}
7589 
7590 	notify_mount(mount->id,
7591 		coveredNode != NULL ? coveredNode->device : -1,
7592 		coveredNode ? coveredNode->id : -1);
7593 
7594 	return mount->id;
7595 
7596 err4:
7597 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7598 err3:
7599 	if (coveredNode != NULL)
7600 		put_vnode(coveredNode);
7601 err2:
7602 	mutex_lock(&sMountMutex);
7603 	sMountsTable->Remove(mount);
7604 	mutex_unlock(&sMountMutex);
7605 err1:
7606 	delete mount;
7607 
7608 	return status;
7609 }
7610 
7611 
7612 static status_t
7613 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7614 {
7615 	struct fs_mount* mount;
7616 	status_t err;
7617 
7618 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7619 		mountID, kernel));
7620 
7621 	struct vnode* pathVnode = NULL;
7622 	if (path != NULL) {
7623 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7624 		if (err != B_OK)
7625 			return B_ENTRY_NOT_FOUND;
7626 	}
7627 
7628 	RecursiveLocker mountOpLocker(sMountOpLock);
7629 
7630 	// this lock is not strictly necessary, but here in case of KDEBUG
7631 	// to keep the ASSERT in find_mount() working.
7632 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7633 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7634 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7635 	if (mount == NULL) {
7636 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7637 			pathVnode);
7638 	}
7639 
7640 	if (path != NULL) {
7641 		put_vnode(pathVnode);
7642 
7643 		if (mount->root_vnode != pathVnode) {
7644 			// not mountpoint
7645 			return B_BAD_VALUE;
7646 		}
7647 	}
7648 
7649 	// if the volume is associated with a partition, lock the device of the
7650 	// partition as long as we are unmounting
7651 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7652 	KPartition* partition = mount->partition;
7653 	KDiskDevice* diskDevice = NULL;
7654 	if (partition != NULL) {
7655 		if (partition->Device() == NULL) {
7656 			dprintf("fs_unmount(): There is no device!\n");
7657 			return B_ERROR;
7658 		}
7659 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7660 		if (!diskDevice) {
7661 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7662 			return B_ERROR;
7663 		}
7664 	}
7665 	DeviceWriteLocker writeLocker(diskDevice, true);
7666 
7667 	// make sure, that the partition is not busy
7668 	if (partition != NULL) {
7669 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7670 			TRACE(("fs_unmount(): Partition is busy.\n"));
7671 			return B_BUSY;
7672 		}
7673 	}
7674 
7675 	// grab the vnode master mutex to keep someone from creating
7676 	// a vnode while we're figuring out if we can continue
7677 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7678 
7679 	bool disconnectedDescriptors = false;
7680 
7681 	while (true) {
7682 		bool busy = false;
7683 
7684 		// cycle through the list of vnodes associated with this mount and
7685 		// make sure all of them are not busy or have refs on them
7686 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7687 		while (struct vnode* vnode = iterator.Next()) {
7688 			if (vnode->IsBusy()) {
7689 				busy = true;
7690 				break;
7691 			}
7692 
7693 			// check the vnode's ref count -- subtract additional references for
7694 			// covering
7695 			int32 refCount = vnode->ref_count;
7696 			if (vnode->covers != NULL)
7697 				refCount--;
7698 			if (vnode->covered_by != NULL)
7699 				refCount--;
7700 
7701 			if (refCount != 0) {
7702 				// there are still vnodes in use on this mount, so we cannot
7703 				// unmount yet
7704 				busy = true;
7705 				break;
7706 			}
7707 		}
7708 
7709 		if (!busy)
7710 			break;
7711 
7712 		if ((flags & B_FORCE_UNMOUNT) == 0)
7713 			return B_BUSY;
7714 
7715 		if (disconnectedDescriptors) {
7716 			// wait a bit until the last access is finished, and then try again
7717 			vnodesWriteLocker.Unlock();
7718 			snooze(100000);
7719 			// TODO: if there is some kind of bug that prevents the ref counts
7720 			// from getting back to zero, this will fall into an endless loop...
7721 			vnodesWriteLocker.Lock();
7722 			continue;
7723 		}
7724 
7725 		// the file system is still busy - but we're forced to unmount it,
7726 		// so let's disconnect all open file descriptors
7727 
7728 		mount->unmounting = true;
7729 			// prevent new vnodes from being created
7730 
7731 		vnodesWriteLocker.Unlock();
7732 
7733 		disconnect_mount_or_vnode_fds(mount, NULL);
7734 		disconnectedDescriptors = true;
7735 
7736 		vnodesWriteLocker.Lock();
7737 	}
7738 
7739 	// We can safely continue. Mark all of the vnodes busy and this mount
7740 	// structure in unmounting state. Also undo the vnode covers/covered_by
7741 	// links.
7742 	mount->unmounting = true;
7743 
7744 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7745 	while (struct vnode* vnode = iterator.Next()) {
7746 		// Remove all covers/covered_by links from other mounts' nodes to this
7747 		// vnode and adjust the node ref count accordingly. We will release the
7748 		// references to the external vnodes below.
7749 		if (Vnode* coveredNode = vnode->covers) {
7750 			if (Vnode* coveringNode = vnode->covered_by) {
7751 				// We have both covered and covering vnodes, so just remove us
7752 				// from the chain.
7753 				coveredNode->covered_by = coveringNode;
7754 				coveringNode->covers = coveredNode;
7755 				vnode->ref_count -= 2;
7756 
7757 				vnode->covered_by = NULL;
7758 				vnode->covers = NULL;
7759 				vnode->SetCovering(false);
7760 				vnode->SetCovered(false);
7761 			} else {
7762 				// We only have a covered vnode. Remove its link to us.
7763 				coveredNode->covered_by = NULL;
7764 				coveredNode->SetCovered(false);
7765 				vnode->ref_count--;
7766 
7767 				// If the other node is an external vnode, we keep its link
7768 				// link around so we can put the reference later on. Otherwise
7769 				// we get rid of it right now.
7770 				if (coveredNode->mount == mount) {
7771 					vnode->covers = NULL;
7772 					coveredNode->ref_count--;
7773 				}
7774 			}
7775 		} else if (Vnode* coveringNode = vnode->covered_by) {
7776 			// We only have a covering vnode. Remove its link to us.
7777 			coveringNode->covers = NULL;
7778 			coveringNode->SetCovering(false);
7779 			vnode->ref_count--;
7780 
7781 			// If the other node is an external vnode, we keep its link
7782 			// link around so we can put the reference later on. Otherwise
7783 			// we get rid of it right now.
7784 			if (coveringNode->mount == mount) {
7785 				vnode->covered_by = NULL;
7786 				coveringNode->ref_count--;
7787 			}
7788 		}
7789 
7790 		vnode->SetBusy(true);
7791 		vnode_to_be_freed(vnode);
7792 	}
7793 
7794 	vnodesWriteLocker.Unlock();
7795 
7796 	// Free all vnodes associated with this mount.
7797 	// They will be removed from the mount list by free_vnode(), so
7798 	// we don't have to do this.
7799 	while (struct vnode* vnode = mount->vnodes.Head()) {
7800 		// Put the references to external covered/covering vnodes we kept above.
7801 		if (Vnode* coveredNode = vnode->covers)
7802 			put_vnode(coveredNode);
7803 		if (Vnode* coveringNode = vnode->covered_by)
7804 			put_vnode(coveringNode);
7805 
7806 		free_vnode(vnode, false);
7807 	}
7808 
7809 	// remove the mount structure from the hash table
7810 	mutex_lock(&sMountMutex);
7811 	sMountsTable->Remove(mount);
7812 	mutex_unlock(&sMountMutex);
7813 
7814 	mountOpLocker.Unlock();
7815 
7816 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7817 	notify_unmount(mount->id);
7818 
7819 	// dereference the partition and mark it unmounted
7820 	if (partition) {
7821 		partition->SetVolumeID(-1);
7822 		partition->SetMountCookie(NULL);
7823 
7824 		if (mount->owns_file_device)
7825 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7826 		partition->Unregister();
7827 	}
7828 
7829 	delete mount;
7830 	return B_OK;
7831 }
7832 
7833 
7834 static status_t
7835 fs_sync(dev_t device)
7836 {
7837 	struct fs_mount* mount;
7838 	status_t status = get_mount(device, &mount);
7839 	if (status != B_OK)
7840 		return status;
7841 
7842 	struct vnode marker;
7843 	memset(&marker, 0, sizeof(marker));
7844 	marker.SetBusy(true);
7845 	marker.SetRemoved(true);
7846 
7847 	// First, synchronize all file caches
7848 
7849 	while (true) {
7850 		WriteLocker locker(sVnodeLock);
7851 			// Note: That's the easy way. Which is probably OK for sync(),
7852 			// since it's a relatively rare call and doesn't need to allow for
7853 			// a lot of concurrency. Using a read lock would be possible, but
7854 			// also more involved, since we had to lock the individual nodes
7855 			// and take care of the locking order, which we might not want to
7856 			// do while holding fs_mount::rlock.
7857 
7858 		// synchronize access to vnode list
7859 		recursive_lock_lock(&mount->rlock);
7860 
7861 		struct vnode* vnode;
7862 		if (!marker.IsRemoved()) {
7863 			vnode = mount->vnodes.GetNext(&marker);
7864 			mount->vnodes.Remove(&marker);
7865 			marker.SetRemoved(true);
7866 		} else
7867 			vnode = mount->vnodes.First();
7868 
7869 		while (vnode != NULL && (vnode->cache == NULL
7870 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7871 			// TODO: we could track writes (and writable mapped vnodes)
7872 			//	and have a simple flag that we could test for here
7873 			vnode = mount->vnodes.GetNext(vnode);
7874 		}
7875 
7876 		if (vnode != NULL) {
7877 			// insert marker vnode again
7878 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7879 			marker.SetRemoved(false);
7880 		}
7881 
7882 		recursive_lock_unlock(&mount->rlock);
7883 
7884 		if (vnode == NULL)
7885 			break;
7886 
7887 		vnode = lookup_vnode(mount->id, vnode->id);
7888 		if (vnode == NULL || vnode->IsBusy())
7889 			continue;
7890 
7891 		if (vnode->ref_count == 0) {
7892 			// this vnode has been unused before
7893 			vnode_used(vnode);
7894 		}
7895 		inc_vnode_ref_count(vnode);
7896 
7897 		locker.Unlock();
7898 
7899 		if (vnode->cache != NULL && !vnode->IsRemoved())
7900 			vnode->cache->WriteModified();
7901 
7902 		put_vnode(vnode);
7903 	}
7904 
7905 	// And then, let the file systems do their synchronizing work
7906 
7907 	if (HAS_FS_MOUNT_CALL(mount, sync))
7908 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7909 
7910 	put_mount(mount);
7911 	return status;
7912 }
7913 
7914 
7915 static status_t
7916 fs_read_info(dev_t device, struct fs_info* info)
7917 {
7918 	struct fs_mount* mount;
7919 	status_t status = get_mount(device, &mount);
7920 	if (status != B_OK)
7921 		return status;
7922 
7923 	memset(info, 0, sizeof(struct fs_info));
7924 
7925 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7926 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7927 
7928 	// fill in info the file system doesn't (have to) know about
7929 	if (status == B_OK) {
7930 		info->dev = mount->id;
7931 		info->root = mount->root_vnode->id;
7932 
7933 		fs_volume* volume = mount->volume;
7934 		while (volume->super_volume != NULL)
7935 			volume = volume->super_volume;
7936 
7937 		strlcpy(info->fsh_name, volume->file_system_name,
7938 			sizeof(info->fsh_name));
7939 		if (mount->device_name != NULL) {
7940 			strlcpy(info->device_name, mount->device_name,
7941 				sizeof(info->device_name));
7942 		}
7943 	}
7944 
7945 	// if the call is not supported by the file system, there are still
7946 	// the parts that we filled out ourselves
7947 
7948 	put_mount(mount);
7949 	return status;
7950 }
7951 
7952 
7953 static status_t
7954 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7955 {
7956 	struct fs_mount* mount;
7957 	status_t status = get_mount(device, &mount);
7958 	if (status != B_OK)
7959 		return status;
7960 
7961 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7962 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7963 	else
7964 		status = B_READ_ONLY_DEVICE;
7965 
7966 	put_mount(mount);
7967 	return status;
7968 }
7969 
7970 
7971 static dev_t
7972 fs_next_device(int32* _cookie)
7973 {
7974 	struct fs_mount* mount = NULL;
7975 	dev_t device = *_cookie;
7976 
7977 	mutex_lock(&sMountMutex);
7978 
7979 	// Since device IDs are assigned sequentially, this algorithm
7980 	// does work good enough. It makes sure that the device list
7981 	// returned is sorted, and that no device is skipped when an
7982 	// already visited device got unmounted.
7983 
7984 	while (device < sNextMountID) {
7985 		mount = find_mount(device++);
7986 		if (mount != NULL && mount->volume->private_volume != NULL)
7987 			break;
7988 	}
7989 
7990 	*_cookie = device;
7991 
7992 	if (mount != NULL)
7993 		device = mount->id;
7994 	else
7995 		device = B_BAD_VALUE;
7996 
7997 	mutex_unlock(&sMountMutex);
7998 
7999 	return device;
8000 }
8001 
8002 
8003 ssize_t
8004 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8005 	void *buffer, size_t readBytes)
8006 {
8007 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8008 	if (attrFD < 0)
8009 		return attrFD;
8010 
8011 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8012 
8013 	_kern_close(attrFD);
8014 
8015 	return bytesRead;
8016 }
8017 
8018 
8019 static status_t
8020 get_cwd(char* buffer, size_t size, bool kernel)
8021 {
8022 	// Get current working directory from io context
8023 	struct io_context* context = get_current_io_context(kernel);
8024 	status_t status;
8025 
8026 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8027 
8028 	mutex_lock(&context->io_mutex);
8029 
8030 	struct vnode* vnode = context->cwd;
8031 	if (vnode)
8032 		inc_vnode_ref_count(vnode);
8033 
8034 	mutex_unlock(&context->io_mutex);
8035 
8036 	if (vnode) {
8037 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8038 		put_vnode(vnode);
8039 	} else
8040 		status = B_ERROR;
8041 
8042 	return status;
8043 }
8044 
8045 
8046 static status_t
8047 set_cwd(int fd, char* path, bool kernel)
8048 {
8049 	struct io_context* context;
8050 	struct vnode* vnode = NULL;
8051 	struct vnode* oldDirectory;
8052 	status_t status;
8053 
8054 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8055 
8056 	// Get vnode for passed path, and bail if it failed
8057 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8058 	if (status < 0)
8059 		return status;
8060 
8061 	if (!S_ISDIR(vnode->Type())) {
8062 		// nope, can't cwd to here
8063 		status = B_NOT_A_DIRECTORY;
8064 		goto err;
8065 	}
8066 
8067 	// We need to have the permission to enter the directory, too
8068 	if (HAS_FS_CALL(vnode, access)) {
8069 		status = FS_CALL(vnode, access, X_OK);
8070 		if (status != B_OK)
8071 			goto err;
8072 	}
8073 
8074 	// Get current io context and lock
8075 	context = get_current_io_context(kernel);
8076 	mutex_lock(&context->io_mutex);
8077 
8078 	// save the old current working directory first
8079 	oldDirectory = context->cwd;
8080 	context->cwd = vnode;
8081 
8082 	mutex_unlock(&context->io_mutex);
8083 
8084 	if (oldDirectory)
8085 		put_vnode(oldDirectory);
8086 
8087 	return B_NO_ERROR;
8088 
8089 err:
8090 	put_vnode(vnode);
8091 	return status;
8092 }
8093 
8094 
8095 //	#pragma mark - kernel mirrored syscalls
8096 
8097 
8098 dev_t
8099 _kern_mount(const char* path, const char* device, const char* fsName,
8100 	uint32 flags, const char* args, size_t argsLength)
8101 {
8102 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8103 	if (pathBuffer.InitCheck() != B_OK)
8104 		return B_NO_MEMORY;
8105 
8106 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8107 }
8108 
8109 
8110 status_t
8111 _kern_unmount(const char* path, uint32 flags)
8112 {
8113 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8114 	if (pathBuffer.InitCheck() != B_OK)
8115 		return B_NO_MEMORY;
8116 
8117 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8118 }
8119 
8120 
8121 status_t
8122 _kern_read_fs_info(dev_t device, struct fs_info* info)
8123 {
8124 	if (info == NULL)
8125 		return B_BAD_VALUE;
8126 
8127 	return fs_read_info(device, info);
8128 }
8129 
8130 
8131 status_t
8132 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8133 {
8134 	if (info == NULL)
8135 		return B_BAD_VALUE;
8136 
8137 	return fs_write_info(device, info, mask);
8138 }
8139 
8140 
8141 status_t
8142 _kern_sync(void)
8143 {
8144 	// Note: _kern_sync() is also called from _user_sync()
8145 	int32 cookie = 0;
8146 	dev_t device;
8147 	while ((device = next_dev(&cookie)) >= 0) {
8148 		status_t status = fs_sync(device);
8149 		if (status != B_OK && status != B_BAD_VALUE) {
8150 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8151 				strerror(status));
8152 		}
8153 	}
8154 
8155 	return B_OK;
8156 }
8157 
8158 
8159 dev_t
8160 _kern_next_device(int32* _cookie)
8161 {
8162 	return fs_next_device(_cookie);
8163 }
8164 
8165 
8166 status_t
8167 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8168 	size_t infoSize)
8169 {
8170 	if (infoSize != sizeof(fd_info))
8171 		return B_BAD_VALUE;
8172 
8173 	// get the team
8174 	Team* team = Team::Get(teamID);
8175 	if (team == NULL)
8176 		return B_BAD_TEAM_ID;
8177 	BReference<Team> teamReference(team, true);
8178 
8179 	// now that we have a team reference, its I/O context won't go away
8180 	io_context* context = team->io_context;
8181 	MutexLocker contextLocker(context->io_mutex);
8182 
8183 	uint32 slot = *_cookie;
8184 
8185 	struct file_descriptor* descriptor;
8186 	while (slot < context->table_size
8187 		&& (descriptor = context->fds[slot]) == NULL) {
8188 		slot++;
8189 	}
8190 
8191 	if (slot >= context->table_size)
8192 		return B_ENTRY_NOT_FOUND;
8193 
8194 	info->number = slot;
8195 	info->open_mode = descriptor->open_mode;
8196 
8197 	struct vnode* vnode = fd_vnode(descriptor);
8198 	if (vnode != NULL) {
8199 		info->device = vnode->device;
8200 		info->node = vnode->id;
8201 	} else if (descriptor->u.mount != NULL) {
8202 		info->device = descriptor->u.mount->id;
8203 		info->node = -1;
8204 	}
8205 
8206 	*_cookie = slot + 1;
8207 	return B_OK;
8208 }
8209 
8210 
8211 int
8212 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8213 	int perms)
8214 {
8215 	if ((openMode & O_CREAT) != 0) {
8216 		return file_create_entry_ref(device, inode, name, openMode, perms,
8217 			true);
8218 	}
8219 
8220 	return file_open_entry_ref(device, inode, name, openMode, true);
8221 }
8222 
8223 
8224 /*!	\brief Opens a node specified by a FD + path pair.
8225 
8226 	At least one of \a fd and \a path must be specified.
8227 	If only \a fd is given, the function opens the node identified by this
8228 	FD. If only a path is given, this path is opened. If both are given and
8229 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8230 	of the directory (!) identified by \a fd.
8231 
8232 	\param fd The FD. May be < 0.
8233 	\param path The absolute or relative path. May be \c NULL.
8234 	\param openMode The open mode.
8235 	\return A FD referring to the newly opened node, or an error code,
8236 			if an error occurs.
8237 */
8238 int
8239 _kern_open(int fd, const char* path, int openMode, int perms)
8240 {
8241 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8242 	if (pathBuffer.InitCheck() != B_OK)
8243 		return B_NO_MEMORY;
8244 
8245 	if ((openMode & O_CREAT) != 0)
8246 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8247 
8248 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8249 }
8250 
8251 
8252 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8253 
8254 	The supplied name may be \c NULL, in which case directory identified
8255 	by \a device and \a inode will be opened. Otherwise \a device and
8256 	\a inode identify the parent directory of the directory to be opened
8257 	and \a name its entry name.
8258 
8259 	\param device If \a name is specified the ID of the device the parent
8260 		   directory of the directory to be opened resides on, otherwise
8261 		   the device of the directory itself.
8262 	\param inode If \a name is specified the node ID of the parent
8263 		   directory of the directory to be opened, otherwise node ID of the
8264 		   directory itself.
8265 	\param name The entry name of the directory to be opened. If \c NULL,
8266 		   the \a device + \a inode pair identify the node to be opened.
8267 	\return The FD of the newly opened directory or an error code, if
8268 			something went wrong.
8269 */
8270 int
8271 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8272 {
8273 	return dir_open_entry_ref(device, inode, name, true);
8274 }
8275 
8276 
8277 /*!	\brief Opens a directory specified by a FD + path pair.
8278 
8279 	At least one of \a fd and \a path must be specified.
8280 	If only \a fd is given, the function opens the directory identified by this
8281 	FD. If only a path is given, this path is opened. If both are given and
8282 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8283 	of the directory (!) identified by \a fd.
8284 
8285 	\param fd The FD. May be < 0.
8286 	\param path The absolute or relative path. May be \c NULL.
8287 	\return A FD referring to the newly opened directory, or an error code,
8288 			if an error occurs.
8289 */
8290 int
8291 _kern_open_dir(int fd, const char* path)
8292 {
8293 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8294 	if (pathBuffer.InitCheck() != B_OK)
8295 		return B_NO_MEMORY;
8296 
8297 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8298 }
8299 
8300 
8301 status_t
8302 _kern_fcntl(int fd, int op, size_t argument)
8303 {
8304 	return common_fcntl(fd, op, argument, true);
8305 }
8306 
8307 
8308 status_t
8309 _kern_fsync(int fd)
8310 {
8311 	return common_sync(fd, true);
8312 }
8313 
8314 
8315 status_t
8316 _kern_lock_node(int fd)
8317 {
8318 	return common_lock_node(fd, true);
8319 }
8320 
8321 
8322 status_t
8323 _kern_unlock_node(int fd)
8324 {
8325 	return common_unlock_node(fd, true);
8326 }
8327 
8328 
8329 status_t
8330 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8331 	int perms)
8332 {
8333 	return dir_create_entry_ref(device, inode, name, perms, true);
8334 }
8335 
8336 
8337 /*!	\brief Creates a directory specified by a FD + path pair.
8338 
8339 	\a path must always be specified (it contains the name of the new directory
8340 	at least). If only a path is given, this path identifies the location at
8341 	which the directory shall be created. If both \a fd and \a path are given
8342 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8343 	of the directory (!) identified by \a fd.
8344 
8345 	\param fd The FD. May be < 0.
8346 	\param path The absolute or relative path. Must not be \c NULL.
8347 	\param perms The access permissions the new directory shall have.
8348 	\return \c B_OK, if the directory has been created successfully, another
8349 			error code otherwise.
8350 */
8351 status_t
8352 _kern_create_dir(int fd, const char* path, int perms)
8353 {
8354 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8355 	if (pathBuffer.InitCheck() != B_OK)
8356 		return B_NO_MEMORY;
8357 
8358 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8359 }
8360 
8361 
8362 status_t
8363 _kern_remove_dir(int fd, const char* path)
8364 {
8365 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8366 	if (pathBuffer.InitCheck() != B_OK)
8367 		return B_NO_MEMORY;
8368 
8369 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8370 }
8371 
8372 
8373 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8374 
8375 	At least one of \a fd and \a path must be specified.
8376 	If only \a fd is given, the function the symlink to be read is the node
8377 	identified by this FD. If only a path is given, this path identifies the
8378 	symlink to be read. If both are given and the path is absolute, \a fd is
8379 	ignored; a relative path is reckoned off of the directory (!) identified
8380 	by \a fd.
8381 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8382 	will still be updated to reflect the required buffer size.
8383 
8384 	\param fd The FD. May be < 0.
8385 	\param path The absolute or relative path. May be \c NULL.
8386 	\param buffer The buffer into which the contents of the symlink shall be
8387 		   written.
8388 	\param _bufferSize A pointer to the size of the supplied buffer.
8389 	\return The length of the link on success or an appropriate error code
8390 */
8391 status_t
8392 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8393 {
8394 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8395 	if (pathBuffer.InitCheck() != B_OK)
8396 		return B_NO_MEMORY;
8397 
8398 	return common_read_link(fd, pathBuffer.LockBuffer(),
8399 		buffer, _bufferSize, true);
8400 }
8401 
8402 
8403 /*!	\brief Creates a symlink specified by a FD + path pair.
8404 
8405 	\a path must always be specified (it contains the name of the new symlink
8406 	at least). If only a path is given, this path identifies the location at
8407 	which the symlink shall be created. If both \a fd and \a path are given and
8408 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8409 	of the directory (!) identified by \a fd.
8410 
8411 	\param fd The FD. May be < 0.
8412 	\param toPath The absolute or relative path. Must not be \c NULL.
8413 	\param mode The access permissions the new symlink shall have.
8414 	\return \c B_OK, if the symlink has been created successfully, another
8415 			error code otherwise.
8416 */
8417 status_t
8418 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8419 {
8420 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8421 	if (pathBuffer.InitCheck() != B_OK)
8422 		return B_NO_MEMORY;
8423 
8424 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8425 		toPath, mode, true);
8426 }
8427 
8428 
8429 status_t
8430 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8431 	bool traverseLeafLink)
8432 {
8433 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8434 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8435 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8436 		return B_NO_MEMORY;
8437 
8438 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8439 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8440 }
8441 
8442 
8443 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8444 
8445 	\a path must always be specified (it contains at least the name of the entry
8446 	to be deleted). If only a path is given, this path identifies the entry
8447 	directly. If both \a fd and \a path are given and the path is absolute,
8448 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8449 	identified by \a fd.
8450 
8451 	\param fd The FD. May be < 0.
8452 	\param path The absolute or relative path. Must not be \c NULL.
8453 	\return \c B_OK, if the entry has been removed successfully, another
8454 			error code otherwise.
8455 */
8456 status_t
8457 _kern_unlink(int fd, const char* path)
8458 {
8459 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8460 	if (pathBuffer.InitCheck() != B_OK)
8461 		return B_NO_MEMORY;
8462 
8463 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8464 }
8465 
8466 
8467 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8468 		   by another FD + path pair.
8469 
8470 	\a oldPath and \a newPath must always be specified (they contain at least
8471 	the name of the entry). If only a path is given, this path identifies the
8472 	entry directly. If both a FD and a path are given and the path is absolute,
8473 	the FD is ignored; a relative path is reckoned off of the directory (!)
8474 	identified by the respective FD.
8475 
8476 	\param oldFD The FD of the old location. May be < 0.
8477 	\param oldPath The absolute or relative path of the old location. Must not
8478 		   be \c NULL.
8479 	\param newFD The FD of the new location. May be < 0.
8480 	\param newPath The absolute or relative path of the new location. Must not
8481 		   be \c NULL.
8482 	\return \c B_OK, if the entry has been moved successfully, another
8483 			error code otherwise.
8484 */
8485 status_t
8486 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8487 {
8488 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8489 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8490 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8491 		return B_NO_MEMORY;
8492 
8493 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8494 		newFD, newPathBuffer.LockBuffer(), true);
8495 }
8496 
8497 
8498 status_t
8499 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8500 {
8501 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8502 	if (pathBuffer.InitCheck() != B_OK)
8503 		return B_NO_MEMORY;
8504 
8505 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8506 		true);
8507 }
8508 
8509 
8510 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8511 
8512 	If only \a fd is given, the stat operation associated with the type
8513 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8514 	given, this path identifies the entry for whose node to retrieve the
8515 	stat data. If both \a fd and \a path are given and the path is absolute,
8516 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8517 	identified by \a fd and specifies the entry whose stat data shall be
8518 	retrieved.
8519 
8520 	\param fd The FD. May be < 0.
8521 	\param path The absolute or relative path. Must not be \c NULL.
8522 	\param traverseLeafLink If \a path is given, \c true specifies that the
8523 		   function shall not stick to symlinks, but traverse them.
8524 	\param stat The buffer the stat data shall be written into.
8525 	\param statSize The size of the supplied stat buffer.
8526 	\return \c B_OK, if the the stat data have been read successfully, another
8527 			error code otherwise.
8528 */
8529 status_t
8530 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8531 	struct stat* stat, size_t statSize)
8532 {
8533 	struct stat completeStat;
8534 	struct stat* originalStat = NULL;
8535 	status_t status;
8536 
8537 	if (statSize > sizeof(struct stat))
8538 		return B_BAD_VALUE;
8539 
8540 	// this supports different stat extensions
8541 	if (statSize < sizeof(struct stat)) {
8542 		originalStat = stat;
8543 		stat = &completeStat;
8544 	}
8545 
8546 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8547 
8548 	if (status == B_OK && originalStat != NULL)
8549 		memcpy(originalStat, stat, statSize);
8550 
8551 	return status;
8552 }
8553 
8554 
8555 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8556 
8557 	If only \a fd is given, the stat operation associated with the type
8558 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8559 	given, this path identifies the entry for whose node to write the
8560 	stat data. If both \a fd and \a path are given and the path is absolute,
8561 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8562 	identified by \a fd and specifies the entry whose stat data shall be
8563 	written.
8564 
8565 	\param fd The FD. May be < 0.
8566 	\param path The absolute or relative path. May be \c NULL.
8567 	\param traverseLeafLink If \a path is given, \c true specifies that the
8568 		   function shall not stick to symlinks, but traverse them.
8569 	\param stat The buffer containing the stat data to be written.
8570 	\param statSize The size of the supplied stat buffer.
8571 	\param statMask A mask specifying which parts of the stat data shall be
8572 		   written.
8573 	\return \c B_OK, if the the stat data have been written successfully,
8574 			another error code otherwise.
8575 */
8576 status_t
8577 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8578 	const struct stat* stat, size_t statSize, int statMask)
8579 {
8580 	struct stat completeStat;
8581 
8582 	if (statSize > sizeof(struct stat))
8583 		return B_BAD_VALUE;
8584 
8585 	// this supports different stat extensions
8586 	if (statSize < sizeof(struct stat)) {
8587 		memset((uint8*)&completeStat + statSize, 0,
8588 			sizeof(struct stat) - statSize);
8589 		memcpy(&completeStat, stat, statSize);
8590 		stat = &completeStat;
8591 	}
8592 
8593 	status_t status;
8594 
8595 	if (path != NULL) {
8596 		// path given: write the stat of the node referred to by (fd, path)
8597 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8598 		if (pathBuffer.InitCheck() != B_OK)
8599 			return B_NO_MEMORY;
8600 
8601 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8602 			traverseLeafLink, stat, statMask, true);
8603 	} else {
8604 		// no path given: get the FD and use the FD operation
8605 		struct file_descriptor* descriptor
8606 			= get_fd(get_current_io_context(true), fd);
8607 		if (descriptor == NULL)
8608 			return B_FILE_ERROR;
8609 
8610 		if (descriptor->ops->fd_write_stat)
8611 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8612 		else
8613 			status = B_UNSUPPORTED;
8614 
8615 		put_fd(descriptor);
8616 	}
8617 
8618 	return status;
8619 }
8620 
8621 
8622 int
8623 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8624 {
8625 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8626 	if (pathBuffer.InitCheck() != B_OK)
8627 		return B_NO_MEMORY;
8628 
8629 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8630 }
8631 
8632 
8633 int
8634 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8635 	int openMode)
8636 {
8637 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8638 	if (pathBuffer.InitCheck() != B_OK)
8639 		return B_NO_MEMORY;
8640 
8641 	if ((openMode & O_CREAT) != 0) {
8642 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8643 			true);
8644 	}
8645 
8646 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8647 }
8648 
8649 
8650 status_t
8651 _kern_remove_attr(int fd, const char* name)
8652 {
8653 	return attr_remove(fd, name, true);
8654 }
8655 
8656 
8657 status_t
8658 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8659 	const char* toName)
8660 {
8661 	return attr_rename(fromFile, fromName, toFile, toName, true);
8662 }
8663 
8664 
8665 int
8666 _kern_open_index_dir(dev_t device)
8667 {
8668 	return index_dir_open(device, true);
8669 }
8670 
8671 
8672 status_t
8673 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8674 {
8675 	return index_create(device, name, type, flags, true);
8676 }
8677 
8678 
8679 status_t
8680 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8681 {
8682 	return index_name_read_stat(device, name, stat, true);
8683 }
8684 
8685 
8686 status_t
8687 _kern_remove_index(dev_t device, const char* name)
8688 {
8689 	return index_remove(device, name, true);
8690 }
8691 
8692 
8693 status_t
8694 _kern_getcwd(char* buffer, size_t size)
8695 {
8696 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8697 
8698 	// Call vfs to get current working directory
8699 	return get_cwd(buffer, size, true);
8700 }
8701 
8702 
8703 status_t
8704 _kern_setcwd(int fd, const char* path)
8705 {
8706 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8707 	if (pathBuffer.InitCheck() != B_OK)
8708 		return B_NO_MEMORY;
8709 
8710 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8711 }
8712 
8713 
8714 //	#pragma mark - userland syscalls
8715 
8716 
8717 dev_t
8718 _user_mount(const char* userPath, const char* userDevice,
8719 	const char* userFileSystem, uint32 flags, const char* userArgs,
8720 	size_t argsLength)
8721 {
8722 	char fileSystem[B_FILE_NAME_LENGTH];
8723 	KPath path, device;
8724 	char* args = NULL;
8725 	status_t status;
8726 
8727 	if (!IS_USER_ADDRESS(userPath)
8728 		|| !IS_USER_ADDRESS(userFileSystem)
8729 		|| !IS_USER_ADDRESS(userDevice))
8730 		return B_BAD_ADDRESS;
8731 
8732 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8733 		return B_NO_MEMORY;
8734 
8735 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8736 		return B_BAD_ADDRESS;
8737 
8738 	if (userFileSystem != NULL
8739 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8740 		return B_BAD_ADDRESS;
8741 
8742 	if (userDevice != NULL
8743 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8744 			< B_OK)
8745 		return B_BAD_ADDRESS;
8746 
8747 	if (userArgs != NULL && argsLength > 0) {
8748 		if (!IS_USER_ADDRESS(userArgs))
8749 			return B_BAD_ADDRESS;
8750 
8751 		// this is a safety restriction
8752 		if (argsLength >= 65536)
8753 			return B_NAME_TOO_LONG;
8754 
8755 		args = (char*)malloc(argsLength + 1);
8756 		if (args == NULL)
8757 			return B_NO_MEMORY;
8758 
8759 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8760 			free(args);
8761 			return B_BAD_ADDRESS;
8762 		}
8763 	}
8764 	path.UnlockBuffer();
8765 	device.UnlockBuffer();
8766 
8767 	status = fs_mount(path.LockBuffer(),
8768 		userDevice != NULL ? device.Path() : NULL,
8769 		userFileSystem ? fileSystem : NULL, flags, args, false);
8770 
8771 	free(args);
8772 	return status;
8773 }
8774 
8775 
8776 status_t
8777 _user_unmount(const char* userPath, uint32 flags)
8778 {
8779 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8780 
8781 	if (!IS_USER_ADDRESS(userPath))
8782 		return B_BAD_ADDRESS;
8783 
8784 	if (pathBuffer.InitCheck() != B_OK)
8785 		return B_NO_MEMORY;
8786 
8787 	char* path = pathBuffer.LockBuffer();
8788 
8789 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8790 		return B_BAD_ADDRESS;
8791 
8792 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8793 }
8794 
8795 
8796 status_t
8797 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8798 {
8799 	struct fs_info info;
8800 	status_t status;
8801 
8802 	if (userInfo == NULL)
8803 		return B_BAD_VALUE;
8804 
8805 	if (!IS_USER_ADDRESS(userInfo))
8806 		return B_BAD_ADDRESS;
8807 
8808 	status = fs_read_info(device, &info);
8809 	if (status != B_OK)
8810 		return status;
8811 
8812 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8813 		return B_BAD_ADDRESS;
8814 
8815 	return B_OK;
8816 }
8817 
8818 
8819 status_t
8820 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8821 {
8822 	struct fs_info info;
8823 
8824 	if (userInfo == NULL)
8825 		return B_BAD_VALUE;
8826 
8827 	if (!IS_USER_ADDRESS(userInfo)
8828 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8829 		return B_BAD_ADDRESS;
8830 
8831 	return fs_write_info(device, &info, mask);
8832 }
8833 
8834 
8835 dev_t
8836 _user_next_device(int32* _userCookie)
8837 {
8838 	int32 cookie;
8839 	dev_t device;
8840 
8841 	if (!IS_USER_ADDRESS(_userCookie)
8842 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8843 		return B_BAD_ADDRESS;
8844 
8845 	device = fs_next_device(&cookie);
8846 
8847 	if (device >= B_OK) {
8848 		// update user cookie
8849 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8850 			return B_BAD_ADDRESS;
8851 	}
8852 
8853 	return device;
8854 }
8855 
8856 
8857 status_t
8858 _user_sync(void)
8859 {
8860 	return _kern_sync();
8861 }
8862 
8863 
8864 status_t
8865 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8866 	size_t infoSize)
8867 {
8868 	struct fd_info info;
8869 	uint32 cookie;
8870 
8871 	// only root can do this (or should root's group be enough?)
8872 	if (geteuid() != 0)
8873 		return B_NOT_ALLOWED;
8874 
8875 	if (infoSize != sizeof(fd_info))
8876 		return B_BAD_VALUE;
8877 
8878 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8879 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8880 		return B_BAD_ADDRESS;
8881 
8882 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8883 	if (status != B_OK)
8884 		return status;
8885 
8886 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8887 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8888 		return B_BAD_ADDRESS;
8889 
8890 	return status;
8891 }
8892 
8893 
8894 status_t
8895 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8896 	char* userPath, size_t pathLength)
8897 {
8898 	if (!IS_USER_ADDRESS(userPath))
8899 		return B_BAD_ADDRESS;
8900 
8901 	KPath path(B_PATH_NAME_LENGTH + 1);
8902 	if (path.InitCheck() != B_OK)
8903 		return B_NO_MEMORY;
8904 
8905 	// copy the leaf name onto the stack
8906 	char stackLeaf[B_FILE_NAME_LENGTH];
8907 	if (leaf != NULL) {
8908 		if (!IS_USER_ADDRESS(leaf))
8909 			return B_BAD_ADDRESS;
8910 
8911 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8912 		if (length < 0)
8913 			return length;
8914 		if (length >= B_FILE_NAME_LENGTH)
8915 			return B_NAME_TOO_LONG;
8916 
8917 		leaf = stackLeaf;
8918 	}
8919 
8920 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8921 		false, path.LockBuffer(), path.BufferSize());
8922 	if (status != B_OK)
8923 		return status;
8924 
8925 	path.UnlockBuffer();
8926 
8927 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8928 	if (length < 0)
8929 		return length;
8930 	if (length >= (int)pathLength)
8931 		return B_BUFFER_OVERFLOW;
8932 
8933 	return B_OK;
8934 }
8935 
8936 
8937 status_t
8938 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8939 {
8940 	if (userPath == NULL || buffer == NULL)
8941 		return B_BAD_VALUE;
8942 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8943 		return B_BAD_ADDRESS;
8944 
8945 	// copy path from userland
8946 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8947 	if (pathBuffer.InitCheck() != B_OK)
8948 		return B_NO_MEMORY;
8949 	char* path = pathBuffer.LockBuffer();
8950 
8951 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8952 		return B_BAD_ADDRESS;
8953 
8954 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8955 		false);
8956 	if (error != B_OK)
8957 		return error;
8958 
8959 	// copy back to userland
8960 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8961 	if (len < 0)
8962 		return len;
8963 	if (len >= B_PATH_NAME_LENGTH)
8964 		return B_BUFFER_OVERFLOW;
8965 
8966 	return B_OK;
8967 }
8968 
8969 
8970 int
8971 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8972 	int openMode, int perms)
8973 {
8974 	char name[B_FILE_NAME_LENGTH];
8975 
8976 	if (userName == NULL || device < 0 || inode < 0)
8977 		return B_BAD_VALUE;
8978 	if (!IS_USER_ADDRESS(userName)
8979 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8980 		return B_BAD_ADDRESS;
8981 
8982 	if ((openMode & O_CREAT) != 0) {
8983 		return file_create_entry_ref(device, inode, name, openMode, perms,
8984 			false);
8985 	}
8986 
8987 	return file_open_entry_ref(device, inode, name, openMode, false);
8988 }
8989 
8990 
8991 int
8992 _user_open(int fd, const char* userPath, int openMode, int perms)
8993 {
8994 	KPath path(B_PATH_NAME_LENGTH + 1);
8995 	if (path.InitCheck() != B_OK)
8996 		return B_NO_MEMORY;
8997 
8998 	char* buffer = path.LockBuffer();
8999 
9000 	if (!IS_USER_ADDRESS(userPath)
9001 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
9002 		return B_BAD_ADDRESS;
9003 
9004 	if ((openMode & O_CREAT) != 0)
9005 		return file_create(fd, buffer, openMode, perms, false);
9006 
9007 	return file_open(fd, buffer, openMode, false);
9008 }
9009 
9010 
9011 int
9012 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9013 {
9014 	if (userName != NULL) {
9015 		char name[B_FILE_NAME_LENGTH];
9016 
9017 		if (!IS_USER_ADDRESS(userName)
9018 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
9019 			return B_BAD_ADDRESS;
9020 
9021 		return dir_open_entry_ref(device, inode, name, false);
9022 	}
9023 	return dir_open_entry_ref(device, inode, NULL, false);
9024 }
9025 
9026 
9027 int
9028 _user_open_dir(int fd, const char* userPath)
9029 {
9030 	if (userPath == NULL)
9031 		return dir_open(fd, NULL, false);
9032 
9033 	KPath path(B_PATH_NAME_LENGTH + 1);
9034 	if (path.InitCheck() != B_OK)
9035 		return B_NO_MEMORY;
9036 
9037 	char* buffer = path.LockBuffer();
9038 
9039 	if (!IS_USER_ADDRESS(userPath)
9040 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
9041 		return B_BAD_ADDRESS;
9042 
9043 	return dir_open(fd, buffer, false);
9044 }
9045 
9046 
9047 /*!	\brief Opens a directory's parent directory and returns the entry name
9048 		   of the former.
9049 
9050 	Aside from that it returns the directory's entry name, this method is
9051 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9052 	equivalent, if \a userName is \c NULL.
9053 
9054 	If a name buffer is supplied and the name does not fit the buffer, the
9055 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9056 
9057 	\param fd A FD referring to a directory.
9058 	\param userName Buffer the directory's entry name shall be written into.
9059 		   May be \c NULL.
9060 	\param nameLength Size of the name buffer.
9061 	\return The file descriptor of the opened parent directory, if everything
9062 			went fine, an error code otherwise.
9063 */
9064 int
9065 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9066 {
9067 	bool kernel = false;
9068 
9069 	if (userName && !IS_USER_ADDRESS(userName))
9070 		return B_BAD_ADDRESS;
9071 
9072 	// open the parent dir
9073 	int parentFD = dir_open(fd, (char*)"..", kernel);
9074 	if (parentFD < 0)
9075 		return parentFD;
9076 	FDCloser fdCloser(parentFD, kernel);
9077 
9078 	if (userName) {
9079 		// get the vnodes
9080 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9081 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9082 		VNodePutter parentVNodePutter(parentVNode);
9083 		VNodePutter dirVNodePutter(dirVNode);
9084 		if (!parentVNode || !dirVNode)
9085 			return B_FILE_ERROR;
9086 
9087 		// get the vnode name
9088 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9089 		struct dirent* buffer = (struct dirent*)_buffer;
9090 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9091 			sizeof(_buffer), get_current_io_context(false));
9092 		if (status != B_OK)
9093 			return status;
9094 
9095 		// copy the name to the userland buffer
9096 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9097 		if (len < 0)
9098 			return len;
9099 		if (len >= (int)nameLength)
9100 			return B_BUFFER_OVERFLOW;
9101 	}
9102 
9103 	return fdCloser.Detach();
9104 }
9105 
9106 
9107 status_t
9108 _user_fcntl(int fd, int op, size_t argument)
9109 {
9110 	status_t status = common_fcntl(fd, op, argument, false);
9111 	if (op == F_SETLKW)
9112 		syscall_restart_handle_post(status);
9113 
9114 	return status;
9115 }
9116 
9117 
9118 status_t
9119 _user_fsync(int fd)
9120 {
9121 	return common_sync(fd, false);
9122 }
9123 
9124 
9125 status_t
9126 _user_flock(int fd, int operation)
9127 {
9128 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9129 
9130 	// Check if the operation is valid
9131 	switch (operation & ~LOCK_NB) {
9132 		case LOCK_UN:
9133 		case LOCK_SH:
9134 		case LOCK_EX:
9135 			break;
9136 
9137 		default:
9138 			return B_BAD_VALUE;
9139 	}
9140 
9141 	struct file_descriptor* descriptor;
9142 	struct vnode* vnode;
9143 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9144 	if (descriptor == NULL)
9145 		return B_FILE_ERROR;
9146 
9147 	if (descriptor->type != FDTYPE_FILE) {
9148 		put_fd(descriptor);
9149 		return B_BAD_VALUE;
9150 	}
9151 
9152 	struct flock flock;
9153 	flock.l_start = 0;
9154 	flock.l_len = OFF_MAX;
9155 	flock.l_whence = 0;
9156 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9157 
9158 	status_t status;
9159 	if ((operation & LOCK_UN) != 0) {
9160 		if (HAS_FS_CALL(vnode, release_lock))
9161 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9162 		else
9163 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9164 	} else {
9165 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9166 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9167 				(operation & LOCK_NB) == 0);
9168 		} else {
9169 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9170 				(operation & LOCK_NB) == 0);
9171 		}
9172 	}
9173 
9174 	syscall_restart_handle_post(status);
9175 
9176 	put_fd(descriptor);
9177 	return status;
9178 }
9179 
9180 
9181 status_t
9182 _user_lock_node(int fd)
9183 {
9184 	return common_lock_node(fd, false);
9185 }
9186 
9187 
9188 status_t
9189 _user_unlock_node(int fd)
9190 {
9191 	return common_unlock_node(fd, false);
9192 }
9193 
9194 
9195 status_t
9196 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9197 	int perms)
9198 {
9199 	char name[B_FILE_NAME_LENGTH];
9200 	status_t status;
9201 
9202 	if (!IS_USER_ADDRESS(userName))
9203 		return B_BAD_ADDRESS;
9204 
9205 	status = user_strlcpy(name, userName, sizeof(name));
9206 	if (status < 0)
9207 		return status;
9208 
9209 	return dir_create_entry_ref(device, inode, name, perms, false);
9210 }
9211 
9212 
9213 status_t
9214 _user_create_dir(int fd, const char* userPath, int perms)
9215 {
9216 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9217 	if (pathBuffer.InitCheck() != B_OK)
9218 		return B_NO_MEMORY;
9219 
9220 	char* path = pathBuffer.LockBuffer();
9221 
9222 	if (!IS_USER_ADDRESS(userPath)
9223 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9224 		return B_BAD_ADDRESS;
9225 
9226 	return dir_create(fd, path, perms, false);
9227 }
9228 
9229 
9230 status_t
9231 _user_remove_dir(int fd, const char* userPath)
9232 {
9233 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9234 	if (pathBuffer.InitCheck() != B_OK)
9235 		return B_NO_MEMORY;
9236 
9237 	char* path = pathBuffer.LockBuffer();
9238 
9239 	if (userPath != NULL) {
9240 		if (!IS_USER_ADDRESS(userPath)
9241 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9242 			return B_BAD_ADDRESS;
9243 	}
9244 
9245 	return dir_remove(fd, userPath ? path : NULL, false);
9246 }
9247 
9248 
9249 status_t
9250 _user_read_link(int fd, const char* userPath, char* userBuffer,
9251 	size_t* userBufferSize)
9252 {
9253 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9254 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9255 		return B_NO_MEMORY;
9256 
9257 	size_t bufferSize;
9258 
9259 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9260 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9261 		return B_BAD_ADDRESS;
9262 
9263 	char* path = pathBuffer.LockBuffer();
9264 	char* buffer = linkBuffer.LockBuffer();
9265 
9266 	if (userPath) {
9267 		if (!IS_USER_ADDRESS(userPath)
9268 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9269 			return B_BAD_ADDRESS;
9270 
9271 		if (bufferSize > B_PATH_NAME_LENGTH)
9272 			bufferSize = B_PATH_NAME_LENGTH;
9273 	}
9274 
9275 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9276 		&bufferSize, false);
9277 
9278 	// we also update the bufferSize in case of errors
9279 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9280 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9281 		return B_BAD_ADDRESS;
9282 
9283 	if (status != B_OK)
9284 		return status;
9285 
9286 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9287 		return B_BAD_ADDRESS;
9288 
9289 	return B_OK;
9290 }
9291 
9292 
9293 status_t
9294 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9295 	int mode)
9296 {
9297 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9298 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9299 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9300 		return B_NO_MEMORY;
9301 
9302 	char* path = pathBuffer.LockBuffer();
9303 	char* toPath = toPathBuffer.LockBuffer();
9304 
9305 	if (!IS_USER_ADDRESS(userPath)
9306 		|| !IS_USER_ADDRESS(userToPath)
9307 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9308 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9309 		return B_BAD_ADDRESS;
9310 
9311 	return common_create_symlink(fd, path, toPath, mode, false);
9312 }
9313 
9314 
9315 status_t
9316 _user_create_link(int pathFD, const char* userPath, int toFD,
9317 	const char* userToPath, bool traverseLeafLink)
9318 {
9319 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9320 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9321 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9322 		return B_NO_MEMORY;
9323 
9324 	char* path = pathBuffer.LockBuffer();
9325 	char* toPath = toPathBuffer.LockBuffer();
9326 
9327 	if (!IS_USER_ADDRESS(userPath)
9328 		|| !IS_USER_ADDRESS(userToPath)
9329 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9330 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9331 		return B_BAD_ADDRESS;
9332 
9333 	status_t status = check_path(toPath);
9334 	if (status != B_OK)
9335 		return status;
9336 
9337 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9338 		false);
9339 }
9340 
9341 
9342 status_t
9343 _user_unlink(int fd, const char* userPath)
9344 {
9345 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9346 	if (pathBuffer.InitCheck() != B_OK)
9347 		return B_NO_MEMORY;
9348 
9349 	char* path = pathBuffer.LockBuffer();
9350 
9351 	if (!IS_USER_ADDRESS(userPath)
9352 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9353 		return B_BAD_ADDRESS;
9354 
9355 	return common_unlink(fd, path, false);
9356 }
9357 
9358 
9359 status_t
9360 _user_rename(int oldFD, const char* userOldPath, int newFD,
9361 	const char* userNewPath)
9362 {
9363 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9364 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9365 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9366 		return B_NO_MEMORY;
9367 
9368 	char* oldPath = oldPathBuffer.LockBuffer();
9369 	char* newPath = newPathBuffer.LockBuffer();
9370 
9371 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9372 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9373 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9374 		return B_BAD_ADDRESS;
9375 
9376 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9377 }
9378 
9379 
9380 status_t
9381 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9382 {
9383 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9384 	if (pathBuffer.InitCheck() != B_OK)
9385 		return B_NO_MEMORY;
9386 
9387 	char* path = pathBuffer.LockBuffer();
9388 
9389 	if (!IS_USER_ADDRESS(userPath)
9390 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9391 		return B_BAD_ADDRESS;
9392 	}
9393 
9394 	// split into directory vnode and filename path
9395 	char filename[B_FILE_NAME_LENGTH];
9396 	struct vnode* dir;
9397 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9398 	if (status != B_OK)
9399 		return status;
9400 
9401 	VNodePutter _(dir);
9402 
9403 	// the underlying FS needs to support creating FIFOs
9404 	if (!HAS_FS_CALL(dir, create_special_node))
9405 		return B_UNSUPPORTED;
9406 
9407 	// create the entry	-- the FIFO sub node is set up automatically
9408 	fs_vnode superVnode;
9409 	ino_t nodeID;
9410 	status = FS_CALL(dir, create_special_node, filename, NULL,
9411 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9412 
9413 	// create_special_node() acquired a reference for us that we don't need.
9414 	if (status == B_OK)
9415 		put_vnode(dir->mount->volume, nodeID);
9416 
9417 	return status;
9418 }
9419 
9420 
9421 status_t
9422 _user_create_pipe(int* userFDs)
9423 {
9424 	// rootfs should support creating FIFOs, but let's be sure
9425 	if (!HAS_FS_CALL(sRoot, create_special_node))
9426 		return B_UNSUPPORTED;
9427 
9428 	// create the node	-- the FIFO sub node is set up automatically
9429 	fs_vnode superVnode;
9430 	ino_t nodeID;
9431 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9432 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9433 	if (status != B_OK)
9434 		return status;
9435 
9436 	// We've got one reference to the node and need another one.
9437 	struct vnode* vnode;
9438 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9439 	if (status != B_OK) {
9440 		// that should not happen
9441 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9442 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9443 		return status;
9444 	}
9445 
9446 	// Everything looks good so far. Open two FDs for reading respectively
9447 	// writing.
9448 	int fds[2];
9449 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9450 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9451 
9452 	FDCloser closer0(fds[0], false);
9453 	FDCloser closer1(fds[1], false);
9454 
9455 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9456 
9457 	// copy FDs to userland
9458 	if (status == B_OK) {
9459 		if (!IS_USER_ADDRESS(userFDs)
9460 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9461 			status = B_BAD_ADDRESS;
9462 		}
9463 	}
9464 
9465 	// keep FDs, if everything went fine
9466 	if (status == B_OK) {
9467 		closer0.Detach();
9468 		closer1.Detach();
9469 	}
9470 
9471 	return status;
9472 }
9473 
9474 
9475 status_t
9476 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9477 {
9478 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9479 	if (pathBuffer.InitCheck() != B_OK)
9480 		return B_NO_MEMORY;
9481 
9482 	char* path = pathBuffer.LockBuffer();
9483 
9484 	if (!IS_USER_ADDRESS(userPath)
9485 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9486 		return B_BAD_ADDRESS;
9487 
9488 	return common_access(fd, path, mode, effectiveUserGroup, false);
9489 }
9490 
9491 
9492 status_t
9493 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9494 	struct stat* userStat, size_t statSize)
9495 {
9496 	struct stat stat;
9497 	status_t status;
9498 
9499 	if (statSize > sizeof(struct stat))
9500 		return B_BAD_VALUE;
9501 
9502 	if (!IS_USER_ADDRESS(userStat))
9503 		return B_BAD_ADDRESS;
9504 
9505 	if (userPath != NULL) {
9506 		// path given: get the stat of the node referred to by (fd, path)
9507 		if (!IS_USER_ADDRESS(userPath))
9508 			return B_BAD_ADDRESS;
9509 
9510 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9511 		if (pathBuffer.InitCheck() != B_OK)
9512 			return B_NO_MEMORY;
9513 
9514 		char* path = pathBuffer.LockBuffer();
9515 
9516 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9517 		if (length < B_OK)
9518 			return length;
9519 		if (length >= B_PATH_NAME_LENGTH)
9520 			return B_NAME_TOO_LONG;
9521 
9522 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9523 	} else {
9524 		// no path given: get the FD and use the FD operation
9525 		struct file_descriptor* descriptor
9526 			= get_fd(get_current_io_context(false), fd);
9527 		if (descriptor == NULL)
9528 			return B_FILE_ERROR;
9529 
9530 		if (descriptor->ops->fd_read_stat)
9531 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9532 		else
9533 			status = B_UNSUPPORTED;
9534 
9535 		put_fd(descriptor);
9536 	}
9537 
9538 	if (status != B_OK)
9539 		return status;
9540 
9541 	return user_memcpy(userStat, &stat, statSize);
9542 }
9543 
9544 
9545 status_t
9546 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9547 	const struct stat* userStat, size_t statSize, int statMask)
9548 {
9549 	if (statSize > sizeof(struct stat))
9550 		return B_BAD_VALUE;
9551 
9552 	struct stat stat;
9553 
9554 	if (!IS_USER_ADDRESS(userStat)
9555 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9556 		return B_BAD_ADDRESS;
9557 
9558 	// clear additional stat fields
9559 	if (statSize < sizeof(struct stat))
9560 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9561 
9562 	status_t status;
9563 
9564 	if (userPath != NULL) {
9565 		// path given: write the stat of the node referred to by (fd, path)
9566 		if (!IS_USER_ADDRESS(userPath))
9567 			return B_BAD_ADDRESS;
9568 
9569 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9570 		if (pathBuffer.InitCheck() != B_OK)
9571 			return B_NO_MEMORY;
9572 
9573 		char* path = pathBuffer.LockBuffer();
9574 
9575 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9576 		if (length < B_OK)
9577 			return length;
9578 		if (length >= B_PATH_NAME_LENGTH)
9579 			return B_NAME_TOO_LONG;
9580 
9581 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9582 			statMask, false);
9583 	} else {
9584 		// no path given: get the FD and use the FD operation
9585 		struct file_descriptor* descriptor
9586 			= get_fd(get_current_io_context(false), fd);
9587 		if (descriptor == NULL)
9588 			return B_FILE_ERROR;
9589 
9590 		if (descriptor->ops->fd_write_stat) {
9591 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9592 				statMask);
9593 		} else
9594 			status = B_UNSUPPORTED;
9595 
9596 		put_fd(descriptor);
9597 	}
9598 
9599 	return status;
9600 }
9601 
9602 
9603 int
9604 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9605 {
9606 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9607 	if (pathBuffer.InitCheck() != B_OK)
9608 		return B_NO_MEMORY;
9609 
9610 	char* path = pathBuffer.LockBuffer();
9611 
9612 	if (userPath != NULL) {
9613 		if (!IS_USER_ADDRESS(userPath)
9614 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9615 			return B_BAD_ADDRESS;
9616 	}
9617 
9618 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9619 }
9620 
9621 
9622 ssize_t
9623 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9624 	size_t readBytes)
9625 {
9626 	char attribute[B_FILE_NAME_LENGTH];
9627 
9628 	if (userAttribute == NULL)
9629 		return B_BAD_VALUE;
9630 	if (!IS_USER_ADDRESS(userAttribute)
9631 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9632 		return B_BAD_ADDRESS;
9633 	}
9634 
9635 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9636 	if (attr < 0)
9637 		return attr;
9638 
9639 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9640 	_user_close(attr);
9641 
9642 	return bytes;
9643 }
9644 
9645 
9646 ssize_t
9647 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9648 	const void* buffer, size_t writeBytes)
9649 {
9650 	char attribute[B_FILE_NAME_LENGTH];
9651 
9652 	if (userAttribute == NULL)
9653 		return B_BAD_VALUE;
9654 	if (!IS_USER_ADDRESS(userAttribute)
9655 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9656 		return B_BAD_ADDRESS;
9657 	}
9658 
9659 	// Try to support the BeOS typical truncation as well as the position
9660 	// argument
9661 	int attr = attr_create(fd, NULL, attribute, type,
9662 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9663 	if (attr < 0)
9664 		return attr;
9665 
9666 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9667 	_user_close(attr);
9668 
9669 	return bytes;
9670 }
9671 
9672 
9673 status_t
9674 _user_stat_attr(int fd, const char* userAttribute,
9675 	struct attr_info* userAttrInfo)
9676 {
9677 	char attribute[B_FILE_NAME_LENGTH];
9678 
9679 	if (userAttribute == NULL || userAttrInfo == NULL)
9680 		return B_BAD_VALUE;
9681 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo)
9682 		|| user_strlcpy(attribute, userAttribute, sizeof(attribute)) < B_OK) {
9683 		return B_BAD_ADDRESS;
9684 	}
9685 
9686 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9687 	if (attr < 0)
9688 		return attr;
9689 
9690 	struct file_descriptor* descriptor
9691 		= get_fd(get_current_io_context(false), attr);
9692 	if (descriptor == NULL) {
9693 		_user_close(attr);
9694 		return B_FILE_ERROR;
9695 	}
9696 
9697 	struct stat stat;
9698 	status_t status;
9699 	if (descriptor->ops->fd_read_stat)
9700 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9701 	else
9702 		status = B_UNSUPPORTED;
9703 
9704 	put_fd(descriptor);
9705 	_user_close(attr);
9706 
9707 	if (status == B_OK) {
9708 		attr_info info;
9709 		info.type = stat.st_type;
9710 		info.size = stat.st_size;
9711 
9712 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9713 			return B_BAD_ADDRESS;
9714 	}
9715 
9716 	return status;
9717 }
9718 
9719 
9720 int
9721 _user_open_attr(int fd, const char* userPath, const char* userName,
9722 	uint32 type, int openMode)
9723 {
9724 	char name[B_FILE_NAME_LENGTH];
9725 
9726 	if (!IS_USER_ADDRESS(userName)
9727 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9728 		return B_BAD_ADDRESS;
9729 
9730 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9731 	if (pathBuffer.InitCheck() != B_OK)
9732 		return B_NO_MEMORY;
9733 
9734 	char* path = pathBuffer.LockBuffer();
9735 
9736 	if (userPath != NULL) {
9737 		if (!IS_USER_ADDRESS(userPath)
9738 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9739 			return B_BAD_ADDRESS;
9740 	}
9741 
9742 	if ((openMode & O_CREAT) != 0) {
9743 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9744 			false);
9745 	}
9746 
9747 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9748 }
9749 
9750 
9751 status_t
9752 _user_remove_attr(int fd, const char* userName)
9753 {
9754 	char name[B_FILE_NAME_LENGTH];
9755 
9756 	if (!IS_USER_ADDRESS(userName)
9757 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9758 		return B_BAD_ADDRESS;
9759 
9760 	return attr_remove(fd, name, false);
9761 }
9762 
9763 
9764 status_t
9765 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9766 	const char* userToName)
9767 {
9768 	if (!IS_USER_ADDRESS(userFromName)
9769 		|| !IS_USER_ADDRESS(userToName))
9770 		return B_BAD_ADDRESS;
9771 
9772 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9773 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9774 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9775 		return B_NO_MEMORY;
9776 
9777 	char* fromName = fromNameBuffer.LockBuffer();
9778 	char* toName = toNameBuffer.LockBuffer();
9779 
9780 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9781 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9782 		return B_BAD_ADDRESS;
9783 
9784 	return attr_rename(fromFile, fromName, toFile, toName, false);
9785 }
9786 
9787 
9788 int
9789 _user_open_index_dir(dev_t device)
9790 {
9791 	return index_dir_open(device, false);
9792 }
9793 
9794 
9795 status_t
9796 _user_create_index(dev_t device, const char* userName, uint32 type,
9797 	uint32 flags)
9798 {
9799 	char name[B_FILE_NAME_LENGTH];
9800 
9801 	if (!IS_USER_ADDRESS(userName)
9802 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9803 		return B_BAD_ADDRESS;
9804 
9805 	return index_create(device, name, type, flags, false);
9806 }
9807 
9808 
9809 status_t
9810 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9811 {
9812 	char name[B_FILE_NAME_LENGTH];
9813 	struct stat stat;
9814 	status_t status;
9815 
9816 	if (!IS_USER_ADDRESS(userName)
9817 		|| !IS_USER_ADDRESS(userStat)
9818 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9819 		return B_BAD_ADDRESS;
9820 
9821 	status = index_name_read_stat(device, name, &stat, false);
9822 	if (status == B_OK) {
9823 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9824 			return B_BAD_ADDRESS;
9825 	}
9826 
9827 	return status;
9828 }
9829 
9830 
9831 status_t
9832 _user_remove_index(dev_t device, const char* userName)
9833 {
9834 	char name[B_FILE_NAME_LENGTH];
9835 
9836 	if (!IS_USER_ADDRESS(userName)
9837 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9838 		return B_BAD_ADDRESS;
9839 
9840 	return index_remove(device, name, false);
9841 }
9842 
9843 
9844 status_t
9845 _user_getcwd(char* userBuffer, size_t size)
9846 {
9847 	if (size == 0)
9848 		return B_BAD_VALUE;
9849 	if (!IS_USER_ADDRESS(userBuffer))
9850 		return B_BAD_ADDRESS;
9851 
9852 	if (size > kMaxPathLength)
9853 		size = kMaxPathLength;
9854 
9855 	KPath pathBuffer(size);
9856 	if (pathBuffer.InitCheck() != B_OK)
9857 		return B_NO_MEMORY;
9858 
9859 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9860 
9861 	char* path = pathBuffer.LockBuffer();
9862 
9863 	status_t status = get_cwd(path, size, false);
9864 	if (status != B_OK)
9865 		return status;
9866 
9867 	// Copy back the result
9868 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9869 		return B_BAD_ADDRESS;
9870 
9871 	return status;
9872 }
9873 
9874 
9875 status_t
9876 _user_setcwd(int fd, const char* userPath)
9877 {
9878 	TRACE(("user_setcwd: path = %p\n", userPath));
9879 
9880 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9881 	if (pathBuffer.InitCheck() != B_OK)
9882 		return B_NO_MEMORY;
9883 
9884 	char* path = pathBuffer.LockBuffer();
9885 
9886 	if (userPath != NULL) {
9887 		if (!IS_USER_ADDRESS(userPath)
9888 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9889 			return B_BAD_ADDRESS;
9890 	}
9891 
9892 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9893 }
9894 
9895 
9896 status_t
9897 _user_change_root(const char* userPath)
9898 {
9899 	// only root is allowed to chroot()
9900 	if (geteuid() != 0)
9901 		return B_NOT_ALLOWED;
9902 
9903 	// alloc path buffer
9904 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9905 	if (pathBuffer.InitCheck() != B_OK)
9906 		return B_NO_MEMORY;
9907 
9908 	// copy userland path to kernel
9909 	char* path = pathBuffer.LockBuffer();
9910 	if (userPath != NULL) {
9911 		if (!IS_USER_ADDRESS(userPath)
9912 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9913 			return B_BAD_ADDRESS;
9914 	}
9915 
9916 	// get the vnode
9917 	struct vnode* vnode;
9918 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9919 	if (status != B_OK)
9920 		return status;
9921 
9922 	// set the new root
9923 	struct io_context* context = get_current_io_context(false);
9924 	mutex_lock(&sIOContextRootLock);
9925 	struct vnode* oldRoot = context->root;
9926 	context->root = vnode;
9927 	mutex_unlock(&sIOContextRootLock);
9928 
9929 	put_vnode(oldRoot);
9930 
9931 	return B_OK;
9932 }
9933 
9934 
9935 int
9936 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9937 	uint32 flags, port_id port, int32 token)
9938 {
9939 	char* query;
9940 
9941 	if (device < 0 || userQuery == NULL || queryLength == 0)
9942 		return B_BAD_VALUE;
9943 
9944 	if (!IS_USER_ADDRESS(userQuery))
9945 		return B_BAD_ADDRESS;
9946 
9947 	// this is a safety restriction
9948 	if (queryLength >= 65536)
9949 		return B_NAME_TOO_LONG;
9950 
9951 	query = (char*)malloc(queryLength + 1);
9952 	if (query == NULL)
9953 		return B_NO_MEMORY;
9954 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9955 		free(query);
9956 		return B_BAD_ADDRESS;
9957 	}
9958 
9959 	int fd = query_open(device, query, flags, port, token, false);
9960 
9961 	free(query);
9962 	return fd;
9963 }
9964 
9965 
9966 #include "vfs_request_io.cpp"
9967