xref: /haiku/src/system/kernel/fs/vfs.cpp (revision f290b766707b386d72e2eaadd35cc3d999405077)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <vfs.h>
53 #include <vm/vm.h>
54 #include <vm/VMCache.h>
55 #include <wait_for_objects.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
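

// An illustrative sketch (not compiled in) of how the FS_CALL macros above
// dispatch into a file system: the hook is looked up in the vnode's ops
// table, and under KDEBUG a missing hook panics instead of crashing. The
// function below is hypothetical and only demonstrates the calling pattern.
#if 0
static status_t
example_fsync(struct vnode* vnode)
{
	// guard with HAS_FS_CALL() when the hook is optional for the FS
	if (!HAS_FS_CALL(vnode, fsync))
		return B_UNSUPPORTED;

	// expands to vnode->ops->fsync(vnode->mount->volume, vnode)
	return FS_CALL_NO_PARAMS(vnode, fsync);
}
#endif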
108 
109 
110 const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() -- this does not depend
	// on PATH_MAX).
113 
114 
115 typedef DoublyLinkedList<vnode> VnodeList;
116 
117 /*!	\brief Structure to manage a mounted file system
118 
	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is ensured that it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
128 */
129 struct fs_mount {
130 	fs_mount()
131 		:
132 		volume(NULL),
133 		device_name(NULL)
134 	{
135 		recursive_lock_init(&rlock, "mount rlock");
136 	}
137 
138 	~fs_mount()
139 	{
140 		recursive_lock_destroy(&rlock);
141 		free(device_name);
142 
143 		while (volume) {
144 			fs_volume* superVolume = volume->super_volume;
145 
146 			if (volume->file_system != NULL)
147 				put_module(volume->file_system->info.name);
148 
149 			free(volume->file_system_name);
150 			free(volume);
151 			volume = superVolume;
152 		}
153 	}
154 
155 	struct fs_mount* next;
156 	dev_t			id;
157 	fs_volume*		volume;
158 	char*			device_name;
159 	recursive_lock	rlock;	// guards the vnodes list
160 		// TODO: Make this a mutex! It is never used recursively.
161 	struct vnode*	root_vnode;
162 	struct vnode*	covers_vnode;	// immutable
163 	KPartition*		partition;
164 	VnodeList		vnodes;
165 	EntryCache		entry_cache;
166 	bool			unmounting;
167 	bool			owns_file_device;
168 };
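

// A minimal sketch (not compiled in) of the access guarantee described in
// the fs_mount documentation above: while a reference to one of the mount's
// vnodes is held, following vnode->mount->root_vnode->covers is safe without
// additional locking. The function name is hypothetical.
#if 0
static dev_t
example_covered_device(struct vnode* vnode)
{
	// safe: the mount cannot go away while we hold a vnode reference
	struct vnode* root = vnode->mount->root_vnode;

	// for the root mount, root_vnode->covers is NULL
	if (root->covers == NULL)
		return -1;

	return root->covers->device;
}
#endif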
169 
170 
171 namespace {
172 
173 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
174 	list_link		link;
175 	void*			bound_to;
176 	team_id			team;
177 	pid_t			session;
178 	off_t			start;
179 	off_t			end;
180 	bool			shared;
181 };
182 
183 typedef DoublyLinkedList<advisory_lock> LockList;
184 
185 } // namespace
186 
187 
188 struct advisory_locking {
189 	sem_id			lock;
190 	sem_id			wait_sem;
191 	LockList		locks;
192 
193 	advisory_locking()
194 		:
195 		lock(-1),
196 		wait_sem(-1)
197 	{
198 	}
199 
200 	~advisory_locking()
201 	{
202 		if (lock >= 0)
203 			delete_sem(lock);
204 		if (wait_sem >= 0)
205 			delete_sem(wait_sem);
206 	}
207 };
208 
209 /*!	\brief Guards sMountsTable.
210 
	The holder is allowed read/write access to sMountsTable.
212 	Manipulation of the fs_mount structures themselves
213 	(and their destruction) requires different locks though.
214 */
215 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
216 
217 /*!	\brief Guards mount/unmount operations.
218 
	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.
225 
226 	The thread trying to lock the lock must not hold sVnodeLock or
227 	sMountMutex.
228 */
229 static recursive_lock sMountOpLock;
230 
231 /*!	\brief Guards sVnodeTable.
232 
	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type, can also
	be write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.
241 
242 	The thread trying to acquire the lock must not hold sMountMutex.
243 	You must not hold this lock when calling create_sem(), as this might call
244 	vfs_free_unused_vnodes() and thus cause a deadlock.
245 */
246 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
247 
248 /*!	\brief Guards io_context::root.
249 
250 	Must be held when setting or getting the io_context::root field.
251 	The only operation allowed while holding this lock besides getting or
252 	setting the field is inc_vnode_ref_count() on io_context::root.
253 */
254 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
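

// A sketch (not compiled in) of the protocol documented above: lock
// sIOContextRootLock, reference io_context::root, and unlock again before
// doing anything else with the node. The function name is hypothetical.
#if 0
static struct vnode*
example_get_context_root(io_context* context)
{
	mutex_lock(&sIOContextRootLock);

	struct vnode* root = context->root;
	if (root != NULL) {
		inc_vnode_ref_count(root);
			// the only operation allowed here besides getting/setting root
	}

	mutex_unlock(&sIOContextRootLock);
	return root;
}
#endif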
255 
256 
257 namespace {
258 
259 struct vnode_hash_key {
260 	dev_t	device;
261 	ino_t	vnode;
262 };
263 
264 struct VnodeHash {
265 	typedef vnode_hash_key	KeyType;
266 	typedef	struct vnode	ValueType;
267 
268 #define VHASH(mountid, vnodeid) \
269 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
270 
271 	size_t HashKey(KeyType key) const
272 	{
273 		return VHASH(key.device, key.vnode);
274 	}
275 
276 	size_t Hash(ValueType* vnode) const
277 	{
278 		return VHASH(vnode->device, vnode->id);
279 	}
280 
281 #undef VHASH
282 
283 	bool Compare(KeyType key, ValueType* vnode) const
284 	{
285 		return vnode->device == key.device && vnode->id == key.vnode;
286 	}
287 
288 	ValueType*& GetLink(ValueType* value) const
289 	{
290 		return value->next;
291 	}
292 };
293 
294 typedef BOpenHashTable<VnodeHash> VnodeTable;
295 
296 
297 struct MountHash {
298 	typedef dev_t			KeyType;
299 	typedef	struct fs_mount	ValueType;
300 
301 	size_t HashKey(KeyType key) const
302 	{
303 		return key;
304 	}
305 
306 	size_t Hash(ValueType* mount) const
307 	{
308 		return mount->id;
309 	}
310 
311 	bool Compare(KeyType key, ValueType* mount) const
312 	{
313 		return mount->id == key;
314 	}
315 
316 	ValueType*& GetLink(ValueType* value) const
317 	{
318 		return value->next;
319 	}
320 };
321 
322 typedef BOpenHashTable<MountHash> MountTable;
323 
324 } // namespace
325 
326 
327 #define VNODE_HASH_TABLE_SIZE 1024
328 static VnodeTable* sVnodeTable;
329 static struct vnode* sRoot;
330 
331 #define MOUNTS_HASH_TABLE_SIZE 16
332 static MountTable* sMountsTable;
333 static dev_t sNextMountID = 1;
334 
335 #define MAX_TEMP_IO_VECS 8
336 
337 // How long to wait for busy vnodes (10s)
338 #define BUSY_VNODE_RETRIES 2000
339 #define BUSY_VNODE_DELAY 5000
340 
341 mode_t __gUmask = 022;
342 
343 /* function declarations */
344 
345 static void free_unused_vnodes();
346 
347 // file descriptor operation prototypes
348 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
349 	void* buffer, size_t* _bytes);
350 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
351 	const void* buffer, size_t* _bytes);
352 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
353 	int seekType);
354 static void file_free_fd(struct file_descriptor* descriptor);
355 static status_t file_close(struct file_descriptor* descriptor);
356 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
357 	struct selectsync* sync);
358 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
359 	struct selectsync* sync);
360 static status_t dir_read(struct io_context* context,
361 	struct file_descriptor* descriptor, struct dirent* buffer,
362 	size_t bufferSize, uint32* _count);
363 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
364 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
365 static status_t dir_rewind(struct file_descriptor* descriptor);
366 static void dir_free_fd(struct file_descriptor* descriptor);
367 static status_t dir_close(struct file_descriptor* descriptor);
368 static status_t attr_dir_read(struct io_context* context,
369 	struct file_descriptor* descriptor, struct dirent* buffer,
370 	size_t bufferSize, uint32* _count);
371 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
372 static void attr_dir_free_fd(struct file_descriptor* descriptor);
373 static status_t attr_dir_close(struct file_descriptor* descriptor);
374 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
375 	void* buffer, size_t* _bytes);
376 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
377 	const void* buffer, size_t* _bytes);
378 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
379 	int seekType);
380 static void attr_free_fd(struct file_descriptor* descriptor);
381 static status_t attr_close(struct file_descriptor* descriptor);
382 static status_t attr_read_stat(struct file_descriptor* descriptor,
383 	struct stat* statData);
384 static status_t attr_write_stat(struct file_descriptor* descriptor,
385 	const struct stat* stat, int statMask);
386 static status_t index_dir_read(struct io_context* context,
387 	struct file_descriptor* descriptor, struct dirent* buffer,
388 	size_t bufferSize, uint32* _count);
389 static status_t index_dir_rewind(struct file_descriptor* descriptor);
390 static void index_dir_free_fd(struct file_descriptor* descriptor);
391 static status_t index_dir_close(struct file_descriptor* descriptor);
392 static status_t query_read(struct io_context* context,
393 	struct file_descriptor* descriptor, struct dirent* buffer,
394 	size_t bufferSize, uint32* _count);
395 static status_t query_rewind(struct file_descriptor* descriptor);
396 static void query_free_fd(struct file_descriptor* descriptor);
397 static status_t query_close(struct file_descriptor* descriptor);
398 
399 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
400 	void* buffer, size_t length);
401 static status_t common_read_stat(struct file_descriptor* descriptor,
402 	struct stat* statData);
403 static status_t common_write_stat(struct file_descriptor* descriptor,
404 	const struct stat* statData, int statMask);
405 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
406 	struct stat* stat, bool kernel);
407 
408 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
409 	bool traverseLeafLink, int count, bool kernel,
410 	struct vnode** _vnode, ino_t* _parentID);
411 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
412 	size_t bufferSize, bool kernel);
413 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
414 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
415 static void inc_vnode_ref_count(struct vnode* vnode);
416 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
417 	bool reenter);
418 static inline void put_vnode(struct vnode* vnode);
419 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
420 	bool kernel);
421 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
422 
423 
424 static struct fd_ops sFileOps = {
425 	file_read,
426 	file_write,
427 	file_seek,
428 	common_ioctl,
429 	NULL,		// set_flags
430 	file_select,
431 	file_deselect,
432 	NULL,		// read_dir()
433 	NULL,		// rewind_dir()
434 	common_read_stat,
435 	common_write_stat,
436 	file_close,
437 	file_free_fd
438 };
439 
440 static struct fd_ops sDirectoryOps = {
441 	NULL,		// read()
442 	NULL,		// write()
443 	NULL,		// seek()
444 	common_ioctl,
445 	NULL,		// set_flags
446 	NULL,		// select()
447 	NULL,		// deselect()
448 	dir_read,
449 	dir_rewind,
450 	common_read_stat,
451 	common_write_stat,
452 	dir_close,
453 	dir_free_fd
454 };
455 
456 static struct fd_ops sAttributeDirectoryOps = {
457 	NULL,		// read()
458 	NULL,		// write()
459 	NULL,		// seek()
460 	common_ioctl,
461 	NULL,		// set_flags
462 	NULL,		// select()
463 	NULL,		// deselect()
464 	attr_dir_read,
465 	attr_dir_rewind,
466 	common_read_stat,
467 	common_write_stat,
468 	attr_dir_close,
469 	attr_dir_free_fd
470 };
471 
472 static struct fd_ops sAttributeOps = {
473 	attr_read,
474 	attr_write,
475 	attr_seek,
476 	common_ioctl,
477 	NULL,		// set_flags
478 	NULL,		// select()
479 	NULL,		// deselect()
480 	NULL,		// read_dir()
481 	NULL,		// rewind_dir()
482 	attr_read_stat,
483 	attr_write_stat,
484 	attr_close,
485 	attr_free_fd
486 };
487 
488 static struct fd_ops sIndexDirectoryOps = {
489 	NULL,		// read()
490 	NULL,		// write()
491 	NULL,		// seek()
492 	NULL,		// ioctl()
493 	NULL,		// set_flags
494 	NULL,		// select()
495 	NULL,		// deselect()
496 	index_dir_read,
497 	index_dir_rewind,
498 	NULL,		// read_stat()
499 	NULL,		// write_stat()
500 	index_dir_close,
501 	index_dir_free_fd
502 };
503 
504 #if 0
505 static struct fd_ops sIndexOps = {
506 	NULL,		// read()
507 	NULL,		// write()
508 	NULL,		// seek()
509 	NULL,		// ioctl()
510 	NULL,		// set_flags
511 	NULL,		// select()
512 	NULL,		// deselect()
513 	NULL,		// dir_read()
514 	NULL,		// dir_rewind()
515 	index_read_stat,	// read_stat()
516 	NULL,		// write_stat()
517 	NULL,		// dir_close()
518 	NULL		// free_fd()
519 };
520 #endif
521 
522 static struct fd_ops sQueryOps = {
523 	NULL,		// read()
524 	NULL,		// write()
525 	NULL,		// seek()
526 	NULL,		// ioctl()
527 	NULL,		// set_flags
528 	NULL,		// select()
529 	NULL,		// deselect()
530 	query_read,
531 	query_rewind,
532 	NULL,		// read_stat()
533 	NULL,		// write_stat()
534 	query_close,
535 	query_free_fd
536 };
537 
538 
539 namespace {
540 
541 class VNodePutter {
542 public:
543 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
544 
545 	~VNodePutter()
546 	{
547 		Put();
548 	}
549 
550 	void SetTo(struct vnode* vnode)
551 	{
552 		Put();
553 		fVNode = vnode;
554 	}
555 
556 	void Put()
557 	{
558 		if (fVNode) {
559 			put_vnode(fVNode);
560 			fVNode = NULL;
561 		}
562 	}
563 
564 	struct vnode* Detach()
565 	{
566 		struct vnode* vnode = fVNode;
567 		fVNode = NULL;
568 		return vnode;
569 	}
570 
571 private:
572 	struct vnode* fVNode;
573 };
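

// Typical use of VNodePutter (illustrative, not compiled in): bind a vnode
// reference to a scope so that early error returns don't leak it, and use
// Detach() where ownership is handed on. Function and parameters are
// hypothetical.
#if 0
static status_t
example_with_putter(dev_t device, ino_t nodeID)
{
	struct vnode* vnode;
	status_t status = get_vnode(device, nodeID, &vnode, true, false);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
		// put_vnode() now happens automatically on every return path

	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;

	return B_OK;
}
#endif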
574 
575 
576 class FDCloser {
577 public:
578 	FDCloser() : fFD(-1), fKernel(true) {}
579 
580 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
581 
582 	~FDCloser()
583 	{
584 		Close();
585 	}
586 
587 	void SetTo(int fd, bool kernel)
588 	{
589 		Close();
590 		fFD = fd;
591 		fKernel = kernel;
592 	}
593 
594 	void Close()
595 	{
596 		if (fFD >= 0) {
597 			if (fKernel)
598 				_kern_close(fFD);
599 			else
600 				_user_close(fFD);
601 			fFD = -1;
602 		}
603 	}
604 
605 	int Detach()
606 	{
607 		int fd = fFD;
608 		fFD = -1;
609 		return fd;
610 	}
611 
612 private:
613 	int		fFD;
614 	bool	fKernel;
615 };
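

// The FDCloser counterpart (illustrative, not compiled in): close a freshly
// opened descriptor on error paths, and Detach() it on success so it can be
// handed to the caller. The parameters are hypothetical.
#if 0
static int
example_with_fd_closer(int fd, bool kernel)
{
	FDCloser fdCloser(fd, kernel);

	// ... work that may simply `return errorCode;` on failure ...

	// success: keep the descriptor open and pass it on
	return fdCloser.Detach();
}
#endif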
616 
617 } // namespace
618 
619 
620 #if VFS_PAGES_IO_TRACING
621 
622 namespace VFSPagesIOTracing {
623 
624 class PagesIOTraceEntry : public AbstractTraceEntry {
625 protected:
626 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
627 		const generic_io_vec* vecs, uint32 count, uint32 flags,
628 		generic_size_t bytesRequested, status_t status,
629 		generic_size_t bytesTransferred)
630 		:
631 		fVnode(vnode),
632 		fMountID(vnode->mount->id),
633 		fNodeID(vnode->id),
634 		fCookie(cookie),
635 		fPos(pos),
636 		fCount(count),
637 		fFlags(flags),
638 		fBytesRequested(bytesRequested),
639 		fStatus(status),
640 		fBytesTransferred(bytesTransferred)
641 	{
642 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
643 			sizeof(generic_io_vec) * count, false);
644 	}
645 
646 	void AddDump(TraceOutput& out, const char* mode)
647 	{
648 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
649 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
650 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
651 			(uint64)fBytesRequested);
652 
653 		if (fVecs != NULL) {
654 			for (uint32 i = 0; i < fCount; i++) {
655 				if (i > 0)
656 					out.Print(", ");
657 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
658 					(uint64)fVecs[i].length);
659 			}
660 		}
661 
662 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
663 			"transferred: %" B_PRIu64, fFlags, fStatus,
664 			(uint64)fBytesTransferred);
665 	}
666 
667 protected:
668 	struct vnode*	fVnode;
669 	dev_t			fMountID;
670 	ino_t			fNodeID;
671 	void*			fCookie;
672 	off_t			fPos;
673 	generic_io_vec*	fVecs;
674 	uint32			fCount;
675 	uint32			fFlags;
676 	generic_size_t	fBytesRequested;
677 	status_t		fStatus;
678 	generic_size_t	fBytesTransferred;
679 };
680 
681 
682 class ReadPages : public PagesIOTraceEntry {
683 public:
684 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
685 		const generic_io_vec* vecs, uint32 count, uint32 flags,
686 		generic_size_t bytesRequested, status_t status,
687 		generic_size_t bytesTransferred)
688 		:
689 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
690 			bytesRequested, status, bytesTransferred)
691 	{
692 		Initialized();
693 	}
694 
695 	virtual void AddDump(TraceOutput& out)
696 	{
697 		PagesIOTraceEntry::AddDump(out, "read");
698 	}
699 };
700 
701 
702 class WritePages : public PagesIOTraceEntry {
703 public:
704 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
705 		const generic_io_vec* vecs, uint32 count, uint32 flags,
706 		generic_size_t bytesRequested, status_t status,
707 		generic_size_t bytesTransferred)
708 		:
709 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
710 			bytesRequested, status, bytesTransferred)
711 	{
712 		Initialized();
713 	}
714 
715 	virtual void AddDump(TraceOutput& out)
716 	{
717 		PagesIOTraceEntry::AddDump(out, "write");
718 	}
719 };
720 
721 }	// namespace VFSPagesIOTracing
722 
723 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
724 #else
725 #	define TPIO(x) ;
726 #endif	// VFS_PAGES_IO_TRACING
727 
728 
729 /*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
731 */
732 static struct fs_mount*
733 find_mount(dev_t id)
734 {
735 	ASSERT_LOCKED_MUTEX(&sMountMutex);
736 
737 	return sMountsTable->Lookup(id);
738 }
739 
740 
741 static status_t
742 get_mount(dev_t id, struct fs_mount** _mount)
743 {
744 	struct fs_mount* mount;
745 
746 	ReadLocker nodeLocker(sVnodeLock);
747 	MutexLocker mountLocker(sMountMutex);
748 
749 	mount = find_mount(id);
750 	if (mount == NULL)
751 		return B_BAD_VALUE;
752 
753 	struct vnode* rootNode = mount->root_vnode;
754 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
755 		|| rootNode->ref_count == 0) {
756 		// might have been called during a mount/unmount operation
757 		return B_BUSY;
758 	}
759 
760 	inc_vnode_ref_count(rootNode);
761 	*_mount = mount;
762 	return B_OK;
763 }
764 
765 
766 static void
767 put_mount(struct fs_mount* mount)
768 {
769 	if (mount)
770 		put_vnode(mount->root_vnode);
771 }
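

// A sketch (not compiled in) of the get_mount()/put_mount() pairing:
// get_mount() pins the mount by referencing its root vnode, so put_mount()
// must be called exactly once when done. The function name is hypothetical.
#if 0
static status_t
example_with_mount(dev_t device)
{
	struct fs_mount* mount;
	status_t status = get_mount(device, &mount);
	if (status != B_OK)
		return status;

	// ... use mount; it cannot be unmounted while the reference is held ...

	put_mount(mount);
	return B_OK;
}
#endif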
772 
773 
774 /*!	Tries to open the specified file system module.
775 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if the
	module could not be opened.
778 */
779 static file_system_module_info*
780 get_file_system(const char* fsName)
781 {
782 	char name[B_FILE_NAME_LENGTH];
783 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
784 		// construct module name if we didn't get one
785 		// (we currently support only one API)
786 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
787 		fsName = NULL;
788 	}
789 
790 	file_system_module_info* info;
791 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
792 		return NULL;
793 
794 	return info;
795 }
796 
797 
798 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
799 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
800 	The name is allocated for you, and you have to free() it when you're
801 	done with it.
802 	Returns NULL if the required memory is not available.
803 */
804 static char*
805 get_file_system_name(const char* fsName)
806 {
807 	const size_t length = strlen("file_systems/");
808 
809 	if (strncmp(fsName, "file_systems/", length)) {
810 		// the name already seems to be the module's file name
811 		return strdup(fsName);
812 	}
813 
814 	fsName += length;
815 	const char* end = strchr(fsName, '/');
816 	if (end == NULL) {
817 		// this doesn't seem to be a valid name, but well...
818 		return strdup(fsName);
819 	}
820 
821 	// cut off the trailing /v1
822 
823 	char* name = (char*)malloc(end + 1 - fsName);
824 	if (name == NULL)
825 		return NULL;
826 
827 	strlcpy(name, fsName, end + 1 - fsName);
828 	return name;
829 }
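

// Examples for get_file_system_name() (illustrative):
//   "bfs"                 -> "bfs" (already a short name, duplicated as-is)
//   "file_systems/bfs/v1" -> "bfs" (prefix and "/v1" suffix cut off)
// The caller owns the returned string and must free() it.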
830 
831 
/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
834 	The name is allocated for you, and you have to free() it when you're
835 	done with it.
836 	Returns NULL if the required memory is not available or if there is no
837 	name for the specified layer.
838 */
839 static char*
840 get_file_system_name_for_layer(const char* fsNames, int32 layer)
841 {
842 	while (layer >= 0) {
843 		const char* end = strchr(fsNames, ':');
844 		if (end == NULL) {
845 			if (layer == 0)
846 				return strdup(fsNames);
847 			return NULL;
848 		}
849 
850 		if (layer == 0) {
851 			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
854 			return result;
855 		}
856 
857 		fsNames = end + 1;
858 		layer--;
859 	}
860 
861 	return NULL;
862 }
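

// Example for get_file_system_name_for_layer() (illustrative, with a
// hypothetical layered name):
//   fsNames = "ext2:write_overlay"
//   layer 0 -> "ext2", layer 1 -> "write_overlay", layer 2 -> NULL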
863 
864 
865 static void
866 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
867 {
868 	RecursiveLocker _(mount->rlock);
869 	mount->vnodes.Add(vnode);
870 }
871 
872 
873 static void
874 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
875 {
876 	RecursiveLocker _(mount->rlock);
877 	mount->vnodes.Remove(vnode);
878 }
879 
880 
881 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
882 
883 	The caller must hold the sVnodeLock (read lock at least).
884 
885 	\param mountID the mount ID.
886 	\param vnodeID the node ID.
887 
888 	\return The vnode structure, if it was found in the hash table, \c NULL
889 			otherwise.
890 */
891 static struct vnode*
892 lookup_vnode(dev_t mountID, ino_t vnodeID)
893 {
894 	struct vnode_hash_key key;
895 
896 	key.device = mountID;
897 	key.vnode = vnodeID;
898 
899 	return sVnodeTable->Lookup(key);
900 }
901 
902 
903 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
904 
	This will also wait for BUSY_VNODE_DELAY before returning if one should
	keep waiting for the vnode to become unbusy.
907 
908 	\return \c true if one should retry, \c false if not.
909 */
910 static bool
911 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
912 {
913 	if (--tries < 0) {
914 		// vnode doesn't seem to become unbusy
915 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
916 			" is not becoming unbusy!\n", mountID, vnodeID);
917 		return false;
918 	}
919 	snooze(BUSY_VNODE_DELAY);
920 	return true;
921 }
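

// How callers typically use retry_busy_vnode() (sketch, not compiled in):
// loop while the vnode is busy, giving up after BUSY_VNODE_RETRIES attempts
// of BUSY_VNODE_DELAY microseconds each (about 10 seconds in total). See
// get_vnode() below for a real, fully locked version of this loop.
#if 0
	int32 tries = BUSY_VNODE_RETRIES;
	while (vnode != NULL && vnode->IsBusy()) {
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;
	}
#endif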
922 
923 
924 /*!	Creates a new vnode with the given mount and node ID.
925 	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
927 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
928 	error the lock is not held on return.
929 
930 	\param mountID The mount ID.
931 	\param vnodeID The vnode ID.
932 	\param _vnode Will be set to the new vnode on success.
933 	\param _nodeCreated Will be set to \c true when the returned vnode has
934 		been newly created, \c false when it already existed. Will not be
935 		changed on error.
936 	\return \c B_OK, when the vnode was successfully created and inserted or
937 		a node with the given ID was found, \c B_NO_MEMORY or
938 		\c B_ENTRY_NOT_FOUND on error.
939 */
940 static status_t
941 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
942 	bool& _nodeCreated)
943 {
944 	FUNCTION(("create_new_vnode_and_lock()\n"));
945 
946 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
947 	if (vnode == NULL)
948 		return B_NO_MEMORY;
949 
950 	// initialize basic values
951 	memset(vnode, 0, sizeof(struct vnode));
952 	vnode->device = mountID;
953 	vnode->id = vnodeID;
954 	vnode->ref_count = 1;
955 	vnode->SetBusy(true);
956 
957 	// look up the node -- it might have been added by someone else in the
958 	// meantime
959 	rw_lock_write_lock(&sVnodeLock);
960 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
961 	if (existingVnode != NULL) {
962 		free(vnode);
963 		_vnode = existingVnode;
964 		_nodeCreated = false;
965 		return B_OK;
966 	}
967 
968 	// get the mount structure
969 	mutex_lock(&sMountMutex);
970 	vnode->mount = find_mount(mountID);
971 	if (!vnode->mount || vnode->mount->unmounting) {
972 		mutex_unlock(&sMountMutex);
973 		rw_lock_write_unlock(&sVnodeLock);
974 		free(vnode);
975 		return B_ENTRY_NOT_FOUND;
976 	}
977 
978 	// add the vnode to the mount's node list and the hash table
979 	sVnodeTable->Insert(vnode);
980 	add_vnode_to_mount_list(vnode, vnode->mount);
981 
982 	mutex_unlock(&sMountMutex);
983 
984 	_vnode = vnode;
985 	_nodeCreated = true;
986 
987 	// keep the vnode lock locked
988 	return B_OK;
989 }
990 
991 
992 /*!	Frees the vnode and all resources it has acquired, and removes
993 	it from the vnode hash as well as from its mount structure.
994 	Will also make sure that any cache modifications are written back.
995 */
996 static void
997 free_vnode(struct vnode* vnode, bool reenter)
998 {
999 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1000 		vnode);
1001 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1002 
1003 	// write back any changes in this vnode's cache -- but only
1004 	// if the vnode won't be deleted, in which case the changes
1005 	// will be discarded
1006 
1007 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1008 		FS_CALL_NO_PARAMS(vnode, fsync);
1009 
1010 	// Note: If this vnode has a cache attached, there will still be two
1011 	// references to that cache at this point. The last one belongs to the vnode
1012 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1013 	// cache. Each but the last reference to a cache also includes a reference
1014 	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count had the chance to
	// drop to 0 at all. Deleting the file cache now will cause the next-to-last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count to a value that will neither become negative nor 0.
1020 	vnode->ref_count = 2;
1021 
1022 	if (!vnode->IsUnpublished()) {
1023 		if (vnode->IsRemoved())
1024 			FS_CALL(vnode, remove_vnode, reenter);
1025 		else
1026 			FS_CALL(vnode, put_vnode, reenter);
1027 	}
1028 
1029 	// If the vnode has a VMCache attached, make sure that it won't try to get
1030 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1031 	// long as the vnode is busy and in the hash, that won't happen, but as
1032 	// soon as we've removed it from the hash, it could reload the vnode -- with
1033 	// a new cache attached!
1034 	if (vnode->cache != NULL)
1035 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1036 
1037 	// The file system has removed the resources of the vnode now, so we can
1038 	// make it available again (by removing the busy vnode from the hash).
1039 	rw_lock_write_lock(&sVnodeLock);
1040 	sVnodeTable->Remove(vnode);
1041 	rw_lock_write_unlock(&sVnodeLock);
1042 
1043 	// if we have a VMCache attached, remove it
1044 	if (vnode->cache)
1045 		vnode->cache->ReleaseRef();
1046 
1047 	vnode->cache = NULL;
1048 
1049 	remove_vnode_from_mount_list(vnode, vnode->mount);
1050 
1051 	free(vnode);
1052 }
1053 
1054 
1055 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1056 	if the counter dropped to 0.
1057 
1058 	The caller must, of course, own a reference to the vnode to call this
1059 	function.
1060 	The caller must not hold the sVnodeLock or the sMountMutex.
1061 
1062 	\param vnode the vnode.
1063 	\param alwaysFree don't move this vnode into the unused list, but really
1064 		   delete it if possible.
1065 	\param reenter \c true, if this function is called (indirectly) from within
1066 		   a file system. This will be passed to file system hooks only.
1067 	\return \c B_OK, if everything went fine, an error code otherwise.
1068 */
1069 static status_t
1070 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1071 {
1072 	ReadLocker locker(sVnodeLock);
1073 	AutoLocker<Vnode> nodeLocker(vnode);
1074 
1075 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1076 
1077 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1078 
1079 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1080 		vnode->ref_count));
1081 
1082 	if (oldRefCount != 1)
1083 		return B_OK;
1084 
1085 	if (vnode->IsBusy())
1086 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1087 
1088 	bool freeNode = false;
1089 	bool freeUnusedNodes = false;
1090 
1091 	// Just insert the vnode into an unused list if we don't need
1092 	// to delete it
1093 	if (vnode->IsRemoved() || alwaysFree) {
1094 		vnode_to_be_freed(vnode);
1095 		vnode->SetBusy(true);
1096 		freeNode = true;
1097 	} else
1098 		freeUnusedNodes = vnode_unused(vnode);
1099 
1100 	nodeLocker.Unlock();
1101 	locker.Unlock();
1102 
1103 	if (freeNode)
1104 		free_vnode(vnode, reenter);
1105 	else if (freeUnusedNodes)
1106 		free_unused_vnodes();
1107 
1108 	return B_OK;
1109 }
1110 
1111 
1112 /*!	\brief Increments the reference counter of the given vnode.
1113 
1114 	The caller must make sure that the node isn't deleted while this function
1115 	is called. This can be done either:
1116 	- by ensuring that a reference to the node exists and remains in existence,
1117 	  or
1118 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1119 	  or by holding sVnodeLock write locked.
1120 
1121 	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for
	the node.
1125 
1126 	\param vnode the vnode.
1127 */
1128 static void
1129 inc_vnode_ref_count(struct vnode* vnode)
1130 {
1131 	atomic_add(&vnode->ref_count, 1);
1132 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1133 		vnode->ref_count));
1134 }
1135 
1136 
1137 static bool
1138 is_special_node_type(int type)
1139 {
1140 	// at the moment only FIFOs are supported
1141 	return S_ISFIFO(type);
1142 }
1143 
1144 
1145 static status_t
1146 create_special_sub_node(struct vnode* vnode, uint32 flags)
1147 {
1148 	if (S_ISFIFO(vnode->Type()))
1149 		return create_fifo_vnode(vnode->mount->volume, vnode);
1150 
1151 	return B_BAD_VALUE;
1152 }
1153 
1154 
1155 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1156 
1157 	If the node is not yet in memory, it will be loaded.
1158 
1159 	The caller must not hold the sVnodeLock or the sMountMutex.
1160 
1161 	\param mountID the mount ID.
1162 	\param vnodeID the node ID.
1163 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1164 		   retrieved vnode structure shall be written.
1165 	\param reenter \c true, if this function is called (indirectly) from within
1166 		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
1168 */
1169 static status_t
1170 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1171 	int reenter)
1172 {
1173 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1174 		mountID, vnodeID, _vnode));
1175 
1176 	rw_lock_read_lock(&sVnodeLock);
1177 
1178 	int32 tries = BUSY_VNODE_RETRIES;
1179 restart:
1180 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1181 	AutoLocker<Vnode> nodeLocker(vnode);
1182 
1183 	if (vnode && vnode->IsBusy()) {
1184 		nodeLocker.Unlock();
1185 		rw_lock_read_unlock(&sVnodeLock);
1186 		if (!canWait) {
1187 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1188 				mountID, vnodeID);
1189 			return B_BUSY;
1190 		}
1191 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1192 			return B_BUSY;
1193 
1194 		rw_lock_read_lock(&sVnodeLock);
1195 		goto restart;
1196 	}
1197 
1198 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1199 
1200 	status_t status;
1201 
1202 	if (vnode) {
1203 		if (vnode->ref_count == 0) {
1204 			// this vnode has been unused before
1205 			vnode_used(vnode);
1206 		}
1207 		inc_vnode_ref_count(vnode);
1208 
1209 		nodeLocker.Unlock();
1210 		rw_lock_read_unlock(&sVnodeLock);
1211 	} else {
1212 		// we need to create a new vnode and read it in
1213 		rw_lock_read_unlock(&sVnodeLock);
1214 			// unlock -- create_new_vnode_and_lock() write-locks on success
1215 		bool nodeCreated;
1216 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1217 			nodeCreated);
1218 		if (status != B_OK)
1219 			return status;
1220 
1221 		if (!nodeCreated) {
1222 			rw_lock_read_lock(&sVnodeLock);
1223 			rw_lock_write_unlock(&sVnodeLock);
1224 			goto restart;
1225 		}
1226 
1227 		rw_lock_write_unlock(&sVnodeLock);
1228 
1229 		int type;
1230 		uint32 flags;
1231 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1232 			&flags, reenter);
1233 		if (status == B_OK && vnode->private_node == NULL)
1234 			status = B_BAD_VALUE;
1235 
1236 		bool gotNode = status == B_OK;
1237 		bool publishSpecialSubNode = false;
1238 		if (gotNode) {
1239 			vnode->SetType(type);
1240 			publishSpecialSubNode = is_special_node_type(type)
1241 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1242 		}
1243 
1244 		if (gotNode && publishSpecialSubNode)
1245 			status = create_special_sub_node(vnode, flags);
1246 
1247 		if (status != B_OK) {
1248 			if (gotNode)
1249 				FS_CALL(vnode, put_vnode, reenter);
1250 
1251 			rw_lock_write_lock(&sVnodeLock);
1252 			sVnodeTable->Remove(vnode);
1253 			remove_vnode_from_mount_list(vnode, vnode->mount);
1254 			rw_lock_write_unlock(&sVnodeLock);
1255 
1256 			free(vnode);
1257 			return status;
1258 		}
1259 
1260 		rw_lock_read_lock(&sVnodeLock);
1261 		vnode->Lock();
1262 
1263 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1264 		vnode->SetBusy(false);
1265 
1266 		vnode->Unlock();
1267 		rw_lock_read_unlock(&sVnodeLock);
1268 	}
1269 
1270 	TRACE(("get_vnode: returning %p\n", vnode));
1271 
1272 	*_vnode = vnode;
1273 	return B_OK;
1274 }
1275 
1276 
1277 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1278 	if the counter dropped to 0.
1279 
1280 	The caller must, of course, own a reference to the vnode to call this
1281 	function.
1282 	The caller must not hold the sVnodeLock or the sMountMutex.
1283 
1284 	\param vnode the vnode.
1285 */
1286 static inline void
1287 put_vnode(struct vnode* vnode)
1288 {
1289 	dec_vnode_ref_count(vnode, false, false);
1290 }
1291 
1292 
1293 static void
1294 free_unused_vnodes(int32 level)
1295 {
1296 	unused_vnodes_check_started();
1297 
1298 	if (level == B_NO_LOW_RESOURCE) {
1299 		unused_vnodes_check_done();
1300 		return;
1301 	}
1302 
1303 	flush_hot_vnodes();
1304 
1305 	// determine how many nodes to free
1306 	uint32 count = 1;
1307 	{
1308 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1309 
1310 		switch (level) {
1311 			case B_LOW_RESOURCE_NOTE:
1312 				count = sUnusedVnodes / 100;
1313 				break;
1314 			case B_LOW_RESOURCE_WARNING:
1315 				count = sUnusedVnodes / 10;
1316 				break;
1317 			case B_LOW_RESOURCE_CRITICAL:
1318 				count = sUnusedVnodes;
1319 				break;
1320 		}
1321 
1322 		if (count > sUnusedVnodes)
1323 			count = sUnusedVnodes;
1324 	}
1325 
1326 	// Write back the modified pages of some unused vnodes and free them.
1327 
1328 	for (uint32 i = 0; i < count; i++) {
1329 		ReadLocker vnodesReadLocker(sVnodeLock);
1330 
1331 		// get the first node
1332 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1333 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1334 			&sUnusedVnodeList);
1335 		unusedVnodesLocker.Unlock();
1336 
1337 		if (vnode == NULL)
1338 			break;
1339 
1340 		// lock the node
1341 		AutoLocker<Vnode> nodeLocker(vnode);
1342 
1343 		// Check whether the node is still unused -- since we only append to the
1344 		// tail of the unused queue, the vnode should still be at its head.
1345 		// Alternatively we could check its ref count for 0 and its busy flag,
1346 		// but if the node is no longer at the head of the queue, it means it
1347 		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
1349 		unusedVnodesLocker.Lock();
1350 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1351 			continue;
1352 		unusedVnodesLocker.Unlock();
1353 
1354 		ASSERT(!vnode->IsBusy());
1355 
1356 		// grab a reference
1357 		inc_vnode_ref_count(vnode);
1358 		vnode_used(vnode);
1359 
1360 		// write back changes and free the node
1361 		nodeLocker.Unlock();
1362 		vnodesReadLocker.Unlock();
1363 
1364 		if (vnode->cache != NULL)
1365 			vnode->cache->WriteModified();
1366 
1367 		dec_vnode_ref_count(vnode, true, false);
1368 			// this should free the vnode when it's still unused
1369 	}
1370 
1371 	unused_vnodes_check_done();
1372 }
1373 
1374 
1375 /*!	Gets the vnode the given vnode is covering.
1376 
1377 	The caller must have \c sVnodeLock read-locked at least.
1378 
	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.
1381 
1382 	\param vnode The vnode whose covered node shall be returned.
1383 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1384 		vnode.
1385 */
1386 static inline Vnode*
1387 get_covered_vnode_locked(Vnode* vnode)
1388 {
1389 	if (Vnode* coveredNode = vnode->covers) {
1390 		while (coveredNode->covers != NULL)
1391 			coveredNode = coveredNode->covers;
1392 
1393 		inc_vnode_ref_count(coveredNode);
1394 		return coveredNode;
1395 	}
1396 
1397 	return NULL;
1398 }
1399 
1400 
1401 /*!	Gets the vnode the given vnode is covering.
1402 
1403 	The caller must not hold \c sVnodeLock. Note that this implies a race
1404 	condition, since the situation can change at any time.
1405 
	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.
1408 
1409 	\param vnode The vnode whose covered node shall be returned.
1410 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1411 		vnode.
1412 */
1413 static inline Vnode*
1414 get_covered_vnode(Vnode* vnode)
1415 {
1416 	if (!vnode->IsCovering())
1417 		return NULL;
1418 
1419 	ReadLocker vnodeReadLocker(sVnodeLock);
1420 	return get_covered_vnode_locked(vnode);
1421 }
1422 
1423 
1424 /*!	Gets the vnode the given vnode is covered by.
1425 
1426 	The caller must have \c sVnodeLock read-locked at least.
1427 
	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.
1430 
1431 	\param vnode The vnode whose covering node shall be returned.
1432 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1433 		any vnode.
1434 */
1435 static Vnode*
1436 get_covering_vnode_locked(Vnode* vnode)
1437 {
1438 	if (Vnode* coveringNode = vnode->covered_by) {
1439 		while (coveringNode->covered_by != NULL)
1440 			coveringNode = coveringNode->covered_by;
1441 
1442 		inc_vnode_ref_count(coveringNode);
1443 		return coveringNode;
1444 	}
1445 
1446 	return NULL;
1447 }
1448 
1449 
1450 /*!	Gets the vnode the given vnode is covered by.
1451 
1452 	The caller must not hold \c sVnodeLock. Note that this implies a race
1453 	condition, since the situation can change at any time.
1454 
	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.
1457 
1458 	\param vnode The vnode whose covering node shall be returned.
1459 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1460 		any vnode.
1461 */
1462 static inline Vnode*
1463 get_covering_vnode(Vnode* vnode)
1464 {
1465 	if (!vnode->IsCovered())
1466 		return NULL;
1467 
1468 	ReadLocker vnodeReadLocker(sVnodeLock);
1469 	return get_covering_vnode_locked(vnode);
1470 }
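

// Illustrative sketch (not compiled in): resolving what a path lookup sees
// at a mount point. If the vnode is covered, the lookup continues at the
// topmost covering vnode (the root of the most recently mounted volume).
// This assumes we already hold a reference to vnode that can be handed off.
#if 0
	if (Vnode* coveringNode = get_covering_vnode(vnode)) {
		put_vnode(vnode);
			// get_covering_vnode() returned its own reference
		vnode = coveringNode;
	}
#endif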
1471 
1472 
1473 static void
1474 free_unused_vnodes()
1475 {
1476 	free_unused_vnodes(
1477 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1478 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1479 }
1480 
1481 
1482 static void
1483 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1484 {
1485 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1486 
1487 	free_unused_vnodes(level);
1488 }
1489 
1490 
1491 static inline void
1492 put_advisory_locking(struct advisory_locking* locking)
1493 {
1494 	release_sem(locking->lock);
1495 }
1496 
1497 
1498 /*!	Returns the advisory_locking object of the \a vnode in case it
1499 	has one, and locks it.
1500 	You have to call put_advisory_locking() when you're done with
1501 	it.
1502 	Note, you must not have the vnode mutex locked when calling
1503 	this function.
1504 */
1505 static struct advisory_locking*
1506 get_advisory_locking(struct vnode* vnode)
1507 {
1508 	rw_lock_read_lock(&sVnodeLock);
1509 	vnode->Lock();
1510 
1511 	struct advisory_locking* locking = vnode->advisory_locking;
1512 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1513 
1514 	vnode->Unlock();
1515 	rw_lock_read_unlock(&sVnodeLock);
1516 
1517 	if (lock >= 0)
1518 		lock = acquire_sem(lock);
1519 	if (lock < 0) {
		// This means the locking has been deleted in the meantime
1521 		// or had never existed in the first place - otherwise, we
1522 		// would get the lock at some point.
1523 		return NULL;
1524 	}
1525 
1526 	return locking;
1527 }
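

// Sketch (not compiled in) of the get/put protocol for advisory_locking:
// the returned object is held locked via its semaphore and must be released
// with put_advisory_locking(), not by manipulating the vnode.
#if 0
	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking != NULL) {
		// ... inspect or modify locking->locks ...
		put_advisory_locking(locking);
	}
#endif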
1528 
1529 
1530 /*!	Creates a locked advisory_locking object, and attaches it to the
1531 	given \a vnode.
	Returns B_OK in case of success; even if the vnode got such an
	object from someone else in the meantime, you'll still get that
	one locked.
1535 */
1536 static status_t
1537 create_advisory_locking(struct vnode* vnode)
1538 {
1539 	if (vnode == NULL)
1540 		return B_FILE_ERROR;
1541 
1542 	ObjectDeleter<advisory_locking> lockingDeleter;
1543 	struct advisory_locking* locking = NULL;
1544 
1545 	while (get_advisory_locking(vnode) == NULL) {
1546 		// no locking object set on the vnode yet, create one
1547 		if (locking == NULL) {
1548 			locking = new(std::nothrow) advisory_locking;
1549 			if (locking == NULL)
1550 				return B_NO_MEMORY;
1551 			lockingDeleter.SetTo(locking);
1552 
1553 			locking->wait_sem = create_sem(0, "advisory lock");
1554 			if (locking->wait_sem < 0)
1555 				return locking->wait_sem;
1556 
1557 			locking->lock = create_sem(0, "advisory locking");
1558 			if (locking->lock < 0)
1559 				return locking->lock;
1560 		}
1561 
1562 		// set our newly created locking object
1563 		ReadLocker _(sVnodeLock);
1564 		AutoLocker<Vnode> nodeLocker(vnode);
1565 		if (vnode->advisory_locking == NULL) {
1566 			vnode->advisory_locking = locking;
1567 			lockingDeleter.Detach();
1568 			return B_OK;
1569 		}
1570 	}
1571 
1572 	// The vnode already had a locking object. That's just as well.
1573 
1574 	return B_OK;
1575 }
1576 
1577 
1578 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1579 	with the advisory_lock \a lock.
1580 */
1581 static bool
1582 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1583 {
1584 	if (flock == NULL)
1585 		return true;
1586 
1587 	return lock->start <= flock->l_start - 1 + flock->l_len
1588 		&& lock->end >= flock->l_start;
1589 }
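

// Worked example (illustrative): a flock with l_start = 100 and l_len = 50
// covers the closed range [100, 149]. An advisory_lock with start = 140 and
// end = 200 intersects it (140 <= 100 - 1 + 50 and 200 >= 100), while one
// with start = 150 does not (150 > 149).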
1590 
1591 
1592 /*!	Tests whether acquiring a lock would block.
1593 */
1594 static status_t
1595 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1596 {
1597 	flock->l_type = F_UNLCK;
1598 
1599 	struct advisory_locking* locking = get_advisory_locking(vnode);
1600 	if (locking == NULL)
1601 		return B_OK;
1602 
1603 	team_id team = team_get_current_team_id();
1604 
1605 	LockList::Iterator iterator = locking->locks.GetIterator();
1606 	while (iterator.HasNext()) {
1607 		struct advisory_lock* lock = iterator.Next();
1608 
		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1610 			// locks do overlap
1611 			if (flock->l_type != F_RDLCK || !lock->shared) {
1612 				// collision
1613 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1614 				flock->l_whence = SEEK_SET;
1615 				flock->l_start = lock->start;
1616 				flock->l_len = lock->end - lock->start + 1;
1617 				flock->l_pid = lock->team;
1618 				break;
1619 			}
1620 		}
1621 	}
1622 
1623 	put_advisory_locking(locking);
1624 	return B_OK;
1625 }
1626 
1627 
1628 /*!	Removes the specified lock, or all locks of the calling team
1629 	if \a flock is NULL.
1630 */
1631 static status_t
1632 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1633 	struct file_descriptor* descriptor, struct flock* flock)
1634 {
1635 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1636 
1637 	struct advisory_locking* locking = get_advisory_locking(vnode);
1638 	if (locking == NULL)
1639 		return B_OK;
1640 
1641 	// find matching lock entries
1642 
1643 	LockList::Iterator iterator = locking->locks.GetIterator();
1644 	while (iterator.HasNext()) {
1645 		struct advisory_lock* lock = iterator.Next();
1646 		bool removeLock = false;
1647 
1648 		if (descriptor != NULL && lock->bound_to == descriptor) {
1649 			// Remove flock() locks
1650 			removeLock = true;
1651 		} else if (lock->bound_to == context
1652 				&& advisory_lock_intersects(lock, flock)) {
1653 			// Remove POSIX locks
1654 			bool endsBeyond = false;
1655 			bool startsBefore = false;
1656 			if (flock != NULL) {
1657 				startsBefore = lock->start < flock->l_start;
1658 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1659 			}
1660 
1661 			if (!startsBefore && !endsBeyond) {
1662 				// lock is completely contained in flock
1663 				removeLock = true;
1664 			} else if (startsBefore && !endsBeyond) {
1665 				// cut the end of the lock
1666 				lock->end = flock->l_start - 1;
1667 			} else if (!startsBefore && endsBeyond) {
1668 				// cut the start of the lock
1669 				lock->start = flock->l_start + flock->l_len;
1670 			} else {
1671 				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// use malloc() so the lock can later be free()d like the
					// other entries in this list
1673 				if (secondLock == NULL) {
1674 					// TODO: we should probably revert the locks we already
1675 					// changed... (ie. allocate upfront)
1676 					put_advisory_locking(locking);
1677 					return B_NO_MEMORY;
1678 				}
1679 
				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// copy the original end before truncating the first lock
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;
1689 
1690 				locking->locks.Add(secondLock);
1691 			}
1692 		}
1693 
1694 		if (removeLock) {
1695 			// this lock is no longer used
1696 			iterator.Remove();
1697 			free(lock);
1698 		}
1699 	}
1700 
1701 	bool removeLocking = locking->locks.IsEmpty();
1702 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1703 
1704 	put_advisory_locking(locking);
1705 
1706 	if (removeLocking) {
1707 		// We can remove the whole advisory locking structure; it's no
1708 		// longer used
1709 		locking = get_advisory_locking(vnode);
1710 		if (locking != NULL) {
1711 			ReadLocker locker(sVnodeLock);
1712 			AutoLocker<Vnode> nodeLocker(vnode);
1713 
			// the locking could have been changed in the meantime
1715 			if (locking->locks.IsEmpty()) {
1716 				vnode->advisory_locking = NULL;
1717 				nodeLocker.Unlock();
1718 				locker.Unlock();
1719 
1720 				// we've detached the locking from the vnode, so we can
1721 				// safely delete it
1722 				delete locking;
1723 			} else {
1724 				// the locking is in use again
1725 				nodeLocker.Unlock();
1726 				locker.Unlock();
1727 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1728 			}
1729 		}
1730 	}
1731 
1732 	return B_OK;
1733 }
1734 
1735 
1736 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1737 	will wait for the lock to become available, if there are any collisions
1738 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1739 
1740 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1741 	BSD flock() semantics are used, that is, all children can unlock the file
1742 	in question (we even allow parents to remove the lock, though, but that
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
1745 static status_t
1746 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1747 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1748 {
1749 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1750 		vnode, flock, wait ? "yes" : "no"));
1753 
1754 	bool shared = flock->l_type == F_RDLCK;
1755 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1756 	status_t status = B_OK;
1757 
1758 	// TODO: do deadlock detection!
1759 
1760 	struct advisory_locking* locking;
1761 
1762 	while (true) {
1763 		// if this vnode has an advisory_locking structure attached,
1764 		// lock that one and search for any colliding file lock
1765 		status = create_advisory_locking(vnode);
1766 		if (status != B_OK)
1767 			return status;
1768 
1769 		locking = vnode->advisory_locking;
1770 		team_id team = team_get_current_team_id();
1771 		sem_id waitForLock = -1;
1772 
1773 		// test for collisions
1774 		LockList::Iterator iterator = locking->locks.GetIterator();
1775 		while (iterator.HasNext()) {
1776 			struct advisory_lock* lock = iterator.Next();
1777 
1778 			// TODO: locks from the same team might be joinable!
1779 			if ((lock->team != team || lock->bound_to != boundTo)
1780 					&& advisory_lock_intersects(lock, flock)) {
1781 				// locks do overlap
1782 				if (!shared || !lock->shared) {
1783 					// we need to wait
1784 					waitForLock = locking->wait_sem;
1785 					break;
1786 				}
1787 			}
1788 		}
1789 
1790 		if (waitForLock < 0)
1791 			break;
1792 
1793 		// We need to wait. Do that or fail now, if we've been asked not to.
1794 
1795 		if (!wait) {
1796 			put_advisory_locking(locking);
1797 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1798 		}
1799 
1800 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1801 			B_CAN_INTERRUPT, 0);
1802 		if (status != B_OK && status != B_BAD_SEM_ID)
1803 			return status;
1804 
1805 		// We have been notified, but we need to re-lock the locking object. So
1806 		// go another round...
1807 	}
1808 
1809 	// install new lock
1810 
1811 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1812 		sizeof(struct advisory_lock));
1813 	if (lock == NULL) {
1814 		put_advisory_locking(locking);
1815 		return B_NO_MEMORY;
1816 	}
1817 
1818 	lock->bound_to = boundTo;
1819 	lock->team = team_get_current_team_id();
1820 	lock->session = thread_get_current_thread()->team->session_id;
1821 	// values must already be normalized when getting here
1822 	lock->start = flock->l_start;
1823 	lock->end = flock->l_start - 1 + flock->l_len;
1824 	lock->shared = shared;
1825 
1826 	locking->locks.Add(lock);
1827 	put_advisory_locking(locking);
1828 
1829 	return status;
1830 }
1831 
1832 
1833 /*!	Normalizes the \a flock structure to make it easier to compare the
1834 	structure with others. The l_start and l_len fields are set to absolute
1835 	values according to the l_whence field.
1836 */
1837 static status_t
1838 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1839 {
1840 	switch (flock->l_whence) {
1841 		case SEEK_SET:
1842 			break;
1843 		case SEEK_CUR:
1844 			flock->l_start += descriptor->pos;
1845 			break;
1846 		case SEEK_END:
1847 		{
1848 			struct vnode* vnode = descriptor->u.vnode;
1849 			struct stat stat;
1850 			status_t status;
1851 
1852 			if (!HAS_FS_CALL(vnode, read_stat))
1853 				return B_UNSUPPORTED;
1854 
1855 			status = FS_CALL(vnode, read_stat, &stat);
1856 			if (status != B_OK)
1857 				return status;
1858 
1859 			flock->l_start += stat.st_size;
1860 			break;
1861 		}
1862 		default:
1863 			return B_BAD_VALUE;
1864 	}
1865 
1866 	if (flock->l_start < 0)
1867 		flock->l_start = 0;
1868 	if (flock->l_len == 0)
1869 		flock->l_len = OFF_MAX;
1870 
1871 	// don't let the offset and length overflow
1872 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1873 		flock->l_len = OFF_MAX - flock->l_start;
1874 
1875 	if (flock->l_len < 0) {
1876 		// a negative length reverses the region
1877 		flock->l_start += flock->l_len;
1878 		flock->l_len = -flock->l_len;
1879 	}
1880 
1881 	return B_OK;
1882 }
1883 
1884 
1885 static void
1886 replace_vnode_if_disconnected(struct fs_mount* mount,
1887 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1888 	struct vnode* fallBack, bool lockRootLock)
1889 {
1890 	struct vnode* givenVnode = vnode;
1891 	bool vnodeReplaced = false;
1892 
1893 	ReadLocker vnodeReadLocker(sVnodeLock);
1894 
1895 	if (lockRootLock)
1896 		mutex_lock(&sIOContextRootLock);
1897 
1898 	while (vnode != NULL && vnode->mount == mount
1899 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1900 		if (vnode->covers != NULL) {
1901 			// redirect the vnode to the covered vnode
1902 			vnode = vnode->covers;
1903 		} else
1904 			vnode = fallBack;
1905 
1906 		vnodeReplaced = true;
1907 	}
1908 
1909 	// If we've replaced the node, grab a reference for the new one.
1910 	if (vnodeReplaced && vnode != NULL)
1911 		inc_vnode_ref_count(vnode);
1912 
1913 	if (lockRootLock)
1914 		mutex_unlock(&sIOContextRootLock);
1915 
1916 	vnodeReadLocker.Unlock();
1917 
1918 	if (vnodeReplaced)
1919 		put_vnode(givenVnode);
1920 }
1921 
1922 
1923 /*!	Disconnects all file descriptors that are associated with the
1924 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1925 	\a mount object.
1926 
	Note that after you've called this function, there might still be ongoing
	accesses - those already in progress won't be interrupted.
1929 	However, any subsequent access will fail.
1930 
1931 	This is not a cheap function and should be used with care and rarely.
1932 	TODO: there is currently no means to stop a blocking read/write!
1933 */
1934 static void
1935 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1936 	struct vnode* vnodeToDisconnect)
1937 {
1938 	// iterate over all teams and peek into their file descriptors
1939 	TeamListIterator teamIterator;
1940 	while (Team* team = teamIterator.Next()) {
1941 		BReference<Team> teamReference(team, true);
1942 		TeamLocker teamLocker(team);
1943 
1944 		// lock the I/O context
1945 		io_context* context = team->io_context;
1946 		if (context == NULL)
1947 			continue;
1948 		MutexLocker contextLocker(context->io_mutex);
1949 
1950 		teamLocker.Unlock();
1951 
1952 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1953 			sRoot, true);
1954 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1955 			sRoot, false);
1956 
1957 		for (uint32 i = 0; i < context->table_size; i++) {
1958 			struct file_descriptor* descriptor = context->fds[i];
1959 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1960 				continue;
1961 
1962 			inc_fd_ref_count(descriptor);
1963 
1964 			// if this descriptor points at this mount, we
1965 			// need to disconnect it to be able to unmount
1966 			struct vnode* vnode = fd_vnode(descriptor);
1967 			if (vnodeToDisconnect != NULL) {
1968 				if (vnode == vnodeToDisconnect)
1969 					disconnect_fd(descriptor);
1970 			} else if ((vnode != NULL && vnode->mount == mount)
1971 				|| (vnode == NULL && descriptor->u.mount == mount))
1972 				disconnect_fd(descriptor);
1973 
1974 			put_fd(descriptor);
1975 		}
1976 	}
1977 }
1978 
1979 
1980 /*!	\brief Gets the root node of the current IO context.
1981 	If \a kernel is \c true, the kernel IO context will be used.
1982 	The caller obtains a reference to the returned node.
1983 */
1984 struct vnode*
1985 get_root_vnode(bool kernel)
1986 {
1987 	if (!kernel) {
		// Get the root vnode from the current team's IO context
1989 		struct io_context* context = get_current_io_context(kernel);
1990 
1991 		mutex_lock(&sIOContextRootLock);
1992 
1993 		struct vnode* root = context->root;
1994 		if (root != NULL)
1995 			inc_vnode_ref_count(root);
1996 
1997 		mutex_unlock(&sIOContextRootLock);
1998 
1999 		if (root != NULL)
2000 			return root;
2001 
2002 		// That should never happen.
2003 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2004 			"have a root\n", team_get_current_team_id());
2005 	}
2006 
2007 	inc_vnode_ref_count(sRoot);
2008 	return sRoot;
2009 }
2010 
2011 
2012 /*!	\brief Gets the directory path and leaf name for a given path.
2013 
2014 	The supplied \a path is transformed to refer to the directory part of
2015 	the entry identified by the original path, and into the buffer \a filename
2016 	the leaf name of the original entry is written.
2017 	Neither the returned path nor the leaf name can be expected to be
2018 	canonical.
2019 
2020 	\param path The path to be analyzed. Must be able to store at least one
2021 		   additional character.
2022 	\param filename The buffer into which the leaf name will be written.
2023 		   Must be of size B_FILE_NAME_LENGTH at least.
	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
		   name doesn't fit into a buffer of size \c B_FILE_NAME_LENGTH, or
		   \c B_ENTRY_NOT_FOUND, if the given path name is empty.
2027 */
2028 static status_t
2029 get_dir_path_and_leaf(char* path, char* filename)
2030 {
2031 	if (*path == '\0')
2032 		return B_ENTRY_NOT_FOUND;
2033 
2034 	char* last = strrchr(path, '/');
		// '/' is not allowed in file names!
2036 
2037 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2038 
2039 	if (last == NULL) {
		// the path is a single segment with no '/' in it, e.g. "foo"
2042 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2043 			return B_NAME_TOO_LONG;
2044 
2045 		strcpy(path, ".");
2046 	} else {
2047 		last++;
2048 		if (last[0] == '\0') {
2049 			// special case: the path ends in one or more '/' - remove them
2050 			while (*--last == '/' && last != path);
2051 			last[1] = '\0';
2052 
2053 			if (last == path && last[0] == '/') {
2054 				// This path points to the root of the file system
2055 				strcpy(filename, ".");
2056 				return B_OK;
2057 			}
2058 			for (; last != path && *(last - 1) != '/'; last--);
2059 				// rewind to the start of the leaf before the '/'
2060 		}
2061 
2062 		// normal leaf: replace the leaf portion of the path with a '.'
2063 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2064 			return B_NAME_TOO_LONG;
2065 
2066 		last[0] = '.';
2067 		last[1] = '\0';
2068 	}
2069 	return B_OK;
2070 }
2071 
2072 
2073 static status_t
2074 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2075 	bool traverse, bool kernel, struct vnode** _vnode)
2076 {
2077 	char clonedName[B_FILE_NAME_LENGTH + 1];
2078 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2079 		return B_NAME_TOO_LONG;
2080 
2081 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2082 	struct vnode* directory;
2083 
2084 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2085 	if (status < 0)
2086 		return status;
2087 
2088 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2089 		_vnode, NULL);
2090 }
2091 
2092 
2093 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2094 	and returns the respective vnode.
2095 	On success a reference to the vnode is acquired for the caller.
2096 */
2097 static status_t
2098 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2099 {
2100 	ino_t id;
2101 	bool missing;
2102 
2103 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2104 		return missing ? B_ENTRY_NOT_FOUND
2105 			: get_vnode(dir->device, id, _vnode, true, false);
2106 	}
2107 
2108 	status_t status = FS_CALL(dir, lookup, name, &id);
2109 	if (status != B_OK)
2110 		return status;
2111 
2112 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2113 	// have a reference and just need to look the node up.
2114 	rw_lock_read_lock(&sVnodeLock);
2115 	*_vnode = lookup_vnode(dir->device, id);
2116 	rw_lock_read_unlock(&sVnodeLock);
2117 
2118 	if (*_vnode == NULL) {
2119 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2120 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2121 		return B_ENTRY_NOT_FOUND;
2122 	}
2123 
2124 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2125 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2126 //		(*_vnode)->mount->id, (*_vnode)->id);
2127 
2128 	return B_OK;
2129 }
2130 
2131 
2132 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2133 	\a path must not be NULL.
2134 	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this reduces the ref_count of the starting \a vnode, whether it
	succeeds or not!
2139 */
2140 static status_t
2141 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2142 	int count, struct io_context* ioContext, struct vnode** _vnode,
2143 	ino_t* _parentID)
2144 {
2145 	status_t status = B_OK;
2146 	ino_t lastParentID = vnode->id;
2147 
2148 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2149 
2150 	if (path == NULL) {
2151 		put_vnode(vnode);
2152 		return B_BAD_VALUE;
2153 	}
2154 
2155 	if (*path == '\0') {
2156 		put_vnode(vnode);
2157 		return B_ENTRY_NOT_FOUND;
2158 	}
2159 
2160 	while (true) {
2161 		struct vnode* nextVnode;
2162 		char* nextPath;
2163 
2164 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2165 			path));
2166 
2167 		// done?
2168 		if (path[0] == '\0')
2169 			break;
2170 
2171 		// walk to find the next path component ("path" will point to a single
2172 		// path component), and filter out multiple slashes
2173 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2174 				nextPath++);
2175 
2176 		if (*nextPath == '/') {
2177 			*nextPath = '\0';
2178 			do
2179 				nextPath++;
2180 			while (*nextPath == '/');
2181 		}
2182 
		// If the component is "..", and we are at a covering vnode, move to
		// the covered vnode, so we pass the ".." to the underlying file
		// system. Also prevent escaping the root of the IO context.
2186 		if (strcmp("..", path) == 0) {
2187 			if (vnode == ioContext->root) {
2188 				// Attempted prison break! Keep it contained.
2189 				path = nextPath;
2190 				continue;
2191 			}
2192 
2193 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2194 				nextVnode = coveredVnode;
2195 				put_vnode(vnode);
2196 				vnode = nextVnode;
2197 			}
2198 		}
2199 
2200 		// check if vnode is really a directory
2201 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2202 			status = B_NOT_A_DIRECTORY;
2203 
2204 		// Check if we have the right to search the current directory vnode.
2205 		// If a file system doesn't have the access() function, we assume that
2206 		// searching a directory is always allowed
2207 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2208 			status = FS_CALL(vnode, access, X_OK);
2209 
2210 		// Tell the filesystem to get the vnode of this path component (if we
2211 		// got the permission from the call above)
2212 		if (status == B_OK)
2213 			status = lookup_dir_entry(vnode, path, &nextVnode);
2214 
2215 		if (status != B_OK) {
2216 			put_vnode(vnode);
2217 			return status;
2218 		}
2219 
2220 		// If the new node is a symbolic link, resolve it (if we've been told
2221 		// to do it)
2222 		if (S_ISLNK(nextVnode->Type())
2223 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2224 			size_t bufferSize;
2225 			char* buffer;
2226 
2227 			TRACE(("traverse link\n"));
2228 
2229 			// it's not exactly nice style using goto in this way, but hey,
2230 			// it works :-/
2231 			if (count + 1 > B_MAX_SYMLINKS) {
2232 				status = B_LINK_LIMIT;
2233 				goto resolve_link_error;
2234 			}
2235 
2236 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2237 			if (buffer == NULL) {
2238 				status = B_NO_MEMORY;
2239 				goto resolve_link_error;
2240 			}
2241 
2242 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2243 				bufferSize--;
2244 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2245 				// null-terminate
2246 				if (status >= 0)
2247 					buffer[bufferSize] = '\0';
2248 			} else
2249 				status = B_BAD_VALUE;
2250 
2251 			if (status != B_OK) {
2252 				free(buffer);
2253 
2254 		resolve_link_error:
2255 				put_vnode(vnode);
2256 				put_vnode(nextVnode);
2257 
2258 				return status;
2259 			}
2260 			put_vnode(nextVnode);
2261 
2262 			// Check if we start from the root directory or the current
2263 			// directory ("vnode" still points to that one).
2264 			// Cut off all leading slashes if it's the root directory
2265 			path = buffer;
2266 			bool absoluteSymlink = false;
2267 			if (path[0] == '/') {
2268 				// we don't need the old directory anymore
2269 				put_vnode(vnode);
2270 
2271 				while (*++path == '/')
2272 					;
2273 
2274 				mutex_lock(&sIOContextRootLock);
2275 				vnode = ioContext->root;
2276 				inc_vnode_ref_count(vnode);
2277 				mutex_unlock(&sIOContextRootLock);
2278 
2279 				absoluteSymlink = true;
2280 			}
2281 
2282 			inc_vnode_ref_count(vnode);
2283 				// balance the next recursion - we will decrement the
2284 				// ref_count of the vnode, no matter if we succeeded or not
2285 
2286 			if (absoluteSymlink && *path == '\0') {
2287 				// symlink was just "/"
2288 				nextVnode = vnode;
2289 			} else {
2290 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2291 					ioContext, &nextVnode, &lastParentID);
2292 			}
2293 
2294 			free(buffer);
2295 
2296 			if (status != B_OK) {
2297 				put_vnode(vnode);
2298 				return status;
2299 			}
2300 		} else
2301 			lastParentID = vnode->id;
2302 
2303 		// decrease the ref count on the old dir we just looked up into
2304 		put_vnode(vnode);
2305 
2306 		path = nextPath;
2307 		vnode = nextVnode;
2308 
2309 		// see if we hit a covered node
2310 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2311 			put_vnode(vnode);
2312 			vnode = coveringNode;
2313 		}
2314 	}
2315 
2316 	*_vnode = vnode;
2317 	if (_parentID)
2318 		*_parentID = lastParentID;
2319 
2320 	return B_OK;
2321 }
2322 
2323 
2324 static status_t
2325 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2326 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2327 {
2328 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2329 		get_current_io_context(kernel), _vnode, _parentID);
2330 }
2331 
2332 
2333 static status_t
2334 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2335 	ino_t* _parentID, bool kernel)
2336 {
2337 	struct vnode* start = NULL;
2338 
2339 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2340 
2341 	if (!path)
2342 		return B_BAD_VALUE;
2343 
2344 	if (*path == '\0')
2345 		return B_ENTRY_NOT_FOUND;
2346 
2347 	// figure out if we need to start at root or at cwd
2348 	if (*path == '/') {
2349 		if (sRoot == NULL) {
2350 			// we're a bit early, aren't we?
2351 			return B_ERROR;
2352 		}
2353 
2354 		while (*++path == '/')
2355 			;
2356 		start = get_root_vnode(kernel);
2357 
2358 		if (*path == '\0') {
2359 			*_vnode = start;
2360 			return B_OK;
2361 		}
2362 
2363 	} else {
2364 		struct io_context* context = get_current_io_context(kernel);
2365 
2366 		mutex_lock(&context->io_mutex);
2367 		start = context->cwd;
2368 		if (start != NULL)
2369 			inc_vnode_ref_count(start);
2370 		mutex_unlock(&context->io_mutex);
2371 
2372 		if (start == NULL)
2373 			return B_ERROR;
2374 	}
2375 
2376 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2377 		_parentID);
2378 }
2379 
2380 
/*! Returns the vnode for the next-to-last segment of the path, and writes
	the last portion into \a filename.
2383 	The path buffer must be able to store at least one additional character.
2384 */
2385 static status_t
2386 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2387 	bool kernel)
2388 {
2389 	status_t status = get_dir_path_and_leaf(path, filename);
2390 	if (status != B_OK)
2391 		return status;
2392 
2393 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2394 }
2395 
2396 
2397 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2398 		   to by a FD + path pair.
2399 
2400 	\a path must be given in either case. \a fd might be omitted, in which
2401 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned off
2403 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2404 	ignored.
2405 
2406 	The caller has the responsibility to call put_vnode() on the returned
2407 	directory vnode.
2408 
2409 	\param fd The FD. May be < 0.
2410 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2411 	       is modified by this function. It must have at least room for a
2412 	       string one character longer than the path it contains.
2413 	\param _vnode A pointer to a variable the directory vnode shall be written
2414 		   into.
2415 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2416 		   the leaf name of the specified entry will be written.
2417 	\param kernel \c true, if invoked from inside the kernel, \c false if
2418 		   invoked from userland.
2419 	\return \c B_OK, if everything went fine, another error code otherwise.
2420 */
2421 static status_t
2422 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2423 	char* filename, bool kernel)
2424 {
2425 	if (!path)
2426 		return B_BAD_VALUE;
2427 	if (*path == '\0')
2428 		return B_ENTRY_NOT_FOUND;
2429 	if (fd < 0)
2430 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2431 
2432 	status_t status = get_dir_path_and_leaf(path, filename);
2433 	if (status != B_OK)
2434 		return status;
2435 
2436 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2437 }
2438 
2439 
2440 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2441 		   to by a vnode + path pair.
2442 
2443 	\a path must be given in either case. \a vnode might be omitted, in which
2444 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned off
2446 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2447 	ignored.
2448 
2449 	The caller has the responsibility to call put_vnode() on the returned
2450 	directory vnode.
2451 
2452 	\param vnode The vnode. May be \c NULL.
2453 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2454 	       is modified by this function. It must have at least room for a
2455 	       string one character longer than the path it contains.
2456 	\param _vnode A pointer to a variable the directory vnode shall be written
2457 		   into.
2458 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2459 		   the leaf name of the specified entry will be written.
2460 	\param kernel \c true, if invoked from inside the kernel, \c false if
2461 		   invoked from userland.
2462 	\return \c B_OK, if everything went fine, another error code otherwise.
2463 */
2464 static status_t
2465 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2466 	struct vnode** _vnode, char* filename, bool kernel)
2467 {
2468 	if (!path)
2469 		return B_BAD_VALUE;
2470 	if (*path == '\0')
2471 		return B_ENTRY_NOT_FOUND;
2472 	if (vnode == NULL || path[0] == '/')
2473 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2474 
2475 	status_t status = get_dir_path_and_leaf(path, filename);
2476 	if (status != B_OK)
2477 		return status;
2478 
2479 	inc_vnode_ref_count(vnode);
2480 		// vnode_path_to_vnode() always decrements the ref count
2481 
2482 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2483 }
2484 
2485 
2486 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2487 */
2488 static status_t
2489 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2490 	size_t bufferSize, struct io_context* ioContext)
2491 {
2492 	if (bufferSize < sizeof(struct dirent))
2493 		return B_BAD_VALUE;
2494 
2495 	// See if the vnode is covering another vnode and move to the covered
2496 	// vnode so we get the underlying file system
2497 	VNodePutter vnodePutter;
2498 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2499 		vnode = coveredVnode;
2500 		vnodePutter.SetTo(vnode);
2501 	}
2502 
2503 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2504 		// The FS supports getting the name of a vnode.
2505 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2506 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2507 			return B_OK;
2508 	}
2509 
2510 	// The FS doesn't support getting the name of a vnode. So we search the
2511 	// parent directory for the vnode, if the caller let us.
2512 
2513 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2514 		return B_UNSUPPORTED;
2515 
2516 	void* cookie;
2517 
2518 	status_t status = FS_CALL(parent, open_dir, &cookie);
2519 	if (status >= B_OK) {
2520 		while (true) {
2521 			uint32 num = 1;
2522 			// We use the FS hook directly instead of dir_read(), since we don't
2523 			// want the entries to be fixed. We have already resolved vnode to
2524 			// the covered node.
2525 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2526 				&num);
2527 			if (status != B_OK)
2528 				break;
2529 			if (num == 0) {
2530 				status = B_ENTRY_NOT_FOUND;
2531 				break;
2532 			}
2533 
2534 			if (vnode->id == buffer->d_ino) {
2535 				// found correct entry!
2536 				break;
2537 			}
2538 		}
2539 
2540 		FS_CALL(parent, close_dir, cookie);
2541 		FS_CALL(parent, free_dir_cookie, cookie);
2542 	}
2543 	return status;
2544 }
2545 
2546 
2547 static status_t
2548 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2549 	size_t nameSize, bool kernel)
2550 {
2551 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2552 	struct dirent* dirent = (struct dirent*)buffer;
2553 
2554 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2555 		get_current_io_context(kernel));
2556 	if (status != B_OK)
2557 		return status;
2558 
2559 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2560 		return B_BUFFER_OVERFLOW;
2561 
2562 	return B_OK;
2563 }
2564 
2565 
2566 /*!	Gets the full path to a given directory vnode.
2567 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2568 	file system doesn't support this call, it will fall back to iterating
2569 	through the parent directory to get the name of the child.
2570 
2571 	To protect against circular loops, it supports a maximum tree depth
2572 	of 256 levels.
2573 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path is
	correct, as paths aren't safe anyway: the path to a file can change at
	any time.

	It might be a good idea, though, for the calling function to check
	whether the returned path exists (it's not done here for efficiency
	reasons).
2580 */
2581 static status_t
2582 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2583 	bool kernel)
2584 {
2585 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2586 
2587 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2588 		return B_BAD_VALUE;
2589 
2590 	if (!S_ISDIR(vnode->Type()))
2591 		return B_NOT_A_DIRECTORY;
2592 
2593 	char* path = buffer;
2594 	int32 insert = bufferSize;
2595 	int32 maxLevel = 256;
2596 	int32 length;
2597 	status_t status = B_OK;
2598 	struct io_context* ioContext = get_current_io_context(kernel);
2599 
2600 	// we don't use get_vnode() here because this call is more
2601 	// efficient and does all we need from get_vnode()
2602 	inc_vnode_ref_count(vnode);
2603 
2604 	path[--insert] = '\0';
2605 		// the path is filled right to left
2606 
2607 	while (true) {
2608 		// If the node is the context's root, bail out. Otherwise resolve mount
2609 		// points.
2610 		if (vnode == ioContext->root)
2611 			break;
2612 
2613 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2614 			put_vnode(vnode);
2615 			vnode = coveredVnode;
2616 		}
2617 
2618 		// lookup the parent vnode
2619 		struct vnode* parentVnode;
2620 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2621 		if (status != B_OK)
2622 			goto out;
2623 
2624 		if (parentVnode == vnode) {
2625 			// The caller apparently got their hands on a node outside of their
2626 			// context's root. Now we've hit the global root.
2627 			put_vnode(parentVnode);
2628 			break;
2629 		}
2630 
2631 		// get the node's name
2632 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2633 			// also used for fs_read_dir()
2634 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2635 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2636 			sizeof(nameBuffer), ioContext);
2637 
2638 		// release the current vnode, we only need its parent from now on
2639 		put_vnode(vnode);
2640 		vnode = parentVnode;
2641 
2642 		if (status != B_OK)
2643 			goto out;
2644 
2645 		// TODO: add an explicit check for loops in about 10 levels to do
2646 		// real loop detection
2647 
		// don't go deeper than 'maxLevel' to prevent circular loops
2649 		if (maxLevel-- < 0) {
2650 			status = B_LINK_LIMIT;
2651 			goto out;
2652 		}
2653 
2654 		// add the name in front of the current path
2655 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2656 		length = strlen(name);
2657 		insert -= length;
2658 		if (insert <= 0) {
2659 			status = B_RESULT_NOT_REPRESENTABLE;
2660 			goto out;
2661 		}
2662 		memcpy(path + insert, name, length);
2663 		path[--insert] = '/';
2664 	}
2665 
2666 	// the root dir will result in an empty path: fix it
2667 	if (path[insert] == '\0')
2668 		path[--insert] = '/';
2669 
2670 	TRACE(("  path is: %s\n", path + insert));
2671 
2672 	// move the path to the start of the buffer
2673 	length = bufferSize - insert;
2674 	memmove(buffer, path + insert, length);
2675 
2676 out:
2677 	put_vnode(vnode);
2678 	return status;
2679 }
2680 
2681 
2682 /*!	Checks the length of every path component, and adds a '.'
2683 	if the path ends in a slash.
2684 	The given path buffer must be able to store at least one
2685 	additional character.
2686 */
2687 static status_t
2688 check_path(char* to)
2689 {
2690 	int32 length = 0;
2691 
2692 	// check length of every path component
2693 
2694 	while (*to) {
2695 		char* begin;
2696 		if (*to == '/')
2697 			to++, length++;
2698 
2699 		begin = to;
2700 		while (*to != '/' && *to)
2701 			to++, length++;
2702 
2703 		if (to - begin > B_FILE_NAME_LENGTH)
2704 			return B_NAME_TOO_LONG;
2705 	}
2706 
2707 	if (length == 0)
2708 		return B_ENTRY_NOT_FOUND;
2709 
2710 	// complete path if there is a slash at the end
2711 
2712 	if (*(to - 1) == '/') {
2713 		if (length > B_PATH_NAME_LENGTH - 2)
2714 			return B_NAME_TOO_LONG;
2715 
2716 		to[0] = '.';
2717 		to[1] = '\0';
2718 	}
2719 
2720 	return B_OK;
2721 }
2722 
2723 
2724 static struct file_descriptor*
2725 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2726 {
2727 	struct file_descriptor* descriptor
2728 		= get_fd(get_current_io_context(kernel), fd);
2729 	if (descriptor == NULL)
2730 		return NULL;
2731 
2732 	struct vnode* vnode = fd_vnode(descriptor);
2733 	if (vnode == NULL) {
2734 		put_fd(descriptor);
2735 		return NULL;
2736 	}
2737 
	// TODO: when we can close a file descriptor at any point, investigate
	//	whether this is still valid to do (accessing the vnode without
	//	ref_count or locking)
2741 	*_vnode = vnode;
2742 	return descriptor;
2743 }
2744 
2745 
2746 static struct vnode*
2747 get_vnode_from_fd(int fd, bool kernel)
2748 {
2749 	struct file_descriptor* descriptor;
2750 	struct vnode* vnode;
2751 
2752 	descriptor = get_fd(get_current_io_context(kernel), fd);
2753 	if (descriptor == NULL)
2754 		return NULL;
2755 
2756 	vnode = fd_vnode(descriptor);
2757 	if (vnode != NULL)
2758 		inc_vnode_ref_count(vnode);
2759 
2760 	put_fd(descriptor);
2761 	return vnode;
2762 }
2763 
2764 
2765 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2766 	only the path will be considered. In this case, the \a path must not be
2767 	NULL.
2768 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2769 	and should be NULL for files.
2770 */
2771 static status_t
2772 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2773 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2774 {
2775 	if (fd < 0 && !path)
2776 		return B_BAD_VALUE;
2777 
2778 	if (path != NULL && *path == '\0')
2779 		return B_ENTRY_NOT_FOUND;
2780 
2781 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2782 		// no FD or absolute path
2783 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2784 	}
2785 
2786 	// FD only, or FD + relative path
2787 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2788 	if (vnode == NULL)
2789 		return B_FILE_ERROR;
2790 
2791 	if (path != NULL) {
2792 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2793 			_vnode, _parentID);
2794 	}
2795 
2796 	// there is no relative path to take into account
2797 
2798 	*_vnode = vnode;
2799 	if (_parentID)
2800 		*_parentID = -1;
2801 
2802 	return B_OK;
2803 }
2804 
2805 
2806 static int
2807 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2808 	void* cookie, int openMode, bool kernel)
2809 {
2810 	struct file_descriptor* descriptor;
2811 	int fd;
2812 
2813 	// If the vnode is locked, we don't allow creating a new file/directory
2814 	// file_descriptor for it
2815 	if (vnode && vnode->mandatory_locked_by != NULL
2816 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2817 		return B_BUSY;
2818 
2819 	descriptor = alloc_fd();
2820 	if (!descriptor)
2821 		return B_NO_MEMORY;
2822 
2823 	if (vnode)
2824 		descriptor->u.vnode = vnode;
2825 	else
2826 		descriptor->u.mount = mount;
2827 	descriptor->cookie = cookie;
2828 
2829 	switch (type) {
2830 		// vnode types
2831 		case FDTYPE_FILE:
2832 			descriptor->ops = &sFileOps;
2833 			break;
2834 		case FDTYPE_DIR:
2835 			descriptor->ops = &sDirectoryOps;
2836 			break;
2837 		case FDTYPE_ATTR:
2838 			descriptor->ops = &sAttributeOps;
2839 			break;
2840 		case FDTYPE_ATTR_DIR:
2841 			descriptor->ops = &sAttributeDirectoryOps;
2842 			break;
2843 
2844 		// mount types
2845 		case FDTYPE_INDEX_DIR:
2846 			descriptor->ops = &sIndexDirectoryOps;
2847 			break;
2848 		case FDTYPE_QUERY:
2849 			descriptor->ops = &sQueryOps;
2850 			break;
2851 
2852 		default:
2853 			panic("get_new_fd() called with unknown type %d\n", type);
2854 			break;
2855 	}
2856 	descriptor->type = type;
2857 	descriptor->open_mode = openMode;
2858 
2859 	io_context* context = get_current_io_context(kernel);
2860 	fd = new_fd(context, descriptor);
2861 	if (fd < 0) {
2862 		free(descriptor);
2863 		return B_NO_MORE_FDS;
2864 	}
2865 
2866 	mutex_lock(&context->io_mutex);
2867 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2868 	mutex_unlock(&context->io_mutex);
2869 
2870 	return fd;
2871 }
2872 
2873 
2874 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2875 	vfs_normalize_path(). See there for more documentation.
2876 */
2877 static status_t
2878 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2879 {
2880 	VNodePutter dirPutter;
2881 	struct vnode* dir = NULL;
2882 	status_t error;
2883 
2884 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2885 		// get dir vnode + leaf name
2886 		struct vnode* nextDir;
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 
2892 		dir = nextDir;
2893 		strcpy(path, leaf);
2894 		dirPutter.SetTo(dir);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		struct vnode* fileVnode;
2899 		VNodePutter fileVnodePutter;
2900 		if (traverseLink) {
2901 			inc_vnode_ref_count(dir);
2902 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2903 					NULL) == B_OK) {
2904 				fileVnodePutter.SetTo(fileVnode);
2905 				fileExists = true;
2906 			}
2907 		}
2908 
2909 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2910 			// we're done -- construct the path
2911 			bool hasLeaf = true;
2912 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2913 				// special cases "." and ".." -- get the dir, forget the leaf
2914 				inc_vnode_ref_count(dir);
2915 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2916 					&nextDir, NULL);
2917 				if (error != B_OK)
2918 					return error;
2919 				dir = nextDir;
2920 				dirPutter.SetTo(dir);
2921 				hasLeaf = false;
2922 			}
2923 
2924 			// get the directory path
2925 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2926 			if (error != B_OK)
2927 				return error;
2928 
2929 			// append the leaf name
2930 			if (hasLeaf) {
2931 				// insert a directory separator if this is not the file system
2932 				// root
2933 				if ((strcmp(path, "/") != 0
2934 					&& strlcat(path, "/", pathSize) >= pathSize)
2935 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2936 					return B_NAME_TOO_LONG;
2937 				}
2938 			}
2939 
2940 			return B_OK;
2941 		}
2942 
2943 		// read link
2944 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2945 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2946 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2947 			if (error != B_OK)
2948 				return error;
2949 			path[bufferSize] = '\0';
2950 		} else
2951 			return B_BAD_VALUE;
2952 	}
2953 
2954 	return B_LINK_LIMIT;
2955 }
2956 
2957 
2958 static status_t
2959 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2960 	struct io_context* ioContext)
2961 {
2962 	// Make sure the IO context root is not bypassed.
2963 	if (parent == ioContext->root) {
2964 		*_device = parent->device;
2965 		*_node = parent->id;
2966 		return B_OK;
2967 	}
2968 
2969 	inc_vnode_ref_count(parent);
2970 		// vnode_path_to_vnode() puts the node
2971 
2972 	// ".." is guaranteed not to be clobbered by this call
2973 	struct vnode* vnode;
2974 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2975 		ioContext, &vnode, NULL);
2976 	if (status == B_OK) {
2977 		*_device = vnode->device;
2978 		*_node = vnode->id;
2979 		put_vnode(vnode);
2980 	}
2981 
2982 	return status;
2983 }
2984 
2985 
2986 #ifdef ADD_DEBUGGER_COMMANDS
2987 
2988 
2989 static void
2990 _dump_advisory_locking(advisory_locking* locking)
2991 {
2992 	if (locking == NULL)
2993 		return;
2994 
2995 	kprintf("   lock:        %" B_PRId32, locking->lock);
2996 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2997 
2998 	int32 index = 0;
2999 	LockList::Iterator iterator = locking->locks.GetIterator();
3000 	while (iterator.HasNext()) {
3001 		struct advisory_lock* lock = iterator.Next();
3002 
3003 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3004 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3005 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3006 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3007 	}
3008 }
3009 
3010 
3011 static void
3012 _dump_mount(struct fs_mount* mount)
3013 {
3014 	kprintf("MOUNT: %p\n", mount);
3015 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3016 	kprintf(" device_name:   %s\n", mount->device_name);
3017 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3018 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3019 	kprintf(" partition:     %p\n", mount->partition);
3020 	kprintf(" lock:          %p\n", &mount->rlock);
3021 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3022 		mount->owns_file_device ? " owns_file_device" : "");
3023 
3024 	fs_volume* volume = mount->volume;
3025 	while (volume != NULL) {
3026 		kprintf(" volume %p:\n", volume);
3027 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3028 		kprintf("  private_volume:   %p\n", volume->private_volume);
3029 		kprintf("  ops:              %p\n", volume->ops);
3030 		kprintf("  file_system:      %p\n", volume->file_system);
3031 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3032 		volume = volume->super_volume;
3033 	}
3034 
3035 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3036 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3037 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3038 	set_debug_variable("_partition", (addr_t)mount->partition);
3039 }
3040 
3041 
3042 static bool
3043 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3044 	const char* name)
3045 {
3046 	bool insertSlash = buffer[bufferSize] != '\0';
3047 	size_t nameLength = strlen(name);
3048 
3049 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3050 		return false;
3051 
3052 	if (insertSlash)
3053 		buffer[--bufferSize] = '/';
3054 
3055 	bufferSize -= nameLength;
3056 	memcpy(buffer + bufferSize, name, nameLength);
3057 
3058 	return true;
3059 }
3060 
3061 
3062 static bool
3063 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3064 	ino_t nodeID)
3065 {
3066 	if (bufferSize == 0)
3067 		return false;
3068 
3069 	bool insertSlash = buffer[bufferSize] != '\0';
3070 	if (insertSlash)
3071 		buffer[--bufferSize] = '/';
3072 
3073 	size_t size = snprintf(buffer, bufferSize,
3074 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3075 	if (size > bufferSize) {
3076 		if (insertSlash)
3077 			bufferSize++;
3078 		return false;
3079 	}
3080 
3081 	if (size < bufferSize)
3082 		memmove(buffer + bufferSize - size, buffer, size);
3083 
3084 	bufferSize -= size;
3085 	return true;
3086 }
3087 
3088 
3089 static char*
3090 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3091 	bool& _truncated)
3092 {
3093 	// null-terminate the path
3094 	buffer[--bufferSize] = '\0';
3095 
3096 	while (true) {
3097 		while (vnode->covers != NULL)
3098 			vnode = vnode->covers;
3099 
3100 		if (vnode == sRoot) {
3101 			_truncated = bufferSize == 0;
3102 			if (!_truncated)
3103 				buffer[--bufferSize] = '/';
3104 			return buffer + bufferSize;
3105 		}
3106 
3107 		// resolve the name
3108 		ino_t dirID;
3109 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3110 			vnode->id, dirID);
3111 		if (name == NULL) {
3112 			// Failed to resolve the name -- prepend "<dev,node>/".
3113 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3114 				vnode->mount->id, vnode->id);
3115 			return buffer + bufferSize;
3116 		}
3117 
3118 		// prepend the name
3119 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3120 			_truncated = true;
3121 			return buffer + bufferSize;
3122 		}
3123 
3124 		// resolve the directory node
3125 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3126 		if (nextVnode == NULL) {
3127 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3128 				vnode->mount->id, dirID);
3129 			return buffer + bufferSize;
3130 		}
3131 
3132 		vnode = nextVnode;
3133 	}
3134 }
3135 
3136 
3137 static void
3138 _dump_vnode(struct vnode* vnode, bool printPath)
3139 {
3140 	kprintf("VNODE: %p\n", vnode);
3141 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3142 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3143 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3144 	kprintf(" private_node:  %p\n", vnode->private_node);
3145 	kprintf(" mount:         %p\n", vnode->mount);
3146 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3147 	kprintf(" covers:        %p\n", vnode->covers);
3148 	kprintf(" cache:         %p\n", vnode->cache);
3149 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3150 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3151 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3152 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3153 
3154 	_dump_advisory_locking(vnode->advisory_locking);
3155 
3156 	if (printPath) {
3157 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3158 		if (buffer != NULL) {
3159 			bool truncated;
3160 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3161 				B_PATH_NAME_LENGTH, truncated);
3162 			if (path != NULL) {
3163 				kprintf(" path:          ");
3164 				if (truncated)
3165 					kputs("<truncated>/");
3166 				kputs(path);
3167 				kputs("\n");
3168 			} else
3169 				kprintf("Failed to resolve vnode path.\n");
3170 
3171 			debug_free(buffer);
3172 		} else
3173 			kprintf("Failed to allocate memory for constructing the path.\n");
3174 	}
3175 
3176 	set_debug_variable("_node", (addr_t)vnode->private_node);
3177 	set_debug_variable("_mount", (addr_t)vnode->mount);
3178 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3179 	set_debug_variable("_covers", (addr_t)vnode->covers);
3180 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3181 }
3182 
3183 
3184 static int
3185 dump_mount(int argc, char** argv)
3186 {
3187 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3188 		kprintf("usage: %s [id|address]\n", argv[0]);
3189 		return 0;
3190 	}
3191 
3192 	ulong val = parse_expression(argv[1]);
3193 	uint32 id = val;
3194 
3195 	struct fs_mount* mount = sMountsTable->Lookup(id);
3196 	if (mount == NULL) {
3197 		if (IS_USER_ADDRESS(id)) {
3198 			kprintf("fs_mount not found\n");
3199 			return 0;
3200 		}
3201 		mount = (fs_mount*)val;
3202 	}
3203 
3204 	_dump_mount(mount);
3205 	return 0;
3206 }
3207 
3208 
3209 static int
3210 dump_mounts(int argc, char** argv)
3211 {
3212 	if (argc != 1) {
3213 		kprintf("usage: %s\n", argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3218 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3219 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3220 
3221 	struct fs_mount* mount;
3222 
3223 	MountTable::Iterator iterator(sMountsTable);
3224 	while (iterator.HasNext()) {
3225 		mount = iterator.Next();
3226 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3227 			mount->root_vnode->covers, mount->volume->private_volume,
3228 			mount->volume->file_system_name);
3229 
3230 		fs_volume* volume = mount->volume;
3231 		while (volume->super_volume != NULL) {
3232 			volume = volume->super_volume;
3233 			kprintf("                                     %p %s\n",
3234 				volume->private_volume, volume->file_system_name);
3235 		}
3236 	}
3237 
3238 	return 0;
3239 }
3240 
3241 
3242 static int
3243 dump_vnode(int argc, char** argv)
3244 {
3245 	bool printPath = false;
3246 	int argi = 1;
3247 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3248 		printPath = true;
3249 		argi++;
3250 	}
3251 
3252 	if (argi >= argc || argi + 2 < argc) {
3253 		print_debugger_command_usage(argv[0]);
3254 		return 0;
3255 	}
3256 
3257 	struct vnode* vnode = NULL;
3258 
3259 	if (argi + 1 == argc) {
3260 		vnode = (struct vnode*)parse_expression(argv[argi]);
3261 		if (IS_USER_ADDRESS(vnode)) {
3262 			kprintf("invalid vnode address\n");
3263 			return 0;
3264 		}
3265 		_dump_vnode(vnode, printPath);
3266 		return 0;
3267 	}
3268 
3269 	dev_t device = parse_expression(argv[argi]);
3270 	ino_t id = parse_expression(argv[argi + 1]);
3271 
3272 	VnodeTable::Iterator iterator(sVnodeTable);
3273 	while (iterator.HasNext()) {
3274 		vnode = iterator.Next();
3275 		if (vnode->id != id || vnode->device != device)
3276 			continue;
3277 
3278 		_dump_vnode(vnode, printPath);
3279 	}
3280 
3281 	return 0;
3282 }
3283 
3284 
3285 static int
3286 dump_vnodes(int argc, char** argv)
3287 {
3288 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3289 		kprintf("usage: %s [device]\n", argv[0]);
3290 		return 0;
3291 	}
3292 
	// restrict dumped nodes to the given device
3294 	dev_t device = parse_expression(argv[1]);
3295 
3296 	struct vnode* vnode;
3297 
3298 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3299 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3300 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3301 
3302 	VnodeTable::Iterator iterator(sVnodeTable);
3303 	while (iterator.HasNext()) {
3304 		vnode = iterator.Next();
3305 		if (vnode->device != device)
3306 			continue;
3307 
3308 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3309 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3310 			vnode->private_node, vnode->advisory_locking,
3311 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3312 			vnode->IsUnpublished() ? "u" : "-");
3313 	}
3314 
3315 	return 0;
3316 }
3317 
3318 
3319 static int
3320 dump_vnode_caches(int argc, char** argv)
3321 {
3322 	struct vnode* vnode;
3323 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3325 		kprintf("usage: %s [device]\n", argv[0]);
3326 		return 0;
3327 	}
3328 
3329 	// restrict dumped nodes to a certain device if requested
3330 	dev_t device = -1;
3331 	if (argc > 1)
3332 		device = parse_expression(argv[1]);
3333 
3334 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3335 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3336 
3337 	VnodeTable::Iterator iterator(sVnodeTable);
3338 	while (iterator.HasNext()) {
3339 		vnode = iterator.Next();
3340 		if (vnode->cache == NULL)
3341 			continue;
3342 		if (device != -1 && vnode->device != device)
3343 			continue;
3344 
3345 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3346 			vnode, vnode->device, vnode->id, vnode->cache,
3347 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3348 			vnode->cache->page_count);
3349 	}
3350 
3351 	return 0;
3352 }
3353 
3354 
3355 int
3356 dump_io_context(int argc, char** argv)
3357 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3359 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3360 		return 0;
3361 	}
3362 
3363 	struct io_context* context = NULL;
3364 
3365 	if (argc > 1) {
3366 		ulong num = parse_expression(argv[1]);
3367 		if (IS_KERNEL_ADDRESS(num))
3368 			context = (struct io_context*)num;
3369 		else {
3370 			Team* team = team_get_team_struct_locked(num);
3371 			if (team == NULL) {
3372 				kprintf("could not find team with ID %lu\n", num);
3373 				return 0;
3374 			}
3375 			context = (struct io_context*)team->io_context;
3376 		}
3377 	} else
3378 		context = get_current_io_context(true);
3379 
3380 	kprintf("I/O CONTEXT: %p\n", context);
3381 	kprintf(" root vnode:\t%p\n", context->root);
3382 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3383 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3384 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3385 
3386 	if (context->num_used_fds) {
3387 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3388 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3389 	}
3390 
3391 	for (uint32 i = 0; i < context->table_size; i++) {
3392 		struct file_descriptor* fd = context->fds[i];
3393 		if (fd == NULL)
3394 			continue;
3395 
3396 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3397 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3398 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3399 			fd->pos, fd->cookie,
3400 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3401 				? "mount" : "vnode",
3402 			fd->u.vnode);
3403 	}
3404 
3405 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3406 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3407 
3408 	set_debug_variable("_cwd", (addr_t)context->cwd);
3409 
3410 	return 0;
3411 }
3412 
3413 
3414 int
3415 dump_vnode_usage(int argc, char** argv)
3416 {
3417 	if (argc != 1) {
3418 		kprintf("usage: %s\n", argv[0]);
3419 		return 0;
3420 	}
3421 
3422 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3423 		sUnusedVnodes, kMaxUnusedVnodes);
3424 
3425 	uint32 count = sVnodeTable->CountElements();
3426 
3427 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3428 		count - sUnusedVnodes);
3429 	return 0;
3430 }
3431 
3432 #endif	// ADD_DEBUGGER_COMMANDS
3433 
3434 
3435 /*!	Clears memory specified by an iovec array.
3436 */
3437 static void
3438 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3439 {
3440 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3441 		size_t length = std::min(vecs[i].iov_len, bytes);
3442 		memset(vecs[i].iov_base, 0, length);
3443 		bytes -= length;
3444 	}
3445 }
3446 
3447 
3448 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3449 	and calls the file system hooks to read/write the request to disk.
3450 */
3451 static status_t
3452 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3453 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3454 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3455 	bool doWrite)
3456 {
3457 	if (fileVecCount == 0) {
3458 		// There are no file vecs at this offset, so we're obviously trying
3459 		// to access the file outside of its bounds
3460 		return B_BAD_VALUE;
3461 	}
3462 
3463 	size_t numBytes = *_numBytes;
3464 	uint32 fileVecIndex;
3465 	size_t vecOffset = *_vecOffset;
3466 	uint32 vecIndex = *_vecIndex;
3467 	status_t status;
3468 	size_t size;
3469 
3470 	if (!doWrite && vecOffset == 0) {
3471 		// now directly read the data from the device
3472 		// the first file_io_vec can be read directly
3473 
3474 		if (fileVecs[0].length < (off_t)numBytes)
3475 			size = fileVecs[0].length;
3476 		else
3477 			size = numBytes;
3478 
3479 		if (fileVecs[0].offset >= 0) {
3480 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3481 				&vecs[vecIndex], vecCount - vecIndex, &size);
3482 		} else {
3483 			// sparse read
3484 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3485 			status = B_OK;
3486 		}
3487 		if (status != B_OK)
3488 			return status;
3489 
3490 		// TODO: this is a work-around for buggy device drivers!
3491 		//	When our own drivers honour the length, we can:
3492 		//	a) also use this direct I/O for writes (otherwise, it would
3493 		//	   overwrite precious data)
3494 		//	b) panic if the term below is true (at least for writes)
3495 		if ((off_t)size > fileVecs[0].length) {
3496 			//dprintf("warning: device driver %p doesn't respect total length "
3497 			//	"in read_pages() call!\n", ref->device);
3498 			size = fileVecs[0].length;
3499 		}
3500 
3501 		ASSERT((off_t)size <= fileVecs[0].length);
3502 
3503 		// If the file portion was contiguous, we're already done now
3504 		if (size == numBytes)
3505 			return B_OK;
3506 
3507 		// if we reached the end of the file, we can return as well
3508 		if ((off_t)size != fileVecs[0].length) {
3509 			*_numBytes = size;
3510 			return B_OK;
3511 		}
3512 
3513 		fileVecIndex = 1;
3514 
3515 		// first, find out where we have to continue in our iovecs
3516 		for (; vecIndex < vecCount; vecIndex++) {
3517 			if (size < vecs[vecIndex].iov_len)
3518 				break;
3519 
3520 			size -= vecs[vecIndex].iov_len;
3521 		}
3522 
3523 		vecOffset = size;
3524 	} else {
3525 		fileVecIndex = 0;
3526 		size = 0;
3527 	}
3528 
3529 	// Too bad, let's process the rest of the file_io_vecs
3530 
3531 	size_t totalSize = size;
3532 	size_t bytesLeft = numBytes - size;
3533 
3534 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3535 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3536 		off_t fileOffset = fileVec.offset;
3537 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3538 
3539 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3540 			fileLeft));
3541 
3542 		// process the complete fileVec
3543 		while (fileLeft > 0) {
3544 			iovec tempVecs[MAX_TEMP_IO_VECS];
3545 			uint32 tempCount = 0;
3546 
3547 			// size tracks how much of what is left of the current fileVec
3548 			// (fileLeft) has been assigned to tempVecs
3549 			size = 0;
3550 
3551 			// assign what is left of the current fileVec to the tempVecs
3552 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3553 					&& tempCount < MAX_TEMP_IO_VECS;) {
3554 				// try to satisfy one iovec per iteration (or as much as
3555 				// possible)
3556 
3557 				// bytes left of the current iovec
3558 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3559 				if (vecLeft == 0) {
3560 					vecOffset = 0;
3561 					vecIndex++;
3562 					continue;
3563 				}
3564 
3565 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3566 					vecIndex, vecOffset, size));
3567 
3568 				// actually available bytes
3569 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3570 
3571 				tempVecs[tempCount].iov_base
3572 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3573 				tempVecs[tempCount].iov_len = tempVecSize;
3574 				tempCount++;
3575 
3576 				size += tempVecSize;
3577 				vecOffset += tempVecSize;
3578 			}
3579 
3580 			size_t bytes = size;
3581 
3582 			if (fileOffset == -1) {
3583 				if (doWrite) {
3584 					panic("sparse write attempt: vnode %p", vnode);
3585 					status = B_IO_ERROR;
3586 				} else {
3587 					// sparse read
3588 					zero_iovecs(tempVecs, tempCount, bytes);
3589 					status = B_OK;
3590 				}
3591 			} else if (doWrite) {
3592 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3593 					tempVecs, tempCount, &bytes);
3594 			} else {
3595 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3596 					tempVecs, tempCount, &bytes);
3597 			}
3598 			if (status != B_OK)
3599 				return status;
3600 
3601 			totalSize += bytes;
3602 			bytesLeft -= size;
3603 			if (fileOffset >= 0)
3604 				fileOffset += size;
3605 			fileLeft -= size;
3606 			//dprintf("-> file left = %Lu\n", fileLeft);
3607 
3608 			if (size != bytes || vecIndex >= vecCount) {
3609 				// there are no more bytes or iovecs, let's bail out
3610 				*_numBytes = totalSize;
3611 				return B_OK;
3612 			}
3613 		}
3614 	}
3615 
3616 	*_vecIndex = vecIndex;
3617 	*_vecOffset = vecOffset;
3618 	*_numBytes = totalSize;
3619 	return B_OK;
3620 }
3621 
3622 
3623 static bool
3624 is_user_in_group(gid_t gid)
3625 {
3626 	if (gid == getegid())
3627 		return true;
3628 
3629 	gid_t groups[NGROUPS_MAX];
3630 	int groupCount = getgroups(NGROUPS_MAX, groups);
3631 	for (int i = 0; i < groupCount; i++) {
3632 		if (gid == groups[i])
3633 			return true;
3634 	}
3635 
3636 	return false;
3637 }
3638 
3639 
3640 static status_t
3641 free_io_context(io_context* context)
3642 {
3643 	uint32 i;
3644 
3645 	TIOC(FreeIOContext(context));
3646 
3647 	if (context->root)
3648 		put_vnode(context->root);
3649 
3650 	if (context->cwd)
3651 		put_vnode(context->cwd);
3652 
3653 	mutex_lock(&context->io_mutex);
3654 
3655 	for (i = 0; i < context->table_size; i++) {
3656 		if (struct file_descriptor* descriptor = context->fds[i]) {
3657 			close_fd(context, descriptor);
3658 			put_fd(descriptor);
3659 		}
3660 	}
3661 
3662 	mutex_destroy(&context->io_mutex);
3663 
3664 	remove_node_monitors(context);
3665 	free(context->fds);
3666 	free(context);
3667 
3668 	return B_OK;
3669 }
3670 
3671 
3672 static status_t
3673 resize_monitor_table(struct io_context* context, const int newSize)
3674 {
	status_t status = B_OK;
3676 
3677 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3678 		return B_BAD_VALUE;
3679 
3680 	mutex_lock(&context->io_mutex);
3681 
3682 	if ((size_t)newSize < context->num_monitors) {
3683 		status = B_BUSY;
3684 		goto out;
3685 	}
3686 	context->max_monitors = newSize;
3687 
3688 out:
3689 	mutex_unlock(&context->io_mutex);
3690 	return status;
3691 }
3692 
3693 
3694 //	#pragma mark - public API for file systems
3695 
3696 
3697 extern "C" status_t
3698 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3699 	fs_vnode_ops* ops)
3700 {
3701 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3702 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3703 
3704 	if (privateNode == NULL)
3705 		return B_BAD_VALUE;
3706 
3707 	int32 tries = BUSY_VNODE_RETRIES;
3708 restart:
3709 	// create the node
3710 	bool nodeCreated;
3711 	struct vnode* vnode;
3712 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3713 		nodeCreated);
3714 	if (status != B_OK)
3715 		return status;
3716 
3717 	WriteLocker nodeLocker(sVnodeLock, true);
3718 		// create_new_vnode_and_lock() has locked for us
3719 
3720 	if (!nodeCreated && vnode->IsBusy()) {
3721 		nodeLocker.Unlock();
3722 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3723 			return B_BUSY;
3724 		goto restart;
3725 	}
3726 
3727 	// file system integrity check:
3728 	// test if the vnode already exists and bail out if this is the case!
3729 	if (!nodeCreated) {
3730 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3731 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3732 			vnode->private_node);
3733 		return B_ERROR;
3734 	}
3735 
3736 	vnode->private_node = privateNode;
3737 	vnode->ops = ops;
3738 	vnode->SetUnpublished(true);
3739 
3740 	TRACE(("returns: %s\n", strerror(status)));
3741 
3742 	return status;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3748 	fs_vnode_ops* ops, int type, uint32 flags)
3749 {
3750 	FUNCTION(("publish_vnode()\n"));
3751 
3752 	int32 tries = BUSY_VNODE_RETRIES;
3753 restart:
3754 	WriteLocker locker(sVnodeLock);
3755 
3756 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3757 
3758 	bool nodeCreated = false;
3759 	if (vnode == NULL) {
3760 		if (privateNode == NULL)
3761 			return B_BAD_VALUE;
3762 
3763 		// create the node
3764 		locker.Unlock();
3765 			// create_new_vnode_and_lock() will re-lock for us on success
3766 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3767 			nodeCreated);
3768 		if (status != B_OK)
3769 			return status;
3770 
3771 		locker.SetTo(sVnodeLock, true);
3772 	}
3773 
3774 	if (nodeCreated) {
3775 		vnode->private_node = privateNode;
3776 		vnode->ops = ops;
3777 		vnode->SetUnpublished(true);
3778 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3779 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3780 		// already known, but not published
3781 	} else if (vnode->IsBusy()) {
3782 		locker.Unlock();
3783 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3784 			return B_BUSY;
3785 		goto restart;
3786 	} else
3787 		return B_BAD_VALUE;
3788 
3789 	bool publishSpecialSubNode = false;
3790 
3791 	vnode->SetType(type);
3792 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3793 	publishSpecialSubNode = is_special_node_type(type)
3794 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3795 
3796 	status_t status = B_OK;
3797 
3798 	// create sub vnodes, if necessary
3799 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3800 		locker.Unlock();
3801 
3802 		fs_volume* subVolume = volume;
3803 		if (volume->sub_volume != NULL) {
3804 			while (status == B_OK && subVolume->sub_volume != NULL) {
3805 				subVolume = subVolume->sub_volume;
3806 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3807 					vnode);
3808 			}
3809 		}
3810 
3811 		if (status == B_OK && publishSpecialSubNode)
3812 			status = create_special_sub_node(vnode, flags);
3813 
3814 		if (status != B_OK) {
3815 			// error -- clean up the created sub vnodes
3816 			while (subVolume->super_volume != volume) {
3817 				subVolume = subVolume->super_volume;
3818 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3819 			}
3820 		}
3821 
3822 		if (status == B_OK) {
3823 			ReadLocker vnodesReadLocker(sVnodeLock);
3824 			AutoLocker<Vnode> nodeLocker(vnode);
3825 			vnode->SetBusy(false);
3826 			vnode->SetUnpublished(false);
3827 		} else {
3828 			locker.Lock();
3829 			sVnodeTable->Remove(vnode);
3830 			remove_vnode_from_mount_list(vnode, vnode->mount);
3831 			free(vnode);
3832 		}
3833 	} else {
3834 		// we still hold the write lock -- mark the node unbusy and published
3835 		vnode->SetBusy(false);
3836 		vnode->SetUnpublished(false);
3837 	}
3838 
3839 	TRACE(("returns: %s\n", strerror(status)));
3840 
3841 	return status;
3842 }
3843 
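
/*	A minimal sketch, assuming hypothetical FS-side names ("myVolume",
	"myNode", "gMyVnodeOps"): the typical pairing of new_vnode() and
	publish_vnode() -- register the node while it is still unpublished,
	then publish it once its on-disk data is in place.
*/
#if 0
static status_t
example_create_and_publish(fs_volume* myVolume, ino_t nodeID, void* myNode)
{
	status_t status = new_vnode(myVolume, nodeID, myNode, &gMyVnodeOps);
	if (status != B_OK)
		return status;

	// ... write the on-disk structures; on failure remove_vnode() would
	// undo the registration ...

	return publish_vnode(myVolume, nodeID, myNode, &gMyVnodeOps, S_IFREG, 0);
}
#endif
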
3844 
3845 extern "C" status_t
3846 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3847 {
3848 	struct vnode* vnode;
3849 
3850 	if (volume == NULL)
3851 		return B_BAD_VALUE;
3852 
3853 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3854 	if (status != B_OK)
3855 		return status;
3856 
3857 	// If this is a layered FS, we need to get the node cookie for the requested
3858 	// layer.
3859 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3860 		fs_vnode resolvedNode;
3861 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3862 			&resolvedNode);
3863 		if (status != B_OK) {
3864 			panic("get_vnode(): Failed to get super node for vnode %p, "
3865 				"volume: %p", vnode, volume);
3866 			put_vnode(vnode);
3867 			return status;
3868 		}
3869 
3870 		if (_privateNode != NULL)
3871 			*_privateNode = resolvedNode.private_node;
3872 	} else if (_privateNode != NULL)
3873 		*_privateNode = vnode->private_node;
3874 
3875 	return B_OK;
3876 }
3877 
3878 
3879 extern "C" status_t
3880 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3881 {
3882 	struct vnode* vnode;
3883 
3884 	rw_lock_read_lock(&sVnodeLock);
3885 	vnode = lookup_vnode(volume->id, vnodeID);
3886 	rw_lock_read_unlock(&sVnodeLock);
3887 
3888 	if (vnode == NULL)
3889 		return B_BAD_VALUE;
3890 
3891 	inc_vnode_ref_count(vnode);
3892 	return B_OK;
3893 }
3894 
3895 
3896 extern "C" status_t
3897 put_vnode(fs_volume* volume, ino_t vnodeID)
3898 {
3899 	struct vnode* vnode;
3900 
3901 	rw_lock_read_lock(&sVnodeLock);
3902 	vnode = lookup_vnode(volume->id, vnodeID);
3903 	rw_lock_read_unlock(&sVnodeLock);
3904 
3905 	if (vnode == NULL)
3906 		return B_BAD_VALUE;
3907 
3908 	dec_vnode_ref_count(vnode, false, true);
3909 	return B_OK;
3910 }
3911 
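
/*	A minimal usage sketch: every successful get_vnode() issued by a file
	system must be balanced by a put_vnode() once the private node is no
	longer needed.
*/
#if 0
static status_t
example_with_node(fs_volume* volume, ino_t nodeID)
{
	void* privateNode;
	status_t status = get_vnode(volume, nodeID, &privateNode);
	if (status != B_OK)
		return status;

	// ... work with privateNode ...

	return put_vnode(volume, nodeID);
}
#endif
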
3912 
3913 extern "C" status_t
3914 remove_vnode(fs_volume* volume, ino_t vnodeID)
3915 {
3916 	ReadLocker locker(sVnodeLock);
3917 
3918 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3919 	if (vnode == NULL)
3920 		return B_ENTRY_NOT_FOUND;
3921 
3922 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3923 		// this vnode is in use
3924 		return B_BUSY;
3925 	}
3926 
3927 	vnode->Lock();
3928 
3929 	vnode->SetRemoved(true);
3930 	bool removeUnpublished = false;
3931 
3932 	if (vnode->IsUnpublished()) {
3933 		// prepare the vnode for deletion
3934 		removeUnpublished = true;
3935 		vnode->SetBusy(true);
3936 	}
3937 
3938 	vnode->Unlock();
3939 	locker.Unlock();
3940 
3941 	if (removeUnpublished) {
3942 		// If the vnode hasn't been published yet, we delete it here
3943 		atomic_add(&vnode->ref_count, -1);
3944 		free_vnode(vnode, true);
3945 	}
3946 
3947 	return B_OK;
3948 }
3949 
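
/*	A minimal sketch of hypothetical FS-side code: remove_vnode() is what an
	unlink hook would call after deleting the directory entry on disk; the
	node itself is freed only once the last reference has been put.
*/
#if 0
static status_t
example_fs_unlink_node(fs_volume* volume, ino_t nodeID)
{
	// ... remove the on-disk directory entry first (FS specific) ...

	return remove_vnode(volume, nodeID);
}
#endif
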
3950 
3951 extern "C" status_t
3952 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3953 {
3954 	struct vnode* vnode;
3955 
3956 	rw_lock_read_lock(&sVnodeLock);
3957 
3958 	vnode = lookup_vnode(volume->id, vnodeID);
3959 	if (vnode) {
3960 		AutoLocker<Vnode> nodeLocker(vnode);
3961 		vnode->SetRemoved(false);
3962 	}
3963 
3964 	rw_lock_read_unlock(&sVnodeLock);
3965 	return B_OK;
3966 }
3967 
3968 
3969 extern "C" status_t
3970 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3971 {
3972 	ReadLocker _(sVnodeLock);
3973 
3974 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3975 		if (_removed != NULL)
3976 			*_removed = vnode->IsRemoved();
3977 		return B_OK;
3978 	}
3979 
3980 	return B_BAD_VALUE;
3981 }
3982 
3983 
3984 extern "C" fs_volume*
3985 volume_for_vnode(fs_vnode* _vnode)
3986 {
3987 	if (_vnode == NULL)
3988 		return NULL;
3989 
3990 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3991 	return vnode->mount->volume;
3992 }
3993 
3994 
3995 extern "C" status_t
3996 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3997 	uid_t nodeUserID)
3998 {
3999 	// get node permissions
4000 	int userPermissions = (mode & S_IRWXU) >> 6;
4001 	int groupPermissions = (mode & S_IRWXG) >> 3;
4002 	int otherPermissions = mode & S_IRWXO;
4003 
4004 	// get the node permissions for this uid/gid
4005 	int permissions = 0;
4006 	uid_t uid = geteuid();
4007 
4008 	if (uid == 0) {
4009 		// user is root
4010 		// root always has read/write permission, but at least one of the
4011 		// X bits must be set for execute permission
4012 		permissions = userPermissions | groupPermissions | otherPermissions
4013 			| S_IROTH | S_IWOTH;
4014 		if (S_ISDIR(mode))
4015 			permissions |= S_IXOTH;
4016 	} else if (uid == nodeUserID) {
4017 		// user is node owner
4018 		permissions = userPermissions;
4019 	} else if (is_user_in_group(nodeGroupID)) {
4020 		// user is in owning group
4021 		permissions = groupPermissions;
4022 	} else {
4023 		// user is one of the others
4024 		permissions = otherPermissions;
4025 	}
4026 
4027 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4028 }
4029 
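
/*	A minimal sketch of hypothetical FS-side code: a file system's access()
	hook would typically just forward the node's stat data to
	check_access_permissions(). "ExampleInode" and its accessors are made
	up for illustration.
*/
#if 0
static status_t
example_fs_access(fs_volume* volume, fs_vnode* vnode, int accessMode)
{
	ExampleInode* inode = (ExampleInode*)vnode->private_node;
	return check_access_permissions(accessMode, inode->Mode(),
		inode->GroupID(), inode->UserID());
}
#endif
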
4030 
4031 #if 0
4032 extern "C" status_t
4033 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4034 	size_t* _numBytes)
4035 {
4036 	struct file_descriptor* descriptor;
4037 	struct vnode* vnode;
4038 
4039 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4040 	if (descriptor == NULL)
4041 		return B_FILE_ERROR;
4042 
4043 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4044 		count, 0, _numBytes);
4045 
4046 	put_fd(descriptor);
4047 	return status;
4048 }
4049 
4050 
4051 extern "C" status_t
4052 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4053 	size_t* _numBytes)
4054 {
4055 	struct file_descriptor* descriptor;
4056 	struct vnode* vnode;
4057 
4058 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4059 	if (descriptor == NULL)
4060 		return B_FILE_ERROR;
4061 
4062 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4063 		count, 0, _numBytes);
4064 
4065 	put_fd(descriptor);
4066 	return status;
4067 }
4068 #endif
4069 
4070 
4071 extern "C" status_t
4072 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4073 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4074 	size_t* _bytes)
4075 {
4076 	struct file_descriptor* descriptor;
4077 	struct vnode* vnode;
4078 
4079 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4080 	if (descriptor == NULL)
4081 		return B_FILE_ERROR;
4082 
4083 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4084 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4085 		false);
4086 
4087 	put_fd(descriptor);
4088 	return status;
4089 }
4090 
4091 
4092 extern "C" status_t
4093 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4094 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4095 	size_t* _bytes)
4096 {
4097 	struct file_descriptor* descriptor;
4098 	struct vnode* vnode;
4099 
4100 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4101 	if (descriptor == NULL)
4102 		return B_FILE_ERROR;
4103 
4104 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4105 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4106 		true);
4107 
4108 	put_fd(descriptor);
4109 	return status;
4110 }
4111 
4112 
4113 extern "C" status_t
4114 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4115 {
4116 	// lookup mount -- the caller is required to make sure that the mount
4117 	// won't go away
4118 	MutexLocker locker(sMountMutex);
4119 	struct fs_mount* mount = find_mount(mountID);
4120 	if (mount == NULL)
4121 		return B_BAD_VALUE;
4122 	locker.Unlock();
4123 
4124 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4125 }
4126 
4127 
4128 extern "C" status_t
4129 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4130 {
4131 	// lookup mount -- the caller is required to make sure that the mount
4132 	// won't go away
4133 	MutexLocker locker(sMountMutex);
4134 	struct fs_mount* mount = find_mount(mountID);
4135 	if (mount == NULL)
4136 		return B_BAD_VALUE;
4137 	locker.Unlock();
4138 
4139 	return mount->entry_cache.Add(dirID, name, -1, true);
4140 }
4141 
4142 
4143 extern "C" status_t
4144 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4145 {
4146 	// lookup mount -- the caller is required to make sure that the mount
4147 	// won't go away
4148 	MutexLocker locker(sMountMutex);
4149 	struct fs_mount* mount = find_mount(mountID);
4150 	if (mount == NULL)
4151 		return B_BAD_VALUE;
4152 	locker.Unlock();
4153 
4154 	return mount->entry_cache.Remove(dirID, name);
4155 }
4156 
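
/*	A minimal sketch of hypothetical FS-side code: keeping the entry cache
	coherent around a rename. entry_cache_add_missing() could additionally
	record a deleted name as missing, keeping repeated negative lookups
	cheap.
*/
#if 0
static void
example_after_rename(dev_t mountID, ino_t fromDir, const char* fromName,
	ino_t toDir, const char* toName, ino_t nodeID)
{
	entry_cache_remove(mountID, fromDir, fromName);
	entry_cache_add(mountID, toDir, toName, nodeID);
}
#endif
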
4157 
4158 //	#pragma mark - private VFS API
4159 //	Functions the VFS exports for other parts of the kernel
4160 
4161 
4162 /*! Acquires another reference to the vnode that has to be released
4163 	by calling vfs_put_vnode().
4164 */
4165 void
4166 vfs_acquire_vnode(struct vnode* vnode)
4167 {
4168 	inc_vnode_ref_count(vnode);
4169 }
4170 
4171 
4172 /*! This is currently called from file_cache_create() only.
4173 	It's probably a temporary solution as long as devfs requires that
4174 	fs_read_pages()/fs_write_pages() are called with the standard
4175 	open cookie and not with a device cookie.
4176 	If that's done differently, remove this call; it has no other
4177 	purpose.
4178 */
4179 extern "C" status_t
4180 vfs_get_cookie_from_fd(int fd, void** _cookie)
4181 {
4182 	struct file_descriptor* descriptor;
4183 
4184 	descriptor = get_fd(get_current_io_context(true), fd);
4185 	if (descriptor == NULL)
4186 		return B_FILE_ERROR;
4187 
4188 	*_cookie = descriptor->cookie;
4189 	return B_OK;
4190 }
4191 
4192 
4193 extern "C" status_t
4194 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4195 {
4196 	*vnode = get_vnode_from_fd(fd, kernel);
4197 
4198 	if (*vnode == NULL)
4199 		return B_FILE_ERROR;
4200 
4201 	return B_OK;
4202 }
4203 
4204 
4205 extern "C" status_t
4206 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4207 {
4208 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4209 		path, kernel));
4210 
4211 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4212 	if (pathBuffer.InitCheck() != B_OK)
4213 		return B_NO_MEMORY;
4214 
4215 	char* buffer = pathBuffer.LockBuffer();
4216 	strlcpy(buffer, path, pathBuffer.BufferSize());
4217 
4218 	struct vnode* vnode;
4219 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4220 	if (status != B_OK)
4221 		return status;
4222 
4223 	*_vnode = vnode;
4224 	return B_OK;
4225 }
4226 
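
/*	A minimal usage sketch: vfs_get_vnode_from_path() hands out a referenced
	vnode that must later be released with vfs_put_vnode().
*/
#if 0
static status_t
example_touch_path(const char* path)
{
	struct vnode* vnode;
	status_t status = vfs_get_vnode_from_path(path, true, &vnode);
	if (status != B_OK)
		return status;

	// ... use the vnode ...

	vfs_put_vnode(vnode);
	return B_OK;
}
#endif
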
4227 
4228 extern "C" status_t
4229 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4230 {
4231 	struct vnode* vnode = NULL;
4232 
4233 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4234 	if (status != B_OK)
4235 		return status;
4236 
4237 	*_vnode = vnode;
4238 	return B_OK;
4239 }
4240 
4241 
4242 extern "C" status_t
4243 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4244 	const char* name, struct vnode** _vnode)
4245 {
4246 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4247 }
4248 
4249 
4250 extern "C" void
4251 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4252 {
4253 	*_mountID = vnode->device;
4254 	*_vnodeID = vnode->id;
4255 }
4256 
4257 
4258 /*!
4259 	Helper function abstracting the process of "converting" a given
4260 	vnode-pointer to a fs_vnode-pointer.
4261 	Currently only used in bindfs.
4262 */
4263 extern "C" fs_vnode*
4264 vfs_fsnode_for_vnode(struct vnode* vnode)
4265 {
4266 	return vnode;
4267 }
4268 
4269 
4270 /*!
4271 	Calls fs_open() on the given vnode and returns a new
4272 	file descriptor for it
4273 */
4274 int
4275 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4276 {
4277 	return open_vnode(vnode, openMode, kernel);
4278 }
4279 
4280 
4281 /*!	Looks up a vnode with the given mount and vnode ID.
4282 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4283 	to the node.
4284 	It's currently only used by file_cache_create().
4285 */
4286 extern "C" status_t
4287 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4288 {
4289 	rw_lock_read_lock(&sVnodeLock);
4290 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4291 	rw_lock_read_unlock(&sVnodeLock);
4292 
4293 	if (vnode == NULL)
4294 		return B_ERROR;
4295 
4296 	*_vnode = vnode;
4297 	return B_OK;
4298 }
4299 
4300 
4301 extern "C" status_t
4302 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4303 	bool traverseLeafLink, bool kernel, void** _node)
4304 {
4305 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4306 		volume, path, kernel));
4307 
4308 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4309 	if (pathBuffer.InitCheck() != B_OK)
4310 		return B_NO_MEMORY;
4311 
4312 	fs_mount* mount;
4313 	status_t status = get_mount(volume->id, &mount);
4314 	if (status != B_OK)
4315 		return status;
4316 
4317 	char* buffer = pathBuffer.LockBuffer();
4318 	strlcpy(buffer, path, pathBuffer.BufferSize());
4319 
4320 	struct vnode* vnode = mount->root_vnode;
4321 
4322 	if (buffer[0] == '/')
4323 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4324 	else {
4325 		inc_vnode_ref_count(vnode);
4326 			// vnode_path_to_vnode() releases a reference to the starting vnode
4327 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4328 			kernel, &vnode, NULL);
4329 	}
4330 
4331 	put_mount(mount);
4332 
4333 	if (status != B_OK)
4334 		return status;
4335 
4336 	if (vnode->device != volume->id) {
4337 		// wrong mount ID - must not gain access on foreign file system nodes
4338 		put_vnode(vnode);
4339 		return B_BAD_VALUE;
4340 	}
4341 
4342 	// Use get_vnode() to resolve the cookie for the right layer.
4343 	status = get_vnode(volume, vnode->id, _node);
4344 	put_vnode(vnode);
4345 
4346 	return status;
4347 }
4348 
4349 
4350 status_t
4351 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4352 	struct stat* stat, bool kernel)
4353 {
4354 	status_t status;
4355 
4356 	if (path != NULL) {
4357 		// path given: get the stat of the node referred to by (fd, path)
4358 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4359 		if (pathBuffer.InitCheck() != B_OK)
4360 			return B_NO_MEMORY;
4361 
4362 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4363 			traverseLeafLink, stat, kernel);
4364 	} else {
4365 		// no path given: get the FD and use the FD operation
4366 		struct file_descriptor* descriptor
4367 			= get_fd(get_current_io_context(kernel), fd);
4368 		if (descriptor == NULL)
4369 			return B_FILE_ERROR;
4370 
4371 		if (descriptor->ops->fd_read_stat)
4372 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4373 		else
4374 			status = B_UNSUPPORTED;
4375 
4376 		put_fd(descriptor);
4377 	}
4378 
4379 	return status;
4380 }
4381 
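
/*	A minimal usage sketch: vfs_read_stat() covers both the fstat() case
	(path == NULL) and the fstatat()-style case (fd plus a relative path).
*/
#if 0
static status_t
example_stat_at(int fd, const char* path, struct stat* stat)
{
	return vfs_read_stat(fd, path, true, stat, true);
}
#endif
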
4382 
4383 /*!	Finds the full path to the file that contains the module \a moduleName,
4384 	puts it into \a pathBuffer, and returns B_OK for success.
4385 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4386 	\c B_ENTRY_NOT_FOUND if no file could be found.
4387 	\a pathBuffer is clobbered in any case and must not be relied on if this
4388 	function returns unsuccessfully.
4389 	\a basePath and \a pathBuffer must not point to the same space.
4390 */
4391 status_t
4392 vfs_get_module_path(const char* basePath, const char* moduleName,
4393 	char* pathBuffer, size_t bufferSize)
4394 {
4395 	struct vnode* dir;
4396 	struct vnode* file;
4397 	status_t status;
4398 	size_t length;
4399 	char* path;
4400 
4401 	if (bufferSize == 0
4402 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4403 		return B_BUFFER_OVERFLOW;
4404 
4405 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4406 	if (status != B_OK)
4407 		return status;
4408 
4409 	// the path buffer had been clobbered by the above call
4410 	length = strlcpy(pathBuffer, basePath, bufferSize);
4411 	if (pathBuffer[length - 1] != '/')
4412 		pathBuffer[length++] = '/';
4413 
4414 	path = pathBuffer + length;
4415 	bufferSize -= length;
4416 
4417 	while (moduleName) {
4418 		char* nextPath = strchr(moduleName, '/');
4419 		if (nextPath == NULL)
4420 			length = strlen(moduleName);
4421 		else {
4422 			length = nextPath - moduleName;
4423 			nextPath++;
4424 		}
4425 
4426 		if (length + 1 >= bufferSize) {
4427 			status = B_BUFFER_OVERFLOW;
4428 			goto err;
4429 		}
4430 
4431 		memcpy(path, moduleName, length);
4432 		path[length] = '\0';
4433 		moduleName = nextPath;
4434 
4435 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4436 		if (status != B_OK) {
4437 			// vnode_path_to_vnode() has already released the reference to dir
4438 			return status;
4439 		}
4440 
4441 		if (S_ISDIR(file->Type())) {
4442 			// go to the next directory
4443 			path[length] = '/';
4444 			path[length + 1] = '\0';
4445 			path += length + 1;
4446 			bufferSize -= length + 1;
4447 
4448 			dir = file;
4449 		} else if (S_ISREG(file->Type())) {
4450 			// it's a file, so it should be what we've been looking for
4451 			put_vnode(file);
4452 
4453 			return B_OK;
4454 		} else {
4455 			TRACE(("vfs_get_module_path(): something is strange here: "
4456 				"0x%08" B_PRIx32 "...\n", file->Type()));
4457 			status = B_ERROR;
4458 			dir = file;
4459 			goto err;
4460 		}
4461 	}
4462 
4463 	// if we got here, the moduleName just pointed to a directory, not to
4464 	// a real module - what should we do in this case?
4465 	status = B_ENTRY_NOT_FOUND;
4466 
4467 err:
4468 	put_vnode(dir);
4469 	return status;
4470 }
4471 
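
/*	A minimal usage sketch; the base path and module name below are purely
	illustrative.
*/
#if 0
static status_t
example_find_module(char* buffer, size_t bufferSize)
{
	return vfs_get_module_path("/boot/system/add-ons/kernel",
		"bus_managers/example/v1", buffer, bufferSize);
}
#endif
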
4472 
4473 /*!	\brief Normalizes a given path.
4474 
4475 	The path must refer to an existing or non-existing entry in an existing
4476 	directory, that is chopping off the leaf component the remaining path must
4477 	refer to an existing directory.
4478 
4479 	The returned path will be canonical in that it will be absolute, will
4480 	not contain any "." or ".." components or duplicate occurrences of
4481 	'/'s, and none of the directory components will be symbolic links.
4482 
4483 	Any two paths referring to the same entry will result in the same
4484 	normalized path (well, that is pretty much the definition of `normalized',
4485 	isn't it :-).
4486 
4487 	\param path The path to be normalized.
4488 	\param buffer The buffer into which the normalized path will be written.
4489 		   May be the same one as \a path.
4490 	\param bufferSize The size of \a buffer.
4491 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4492 	\param kernel \c true, if the IO context of the kernel shall be used,
4493 		   otherwise that of the team this thread belongs to. Only relevant,
4494 		   if the path is relative (to get the CWD).
4495 	\return \c B_OK if everything went fine, another error code otherwise.
4496 */
4497 status_t
4498 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4499 	bool traverseLink, bool kernel)
4500 {
4501 	if (!path || !buffer || bufferSize < 1)
4502 		return B_BAD_VALUE;
4503 
4504 	if (path != buffer) {
4505 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4506 			return B_BUFFER_OVERFLOW;
4507 	}
4508 
4509 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4510 }
4511 
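
/*	A minimal usage sketch: normalizing a caller-supplied path into a
	buffer; \a path and \a buffer may also be one and the same.
*/
#if 0
static status_t
example_normalize(const char* path, char* buffer, size_t bufferSize)
{
	return vfs_normalize_path(path, buffer, bufferSize, true, true);
}
#endif
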
4512 
4513 /*!	\brief Gets the parent of the passed in node.
4514 
4515 	Gets the parent of the passed in node, and correctly resolves covered
4516 	nodes.
4517 */
4518 extern "C" status_t
4519 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4520 {
4521 	return resolve_covered_parent(parent, device, node,
4522 		get_current_io_context(true));
4523 }
4524 
4525 
4526 /*!	\brief Creates a special node in the file system.
4527 
4528 	The caller gets a reference to the newly created node (which is passed
4529 	back through \a _createdVnode) and is responsible for releasing it.
4530 
4531 	\param path The path where to create the entry for the node. Can be \c NULL,
4532 		in which case the node is created without an entry in the root FS -- it
4533 		will automatically be deleted when the last reference has been released.
4534 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4535 		the target file system will just create the node with its standard
4536 		operations. Depending on the type of the node a subnode might be created
4537 		automatically, though.
4538 	\param mode The type and permissions for the node to be created.
4539 	\param flags Flags to be passed to the creating FS.
4540 	\param kernel \c true, if called in the kernel context (relevant only if
4541 		\a path is not \c NULL and not absolute).
4542 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4543 		file system creating the node, with the private data pointer and
4544 		operations for the super node. Can be \c NULL.
4545 	\param _createdVnode Pointer to pre-allocated storage where to store the
4546 		pointer to the newly created node.
4547 	\return \c B_OK, if everything went fine, another error code otherwise.
4548 */
4549 status_t
4550 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4551 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4552 	struct vnode** _createdVnode)
4553 {
4554 	struct vnode* dirNode;
4555 	char _leaf[B_FILE_NAME_LENGTH];
4556 	char* leaf = NULL;
4557 
4558 	if (path) {
4559 		// We've got a path. Get the dir vnode and the leaf name.
4560 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4561 		if (tmpPathBuffer.InitCheck() != B_OK)
4562 			return B_NO_MEMORY;
4563 
4564 		char* tmpPath = tmpPathBuffer.LockBuffer();
4565 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4566 			return B_NAME_TOO_LONG;
4567 
4568 		// get the dir vnode and the leaf name
4569 		leaf = _leaf;
4570 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4571 		if (error != B_OK)
4572 			return error;
4573 	} else {
4574 		// No path. Create the node in the root FS.
4575 		dirNode = sRoot;
4576 		inc_vnode_ref_count(dirNode);
4577 	}
4578 
4579 	VNodePutter _(dirNode);
4580 
4581 	// check support for creating special nodes
4582 	if (!HAS_FS_CALL(dirNode, create_special_node))
4583 		return B_UNSUPPORTED;
4584 
4585 	// create the node
4586 	fs_vnode superVnode;
4587 	ino_t nodeID;
4588 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4589 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4590 	if (status != B_OK)
4591 		return status;
4592 
4593 	// lookup the node
4594 	rw_lock_read_lock(&sVnodeLock);
4595 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4596 	rw_lock_read_unlock(&sVnodeLock);
4597 
4598 	if (*_createdVnode == NULL) {
4599 		panic("vfs_create_special_node(): lookup of node failed");
4600 		return B_ERROR;
4601 	}
4602 
4603 	return B_OK;
4604 }
4605 
4606 
4607 extern "C" void
4608 vfs_put_vnode(struct vnode* vnode)
4609 {
4610 	put_vnode(vnode);
4611 }
4612 
4613 
4614 extern "C" status_t
4615 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4616 {
4617 	// Get current working directory from io context
4618 	struct io_context* context = get_current_io_context(false);
4619 	status_t status = B_OK;
4620 
4621 	mutex_lock(&context->io_mutex);
4622 
4623 	if (context->cwd != NULL) {
4624 		*_mountID = context->cwd->device;
4625 		*_vnodeID = context->cwd->id;
4626 	} else
4627 		status = B_ERROR;
4628 
4629 	mutex_unlock(&context->io_mutex);
4630 	return status;
4631 }
4632 
4633 
4634 status_t
4635 vfs_unmount(dev_t mountID, uint32 flags)
4636 {
4637 	return fs_unmount(NULL, mountID, flags, true);
4638 }
4639 
4640 
4641 extern "C" status_t
4642 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4643 {
4644 	struct vnode* vnode;
4645 
4646 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4647 	if (status != B_OK)
4648 		return status;
4649 
4650 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4651 	put_vnode(vnode);
4652 	return B_OK;
4653 }
4654 
4655 
4656 extern "C" void
4657 vfs_free_unused_vnodes(int32 level)
4658 {
4659 	vnode_low_resource_handler(NULL,
4660 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4661 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4662 		level);
4663 }
4664 
4665 
4666 extern "C" bool
4667 vfs_can_page(struct vnode* vnode, void* cookie)
4668 {
4669 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4670 
4671 	if (HAS_FS_CALL(vnode, can_page))
4672 		return FS_CALL(vnode, can_page, cookie);
4673 	return false;
4674 }
4675 
4676 
4677 extern "C" status_t
4678 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4679 	const generic_io_vec* vecs, size_t count, uint32 flags,
4680 	generic_size_t* _numBytes)
4681 {
4682 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4683 		vecs, pos));
4684 
4685 #if VFS_PAGES_IO_TRACING
4686 	generic_size_t bytesRequested = *_numBytes;
4687 #endif
4688 
4689 	IORequest request;
4690 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4691 	if (status == B_OK) {
4692 		status = vfs_vnode_io(vnode, cookie, &request);
4693 		if (status == B_OK)
4694 			status = request.Wait();
4695 		*_numBytes = request.TransferredBytes();
4696 	}
4697 
4698 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4699 		status, *_numBytes));
4700 
4701 	return status;
4702 }
4703 
4704 
4705 extern "C" status_t
4706 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4707 	const generic_io_vec* vecs, size_t count, uint32 flags,
4708 	generic_size_t* _numBytes)
4709 {
4710 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4711 		vecs, pos));
4712 
4713 #if VFS_PAGES_IO_TRACING
4714 	generic_size_t bytesRequested = *_numBytes;
4715 #endif
4716 
4717 	IORequest request;
4718 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4719 	if (status == B_OK) {
4720 		status = vfs_vnode_io(vnode, cookie, &request);
4721 		if (status == B_OK)
4722 			status = request.Wait();
4723 		*_numBytes = request.TransferredBytes();
4724 	}
4725 
4726 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4727 		status, *_numBytes));
4728 
4729 	return status;
4730 }
4731 
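
/*	A minimal usage sketch: a single-vector read through vfs_read_pages();
	vfs_write_pages() is used symmetrically. On return "bytes" holds the
	number of bytes actually transferred.
*/
#if 0
static status_t
example_read_one_vec(struct vnode* vnode, void* cookie, off_t pos,
	generic_addr_t base, generic_size_t length)
{
	generic_io_vec vec;
	vec.base = base;
	vec.length = length;

	generic_size_t bytes = length;
	return vfs_read_pages(vnode, cookie, pos, &vec, 1, 0, &bytes);
}
#endif
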
4732 
4733 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4734 	will be created, provided \a allocate is \c true.
4735 	On success, the function also grabs a reference to the cache it
4736 	returns.
4737 */
4738 extern "C" status_t
4739 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4740 {
4741 	if (vnode->cache != NULL) {
4742 		vnode->cache->AcquireRef();
4743 		*_cache = vnode->cache;
4744 		return B_OK;
4745 	}
4746 
4747 	rw_lock_read_lock(&sVnodeLock);
4748 	vnode->Lock();
4749 
4750 	status_t status = B_OK;
4751 
4752 	// The cache could have been created in the meantime
4753 	if (vnode->cache == NULL) {
4754 		if (allocate) {
4755 			// TODO: actually the vnode needs to be busy already here, or
4756 			//	else this won't work...
4757 			bool wasBusy = vnode->IsBusy();
4758 			vnode->SetBusy(true);
4759 
4760 			vnode->Unlock();
4761 			rw_lock_read_unlock(&sVnodeLock);
4762 
4763 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4764 
4765 			rw_lock_read_lock(&sVnodeLock);
4766 			vnode->Lock();
4767 			vnode->SetBusy(wasBusy);
4768 		} else
4769 			status = B_BAD_VALUE;
4770 	}
4771 
4772 	vnode->Unlock();
4773 	rw_lock_read_unlock(&sVnodeLock);
4774 
4775 	if (status == B_OK) {
4776 		vnode->cache->AcquireRef();
4777 		*_cache = vnode->cache;
4778 	}
4779 
4780 	return status;
4781 }
4782 
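
/*	A minimal usage sketch, assuming the cache reference is dropped via
	VMCache::ReleaseRef(), mirroring the AcquireRef() above.
*/
#if 0
static status_t
example_with_cache(struct vnode* vnode)
{
	VMCache* cache;
	status_t status = vfs_get_vnode_cache(vnode, &cache, true);
	if (status != B_OK)
		return status;

	// ... use the cache ...

	cache->ReleaseRef();
	return B_OK;
}
#endif
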
4783 
4784 status_t
4785 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4786 	file_io_vec* vecs, size_t* _count)
4787 {
4788 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4789 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4790 
4791 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4792 }
4793 
4794 
4795 status_t
4796 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4797 {
4798 	status_t status = FS_CALL(vnode, read_stat, stat);
4799 
4800 	// fill in the st_dev and st_ino fields
4801 	if (status == B_OK) {
4802 		stat->st_dev = vnode->device;
4803 		stat->st_ino = vnode->id;
4804 		// the rdev field must stay unset for non-special files
4805 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4806 			stat->st_rdev = -1;
4807 	}
4808 
4809 	return status;
4810 }
4811 
4812 
4813 status_t
4814 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4815 {
4816 	struct vnode* vnode;
4817 	status_t status = get_vnode(device, inode, &vnode, true, false);
4818 	if (status != B_OK)
4819 		return status;
4820 
4821 	status = vfs_stat_vnode(vnode, stat);
4822 
4823 	put_vnode(vnode);
4824 	return status;
4825 }
4826 
4827 
4828 status_t
4829 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4830 {
4831 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4832 }
4833 
4834 
4835 status_t
4836 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4837 	bool kernel, char* path, size_t pathLength)
4838 {
4839 	struct vnode* vnode;
4840 	status_t status;
4841 
4842 	// filter invalid leaf names
4843 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4844 		return B_BAD_VALUE;
4845 
4846 	// get the vnode matching the dir's node_ref
4847 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4848 		// special cases "." and "..": we can directly get the vnode of the
4849 		// referenced directory
4850 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4851 		leaf = NULL;
4852 	} else
4853 		status = get_vnode(device, inode, &vnode, true, false);
4854 	if (status != B_OK)
4855 		return status;
4856 
4857 	// get the directory path
4858 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4859 	put_vnode(vnode);
4860 		// we don't need the vnode anymore
4861 	if (status != B_OK)
4862 		return status;
4863 
4864 	// append the leaf name
4865 	if (leaf) {
4866 		// insert a directory separator if this is not the file system root
4867 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4868 				>= pathLength)
4869 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4870 			return B_NAME_TOO_LONG;
4871 		}
4872 	}
4873 
4874 	return B_OK;
4875 }
4876 
4877 
4878 /*!	If the given descriptor locked its vnode, that lock will be released. */
4879 void
4880 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4881 {
4882 	struct vnode* vnode = fd_vnode(descriptor);
4883 
4884 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4885 		vnode->mandatory_locked_by = NULL;
4886 }
4887 
4888 
4889 /*!	Releases any POSIX locks on the file descriptor. */
4890 status_t
4891 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4892 {
4893 	struct vnode* vnode = descriptor->u.vnode;
4894 	if (vnode == NULL)
4895 		return B_OK;
4896 
4897 	if (HAS_FS_CALL(vnode, release_lock))
4898 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4899 
4900 	return release_advisory_lock(vnode, context, NULL, NULL);
4901 }
4902 
4903 
4904 /*!	Closes all file descriptors of the specified I/O context that
4905 	have the O_CLOEXEC flag set.
4906 */
4907 void
4908 vfs_exec_io_context(io_context* context)
4909 {
4910 	uint32 i;
4911 
4912 	for (i = 0; i < context->table_size; i++) {
4913 		mutex_lock(&context->io_mutex);
4914 
4915 		struct file_descriptor* descriptor = context->fds[i];
4916 		bool remove = false;
4917 
4918 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4919 			context->fds[i] = NULL;
4920 			context->num_used_fds--;
4921 
4922 			remove = true;
4923 		}
4924 
4925 		mutex_unlock(&context->io_mutex);
4926 
4927 		if (remove) {
4928 			close_fd(context, descriptor);
4929 			put_fd(descriptor);
4930 		}
4931 	}
4932 }
4933 
4934 
4935 /*! Sets up a new io_context structure, and inherits the properties
4936 	of the parent io_context if one is given.
4937 */
4938 io_context*
4939 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4940 {
4941 	io_context* context = (io_context*)malloc(sizeof(io_context));
4942 	if (context == NULL)
4943 		return NULL;
4944 
4945 	TIOC(NewIOContext(context, parentContext));
4946 
4947 	memset(context, 0, sizeof(io_context));
4948 	context->ref_count = 1;
4949 
4950 	MutexLocker parentLocker;
4951 
4952 	size_t tableSize;
4953 	if (parentContext != NULL) {
4954 		parentLocker.SetTo(parentContext->io_mutex, false);
4955 		tableSize = parentContext->table_size;
4956 	} else
4957 		tableSize = DEFAULT_FD_TABLE_SIZE;
4958 
4959 	// allocate space for FDs, the select infos, and the close-on-exec flags
4960 	context->fds = (file_descriptor**)malloc(
4961 		sizeof(struct file_descriptor*) * tableSize
4962 		+ sizeof(struct select_sync*) * tableSize
4963 		+ (tableSize + 7) / 8);
4964 	if (context->fds == NULL) {
4965 		free(context);
4966 		return NULL;
4967 	}
4968 
4969 	context->select_infos = (select_info**)(context->fds + tableSize);
4970 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4971 
4972 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4973 		+ sizeof(struct select_sync*) * tableSize
4974 		+ (tableSize + 7) / 8);
4975 
4976 	mutex_init(&context->io_mutex, "I/O context");
4977 
4978 	// Copy all parent file descriptors
4979 
4980 	if (parentContext != NULL) {
4981 		size_t i;
4982 
4983 		mutex_lock(&sIOContextRootLock);
4984 		context->root = parentContext->root;
4985 		if (context->root)
4986 			inc_vnode_ref_count(context->root);
4987 		mutex_unlock(&sIOContextRootLock);
4988 
4989 		context->cwd = parentContext->cwd;
4990 		if (context->cwd)
4991 			inc_vnode_ref_count(context->cwd);
4992 
4993 		if (parentContext->inherit_fds) {
4994 			for (i = 0; i < tableSize; i++) {
4995 				struct file_descriptor* descriptor = parentContext->fds[i];
4996 
4997 				if (descriptor != NULL
4998 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4999 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5000 					if (closeOnExec && purgeCloseOnExec)
5001 						continue;
5002 
5003 					TFD(InheritFD(context, i, descriptor, parentContext));
5004 
5005 					context->fds[i] = descriptor;
5006 					context->num_used_fds++;
5007 					atomic_add(&descriptor->ref_count, 1);
5008 					atomic_add(&descriptor->open_count, 1);
5009 
5010 					if (closeOnExec)
5011 						fd_set_close_on_exec(context, i, true);
5012 				}
5013 			}
5014 		}
5015 
5016 		parentLocker.Unlock();
5017 	} else {
5018 		context->root = sRoot;
5019 		context->cwd = sRoot;
5020 
5021 		if (context->root)
5022 			inc_vnode_ref_count(context->root);
5023 
5024 		if (context->cwd)
5025 			inc_vnode_ref_count(context->cwd);
5026 	}
5027 
5028 	context->table_size = tableSize;
5029 	context->inherit_fds = parentContext != NULL;
5030 
5031 	list_init(&context->node_monitors);
5032 	context->max_monitors = DEFAULT_NODE_MONITORS;
5033 
5034 	return context;
5035 }
5036 
5037 
5038 void
5039 vfs_get_io_context(io_context* context)
5040 {
5041 	atomic_add(&context->ref_count, 1);
5042 }
5043 
5044 
5045 void
5046 vfs_put_io_context(io_context* context)
5047 {
5048 	if (atomic_add(&context->ref_count, -1) == 1)
5049 		free_io_context(context);
5050 }
5051 
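
/*	A minimal usage sketch: io_context references are counted; each
	vfs_get_io_context() must be matched by a vfs_put_io_context(), and
	the last put frees the context via free_io_context().
*/
#if 0
static void
example_borrow_io_context(io_context* context)
{
	vfs_get_io_context(context);

	// ... access the context, locking io_mutex where required ...

	vfs_put_io_context(context);
}
#endif
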
5052 
5053 status_t
5054 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5055 {
5056 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5057 		return B_BAD_VALUE;
5058 
5059 	TIOC(ResizeIOContext(context, newSize));
5060 
5061 	MutexLocker _(context->io_mutex);
5062 
5063 	uint32 oldSize = context->table_size;
5064 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5065 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5066 
5067 	// If the tables shrink, make sure none of the fds being dropped are in use.
5068 	if (newSize < oldSize) {
5069 		for (uint32 i = oldSize; i-- > newSize;) {
5070 			if (context->fds[i])
5071 				return B_BUSY;
5072 		}
5073 	}
5074 
5075 	// store pointers to the old tables
5076 	file_descriptor** oldFDs = context->fds;
5077 	select_info** oldSelectInfos = context->select_infos;
5078 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5079 
5080 	// allocate new tables
5081 	file_descriptor** newFDs = (file_descriptor**)malloc(
5082 		sizeof(struct file_descriptor*) * newSize
5083 		+ sizeof(struct select_sync*) * newSize
5084 		+ newCloseOnExitBitmapSize);
5085 	if (newFDs == NULL)
5086 		return B_NO_MEMORY;
5087 
5088 	context->fds = newFDs;
5089 	context->select_infos = (select_info**)(context->fds + newSize);
5090 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5091 	context->table_size = newSize;
5092 
5093 	// copy entries from old tables
5094 	uint32 toCopy = min_c(oldSize, newSize);
5095 
5096 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5097 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5098 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5099 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5100 
5101 	// clear additional entries, if the tables grow
5102 	if (newSize > oldSize) {
5103 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5104 		memset(context->select_infos + oldSize, 0,
5105 			sizeof(void*) * (newSize - oldSize));
5106 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5107 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5108 	}
5109 
5110 	free(oldFDs);
5111 
5112 	return B_OK;
5113 }
5114 
5115 
5116 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5117 
5118 	Given an arbitrary vnode (identified by mount and node ID), the function
5119 	checks whether the vnode is covered by another vnode. If it is, the
5120 	function returns the mount and node ID of the covering vnode. Otherwise
5121 	it simply returns the supplied mount and node ID.
5122 
5123 	In case of error (e.g. the supplied node could not be found) the variables
5124 	for storing the resolved mount and node ID remain untouched and an error
5125 	code is returned.
5126 
5127 	\param mountID The mount ID of the vnode in question.
5128 	\param nodeID The node ID of the vnode in question.
5129 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5130 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5131 	\return
5132 	- \c B_OK, if everything went fine,
5133 	- another error code, if something went wrong.
5134 */
5135 status_t
5136 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5137 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5138 {
5139 	// get the node
5140 	struct vnode* node;
5141 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5142 	if (error != B_OK)
5143 		return error;
5144 
5145 	// resolve the node
5146 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5147 		put_vnode(node);
5148 		node = coveringNode;
5149 	}
5150 
5151 	// set the return values
5152 	*resolvedMountID = node->device;
5153 	*resolvedNodeID = node->id;
5154 
5155 	put_vnode(node);
5156 
5157 	return B_OK;
5158 }
5159 
5160 
5161 status_t
5162 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5163 	ino_t* _mountPointNodeID)
5164 {
5165 	ReadLocker nodeLocker(sVnodeLock);
5166 	MutexLocker mountLocker(sMountMutex);
5167 
5168 	struct fs_mount* mount = find_mount(mountID);
5169 	if (mount == NULL)
5170 		return B_BAD_VALUE;
5171 
5172 	Vnode* mountPoint = mount->covers_vnode;
5173 
5174 	*_mountPointMountID = mountPoint->device;
5175 	*_mountPointNodeID = mountPoint->id;
5176 
5177 	return B_OK;
5178 }
5179 
5180 
5181 status_t
5182 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5183 	ino_t coveredNodeID)
5184 {
5185 	// get the vnodes
5186 	Vnode* vnode;
5187 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5188 	if (error != B_OK)
5189 		return B_BAD_VALUE;
5190 	VNodePutter vnodePutter(vnode);
5191 
5192 	Vnode* coveredVnode;
5193 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5194 		false);
5195 	if (error != B_OK)
5196 		return B_BAD_VALUE;
5197 	VNodePutter coveredVnodePutter(coveredVnode);
5198 
5199 	// establish the covered/covering links
5200 	WriteLocker locker(sVnodeLock);
5201 
5202 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5203 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5204 		return B_BUSY;
5205 	}
5206 
5207 	vnode->covers = coveredVnode;
5208 	vnode->SetCovering(true);
5209 
5210 	coveredVnode->covered_by = vnode;
5211 	coveredVnode->SetCovered(true);
5212 
5213 	// the vnodes do now reference each other
5214 	inc_vnode_ref_count(vnode);
5215 	inc_vnode_ref_count(coveredVnode);
5216 
5217 	return B_OK;
5218 }
5219 
5220 
5221 int
5222 vfs_getrlimit(int resource, struct rlimit* rlp)
5223 {
5224 	if (!rlp)
5225 		return B_BAD_ADDRESS;
5226 
5227 	switch (resource) {
5228 		case RLIMIT_NOFILE:
5229 		{
5230 			struct io_context* context = get_current_io_context(false);
5231 			MutexLocker _(context->io_mutex);
5232 
5233 			rlp->rlim_cur = context->table_size;
5234 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5235 			return 0;
5236 		}
5237 
5238 		case RLIMIT_NOVMON:
5239 		{
5240 			struct io_context* context = get_current_io_context(false);
5241 			MutexLocker _(context->io_mutex);
5242 
5243 			rlp->rlim_cur = context->max_monitors;
5244 			rlp->rlim_max = MAX_NODE_MONITORS;
5245 			return 0;
5246 		}
5247 
5248 		default:
5249 			return B_BAD_VALUE;
5250 	}
5251 }
5252 
5253 
5254 int
5255 vfs_setrlimit(int resource, const struct rlimit* rlp)
5256 {
5257 	if (!rlp)
5258 		return B_BAD_ADDRESS;
5259 
5260 	switch (resource) {
5261 		case RLIMIT_NOFILE:
5262 			/* TODO: check getuid() */
5263 			if (rlp->rlim_max != RLIM_SAVED_MAX
5264 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5265 				return B_NOT_ALLOWED;
5266 
5267 			return vfs_resize_fd_table(get_current_io_context(false),
5268 				rlp->rlim_cur);
5269 
5270 		case RLIMIT_NOVMON:
5271 			/* TODO: check getuid() */
5272 			if (rlp->rlim_max != RLIM_SAVED_MAX
5273 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5274 				return B_NOT_ALLOWED;
5275 
5276 			return resize_monitor_table(get_current_io_context(false),
5277 				rlp->rlim_cur);
5278 
5279 		default:
5280 			return B_BAD_VALUE;
5281 	}
5282 }
5283 
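
/*	A minimal usage sketch: growing the FD table through the rlimit
	interface; rlim_max must name the absolute maximum (or RLIM_SAVED_MAX),
	otherwise the call is refused with B_NOT_ALLOWED.
*/
#if 0
static status_t
example_raise_fd_limit(uint32 newSize)
{
	struct rlimit limit;
	limit.rlim_cur = newSize;
	limit.rlim_max = MAX_FD_TABLE_SIZE;
	return vfs_setrlimit(RLIMIT_NOFILE, &limit);
}
#endif
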
5284 
5285 status_t
5286 vfs_init(kernel_args* args)
5287 {
5288 	vnode::StaticInit();
5289 
5290 	sVnodeTable = new(std::nothrow) VnodeTable();
5291 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5292 		panic("vfs_init: error creating vnode hash table\n");
5293 
5294 	struct vnode dummy_vnode;
5295 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5296 
5297 	struct fs_mount dummyMount;
5298 	sMountsTable = new(std::nothrow) MountTable();
5299 	if (sMountsTable == NULL
5300 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5301 		panic("vfs_init: error creating mounts hash table\n");
5302 
5303 	node_monitor_init();
5304 
5305 	sRoot = NULL;
5306 
5307 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5308 
5309 	if (block_cache_init() != B_OK)
5310 		return B_ERROR;
5311 
5312 #ifdef ADD_DEBUGGER_COMMANDS
5313 	// add some debugger commands
5314 	add_debugger_command_etc("vnode", &dump_vnode,
5315 		"Print info about the specified vnode",
5316 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5317 		"Prints information about the vnode specified by address <vnode> or\n"
5318 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5319 		"constructed and printed. It might not be possible to construct a\n"
5320 		"complete path, though.\n",
5321 		0);
5322 	add_debugger_command("vnodes", &dump_vnodes,
5323 		"list all vnodes (from the specified device)");
5324 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5325 		"list all vnode caches");
5326 	add_debugger_command("mount", &dump_mount,
5327 		"info about the specified fs_mount");
5328 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5329 	add_debugger_command("io_context", &dump_io_context,
5330 		"info about the I/O context");
5331 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5332 		"info about vnode usage");
5333 #endif
5334 
5335 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5336 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5337 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5338 		0);
5339 
5340 	fifo_init();
5341 	file_map_init();
5342 
5343 	return file_cache_init();
5344 }
5345 
5346 
5347 //	#pragma mark - fd_ops implementations
5348 
5349 
5350 /*!
5351 	Calls fs_open() on the given vnode and returns a new
5352 	file descriptor for it
5353 */
5354 static int
5355 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5356 {
5357 	void* cookie;
5358 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5359 	if (status != B_OK)
5360 		return status;
5361 
5362 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5363 	if (fd < 0) {
5364 		FS_CALL(vnode, close, cookie);
5365 		FS_CALL(vnode, free_cookie, cookie);
5366 	}
5367 	return fd;
5368 }
5369 
5370 
5371 /*!
5372 	Creates the given entry in the given directory (or opens it, unless
5373 	O_EXCL is set) and returns a new file descriptor for it
5374 */
5375 static int
5376 create_vnode(struct vnode* directory, const char* name, int openMode,
5377 	int perms, bool kernel)
5378 {
5379 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5380 	status_t status = B_ERROR;
5381 	struct vnode* vnode;
5382 	void* cookie;
5383 	ino_t newID;
5384 
5385 	// This is somewhat tricky: If the entry already exists, the FS responsible
5386 	// for the directory might not necessarily also be the one responsible for
5387 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5388 	// we can actually never call the create() hook without O_EXCL. Instead we
5389 	// try to look the entry up first. If it already exists, we just open the
5390 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5391 	// introduces a race condition, since someone else might have created the
5392 	// entry in the meantime. We hope the respective FS returns the correct
5393 	// error code, in which case we retry (up to 3 times).
5394 
5395 	for (int i = 0; i < 3 && status != B_OK; i++) {
5396 		// look the node up
5397 		status = lookup_dir_entry(directory, name, &vnode);
5398 		if (status == B_OK) {
5399 			VNodePutter putter(vnode);
5400 
5401 			if ((openMode & O_EXCL) != 0)
5402 				return B_FILE_EXISTS;
5403 
5404 			// If the node is a symlink, we have to follow it, unless
5405 			// O_NOTRAVERSE is set.
5406 			if (S_ISLNK(vnode->Type()) && traverse) {
5407 				putter.Put();
5408 				char clonedName[B_FILE_NAME_LENGTH + 1];
5409 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5410 						>= B_FILE_NAME_LENGTH) {
5411 					return B_NAME_TOO_LONG;
5412 				}
5413 
5414 				inc_vnode_ref_count(directory);
5415 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5416 					kernel, &vnode, NULL);
5417 				if (status != B_OK)
5418 					return status;
5419 
5420 				putter.SetTo(vnode);
5421 			}
5422 
5423 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5424 				return B_LINK_LIMIT;
5425 
5426 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5427 			// on success keep the vnode reference for the FD
5428 			if (fd >= 0)
5429 				putter.Detach();
5430 
5431 			return fd;
5432 		}
5433 
5434 		// it doesn't exist yet -- try to create it
5435 
5436 		if (!HAS_FS_CALL(directory, create))
5437 			return B_READ_ONLY_DEVICE;
5438 
5439 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5440 			&cookie, &newID);
5441 		if (status != B_OK
5442 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5443 			return status;
5444 		}
5445 	}
5446 
5447 	if (status != B_OK)
5448 		return status;
5449 
5450 	// the node has been created successfully
5451 
5452 	rw_lock_read_lock(&sVnodeLock);
5453 	vnode = lookup_vnode(directory->device, newID);
5454 	rw_lock_read_unlock(&sVnodeLock);
5455 
5456 	if (vnode == NULL) {
5457 		panic("vfs: fs_create() returned success but there is no vnode, "
5458 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5459 		return B_BAD_VALUE;
5460 	}
5461 
5462 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5463 	if (fd >= 0)
5464 		return fd;
5465 
5466 	status = fd;
5467 
5468 	// something went wrong, clean up
5469 
5470 	FS_CALL(vnode, close, cookie);
5471 	FS_CALL(vnode, free_cookie, cookie);
5472 	put_vnode(vnode);
5473 
5474 	FS_CALL(directory, unlink, name);
5475 
5476 	return status;
5477 }
5478 
5479 
5480 /*! Calls fs open_dir() on the given vnode and returns a new
5481 	file descriptor for it
5482 */
5483 static int
5484 open_dir_vnode(struct vnode* vnode, bool kernel)
5485 {
5486 	void* cookie;
5487 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5488 	if (status != B_OK)
5489 		return status;
5490 
5491 	// directory is opened, create a fd
5492 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5493 	if (status >= 0)
5494 		return status;
5495 
5496 	FS_CALL(vnode, close_dir, cookie);
5497 	FS_CALL(vnode, free_dir_cookie, cookie);
5498 
5499 	return status;
5500 }
5501 
5502 
5503 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5504 	file descriptor for it.
5505 	Used by attr_dir_open() and attr_dir_open_fd().
5506 */
5507 static int
5508 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5509 {
5510 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5511 		return B_UNSUPPORTED;
5512 
5513 	void* cookie;
5514 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5515 	if (status != B_OK)
5516 		return status;
5517 
5518 	// directory is opened, create a fd
5519 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5520 		kernel);
5521 	if (status >= 0)
5522 		return status;
5523 
5524 	FS_CALL(vnode, close_attr_dir, cookie);
5525 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5526 
5527 	return status;
5528 }
5529 
5530 
5531 static int
5532 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5533 	int openMode, int perms, bool kernel)
5534 {
5535 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5536 		"kernel %d\n", name, openMode, perms, kernel));
5537 
5538 	// get directory to put the new file in
5539 	struct vnode* directory;
5540 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5541 	if (status != B_OK)
5542 		return status;
5543 
5544 	status = create_vnode(directory, name, openMode, perms, kernel);
5545 	put_vnode(directory);
5546 
5547 	return status;
5548 }
5549 
5550 
5551 static int
5552 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5553 {
5554 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5555 		openMode, perms, kernel));
5556 
5557 	// get directory to put the new file in
5558 	char name[B_FILE_NAME_LENGTH];
5559 	struct vnode* directory;
5560 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5561 		kernel);
5562 	if (status < 0)
5563 		return status;
5564 
5565 	status = create_vnode(directory, name, openMode, perms, kernel);
5566 
5567 	put_vnode(directory);
5568 	return status;
5569 }
5570 
5571 
5572 static int
5573 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5574 	int openMode, bool kernel)
5575 {
5576 	if (name == NULL || *name == '\0')
5577 		return B_BAD_VALUE;
5578 
5579 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5580 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5581 
5582 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5583 
5584 	// get the vnode matching the entry_ref
5585 	struct vnode* vnode;
5586 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5587 		kernel, &vnode);
5588 	if (status != B_OK)
5589 		return status;
5590 
5591 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5592 		put_vnode(vnode);
5593 		return B_LINK_LIMIT;
5594 	}
5595 
5596 	int newFD = open_vnode(vnode, openMode, kernel);
5597 	if (newFD >= 0) {
5598 		// The vnode reference has been transferred to the FD
5599 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5600 			directoryID, vnode->id, name);
5601 	} else
5602 		put_vnode(vnode);
5603 
5604 	return newFD;
5605 }
5606 
5607 
5608 static int
5609 file_open(int fd, char* path, int openMode, bool kernel)
5610 {
5611 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5612 
5613 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5614 		fd, path, openMode, kernel));
5615 
5616 	// get the vnode matching the vnode + path combination
5617 	struct vnode* vnode;
5618 	ino_t parentID;
5619 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5620 		&parentID, kernel);
5621 	if (status != B_OK)
5622 		return status;
5623 
5624 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5625 		put_vnode(vnode);
5626 		return B_LINK_LIMIT;
5627 	}
5628 
5629 	// open the vnode
5630 	int newFD = open_vnode(vnode, openMode, kernel);
5631 	if (newFD >= 0) {
5632 		// The vnode reference has been transferred to the FD
5633 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5634 			vnode->device, parentID, vnode->id, NULL);
5635 	} else
5636 		put_vnode(vnode);
5637 
5638 	return newFD;
5639 }
5640 
5641 
5642 static status_t
5643 file_close(struct file_descriptor* descriptor)
5644 {
5645 	struct vnode* vnode = descriptor->u.vnode;
5646 	status_t status = B_OK;
5647 
5648 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5649 
5650 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5651 		vnode->id);
5652 	if (HAS_FS_CALL(vnode, close)) {
5653 		status = FS_CALL(vnode, close, descriptor->cookie);
5654 	}
5655 
5656 	if (status == B_OK) {
5657 		// remove all outstanding locks for this team
5658 		if (HAS_FS_CALL(vnode, release_lock))
5659 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5660 		else
5661 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5662 	}
5663 	return status;
5664 }
5665 
5666 
5667 static void
5668 file_free_fd(struct file_descriptor* descriptor)
5669 {
5670 	struct vnode* vnode = descriptor->u.vnode;
5671 
5672 	if (vnode != NULL) {
5673 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5674 		put_vnode(vnode);
5675 	}
5676 }
5677 
5678 
5679 static status_t
5680 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5681 	size_t* length)
5682 {
5683 	struct vnode* vnode = descriptor->u.vnode;
5684 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5685 		pos, length, *length));
5686 
5687 	if (S_ISDIR(vnode->Type()))
5688 		return B_IS_A_DIRECTORY;
5689 
5690 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5691 }
5692 
5693 
5694 static status_t
5695 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5696 	size_t* length)
5697 {
5698 	struct vnode* vnode = descriptor->u.vnode;
5699 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5700 		length));
5701 
5702 	if (S_ISDIR(vnode->Type()))
5703 		return B_IS_A_DIRECTORY;
5704 	if (!HAS_FS_CALL(vnode, write))
5705 		return B_READ_ONLY_DEVICE;
5706 
5707 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5708 }
5709 
5710 
5711 static off_t
5712 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5713 {
5714 	struct vnode* vnode = descriptor->u.vnode;
5715 	off_t offset;
5716 	bool isDevice = false;
5717 
5718 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5719 		seekType));
5720 
5721 	// some kinds of files are not seekable
5722 	switch (vnode->Type() & S_IFMT) {
5723 		case S_IFIFO:
5724 		case S_IFSOCK:
5725 			return ESPIPE;
5726 
5727 		// drivers publish block devices as character devices, so check both
5728 		case S_IFBLK:
5729 		case S_IFCHR:
5730 			isDevice = true;
5731 			break;
5732 		// The Open Group Base Specs don't single out any file types besides
5733 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5734 		case S_IFREG:
5735 		case S_IFDIR:
5736 		case S_IFLNK:
5737 			break;
5738 	}
5739 
5740 	switch (seekType) {
5741 		case SEEK_SET:
5742 			offset = 0;
5743 			break;
5744 		case SEEK_CUR:
5745 			offset = descriptor->pos;
5746 			break;
5747 		case SEEK_END:
5748 		{
5749 			// stat() the node
5750 			if (!HAS_FS_CALL(vnode, read_stat))
5751 				return B_UNSUPPORTED;
5752 
5753 			struct stat stat;
5754 			status_t status = FS_CALL(vnode, read_stat, &stat);
5755 			if (status != B_OK)
5756 				return status;
5757 
5758 			offset = stat.st_size;
5759 
5760 			if (offset == 0 && isDevice) {
5761 				// stat() on device drivers usually doesn't report a size
5762 				device_geometry geometry;
5763 
5764 				if (HAS_FS_CALL(vnode, ioctl)) {
5765 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5766 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5767 					if (status == B_OK)
5768 						offset = (off_t)geometry.bytes_per_sector
5769 							* geometry.sectors_per_track
5770 							* geometry.cylinder_count
5771 							* geometry.head_count;
5772 				}
5773 			}
5774 
5775 			break;
5776 		}
5777 		default:
5778 			return B_BAD_VALUE;
5779 	}
5780 
5781 	// assumes off_t is 64 bits wide
5782 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5783 		return B_BUFFER_OVERFLOW;
5784 
5785 	pos += offset;
5786 	if (pos < 0)
5787 		return B_BAD_VALUE;
5788 
5789 	return descriptor->pos = pos;
5790 }
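
// The overflow guard above, in numbers (a sketch assuming 64-bit off_t):
// with offset = LONGLONG_MAX - 9 and pos = 10, LONGLONG_MAX - offset is 9,
// which is smaller than pos, so offset + pos would wrap around; the seek is
// rejected with B_BUFFER_OVERFLOW instead of corrupting the position.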
5791 
5792 
5793 static status_t
5794 file_select(struct file_descriptor* descriptor, uint8 event,
5795 	struct selectsync* sync)
5796 {
5797 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5798 
5799 	struct vnode* vnode = descriptor->u.vnode;
5800 
5801 	// If the FS has no select() hook, notify select() now.
5802 	if (!HAS_FS_CALL(vnode, select)) {
5803 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5804 			return notify_select_event(sync, event);
5805 		else
5806 			return B_OK;
5807 	}
5808 
5809 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5810 }
5811 
5812 
5813 static status_t
5814 file_deselect(struct file_descriptor* descriptor, uint8 event,
5815 	struct selectsync* sync)
5816 {
5817 	struct vnode* vnode = descriptor->u.vnode;
5818 
5819 	if (!HAS_FS_CALL(vnode, deselect))
5820 		return B_OK;
5821 
5822 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5823 }
5824 
5825 
5826 static status_t
5827 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5828 	bool kernel)
5829 {
5830 	struct vnode* vnode;
5831 	status_t status;
5832 
5833 	if (name == NULL || *name == '\0')
5834 		return B_BAD_VALUE;
5835 
5836 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5837 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5838 
5839 	status = get_vnode(mountID, parentID, &vnode, true, false);
5840 	if (status != B_OK)
5841 		return status;
5842 
5843 	if (HAS_FS_CALL(vnode, create_dir))
5844 		status = FS_CALL(vnode, create_dir, name, perms);
5845 	else
5846 		status = B_READ_ONLY_DEVICE;
5847 
5848 	put_vnode(vnode);
5849 	return status;
5850 }
5851 
5852 
5853 static status_t
5854 dir_create(int fd, char* path, int perms, bool kernel)
5855 {
5856 	char filename[B_FILE_NAME_LENGTH];
5857 	struct vnode* vnode;
5858 	status_t status;
5859 
5860 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5861 		kernel));
5862 
5863 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5864 	if (status < 0)
5865 		return status;
5866 
5867 	if (HAS_FS_CALL(vnode, create_dir)) {
5868 		status = FS_CALL(vnode, create_dir, filename, perms);
5869 	} else
5870 		status = B_READ_ONLY_DEVICE;
5871 
5872 	put_vnode(vnode);
5873 	return status;
5874 }
5875 
5876 
5877 static int
5878 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5879 {
5880 	FUNCTION(("dir_open_entry_ref()\n"));
5881 
5882 	if (name && name[0] == '\0')
5883 		return B_BAD_VALUE;
5884 
5885 	// get the vnode matching the entry_ref/node_ref
5886 	struct vnode* vnode;
5887 	status_t status;
5888 	if (name) {
5889 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5890 			&vnode);
5891 	} else
5892 		status = get_vnode(mountID, parentID, &vnode, true, false);
5893 	if (status != B_OK)
5894 		return status;
5895 
5896 	int newFD = open_dir_vnode(vnode, kernel);
5897 	if (newFD >= 0) {
5898 		// The vnode reference has been transferred to the FD
5899 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5900 			vnode->id, name);
5901 	} else
5902 		put_vnode(vnode);
5903 
5904 	return newFD;
5905 }
5906 
5907 
5908 static int
5909 dir_open(int fd, char* path, bool kernel)
5910 {
5911 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5912 		kernel));
5913 
5914 	// get the vnode matching the vnode + path combination
5915 	struct vnode* vnode = NULL;
5916 	ino_t parentID;
5917 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5918 		kernel);
5919 	if (status != B_OK)
5920 		return status;
5921 
5922 	// open the dir
5923 	int newFD = open_dir_vnode(vnode, kernel);
5924 	if (newFD >= 0) {
5925 		// The vnode reference has been transferred to the FD
5926 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5927 			parentID, vnode->id, NULL);
5928 	} else
5929 		put_vnode(vnode);
5930 
5931 	return newFD;
5932 }
5933 
5934 
5935 static status_t
5936 dir_close(struct file_descriptor* descriptor)
5937 {
5938 	struct vnode* vnode = descriptor->u.vnode;
5939 
5940 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5941 
5942 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5943 		vnode->id);
5944 	if (HAS_FS_CALL(vnode, close_dir))
5945 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5946 
5947 	return B_OK;
5948 }
5949 
5950 
5951 static void
5952 dir_free_fd(struct file_descriptor* descriptor)
5953 {
5954 	struct vnode* vnode = descriptor->u.vnode;
5955 
5956 	if (vnode != NULL) {
5957 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5958 		put_vnode(vnode);
5959 	}
5960 }
5961 
5962 
5963 static status_t
5964 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5965 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5966 {
5967 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5968 		bufferSize, _count);
5969 }
5970 
5971 
5972 static status_t
5973 fix_dirent(struct vnode* parent, struct dirent* entry,
5974 	struct io_context* ioContext)
5975 {
5976 	// set d_pdev and d_pino
5977 	entry->d_pdev = parent->device;
5978 	entry->d_pino = parent->id;
5979 
5980 	// If this is the ".." entry and the directory is covering another vnode,
5981 	// we need to replace d_dev and d_ino with the actual values.
5982 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5983 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
5984 			ioContext);
5985 	}
5986 
5987 	// resolve covered vnodes: report the topmost covering vnode instead
5988 	ReadLocker _(&sVnodeLock);
5989 
5990 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5991 	if (vnode != NULL && vnode->covered_by != NULL) {
5992 		do {
5993 			vnode = vnode->covered_by;
5994 		} while (vnode->covered_by != NULL);
5995 
5996 		entry->d_dev = vnode->device;
5997 		entry->d_ino = vnode->id;
5998 	}
5999 
6000 	return B_OK;
6001 }
6002 
6003 
6004 static status_t
6005 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6006 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6007 {
6008 	if (!HAS_FS_CALL(vnode, read_dir))
6009 		return B_UNSUPPORTED;
6010 
6011 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6012 		_count);
6013 	if (error != B_OK)
6014 		return error;
6015 
6016 	// we need to adjust the read dirents
6017 	uint32 count = *_count;
6018 	for (uint32 i = 0; i < count; i++) {
6019 		error = fix_dirent(vnode, buffer, ioContext);
6020 		if (error != B_OK)
6021 			return error;
6022 
6023 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6024 	}
6025 
6026 	return error;
6027 }
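
// How a consumer steps through the dirents returned by dir_read(): entries
// are variable-length, so d_reclen is the only way to reach the next one.
// Illustrative sketch only (not compiled); buffer and count stand in for
// the values obtained from a prior dir_read() call.
#if 0
static void
walk_dirents_sketch(struct dirent* buffer, uint32 count)
{
	struct dirent* entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		dprintf("dirent: name '%s'\n", entry->d_name);
		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
	}
}
#endif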
6028 
6029 
6030 static status_t
6031 dir_rewind(struct file_descriptor* descriptor)
6032 {
6033 	struct vnode* vnode = descriptor->u.vnode;
6034 
6035 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6036 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6037 	}
6038 
6039 	return B_UNSUPPORTED;
6040 }
6041 
6042 
6043 static status_t
6044 dir_remove(int fd, char* path, bool kernel)
6045 {
6046 	char name[B_FILE_NAME_LENGTH];
6047 	struct vnode* directory;
6048 	status_t status;
6049 
6050 	if (path != NULL) {
6051 		// we need to make sure our path name doesn't end with "/", ".",
6052 		// or ".."
6053 		char* lastSlash;
6054 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6055 			char* leaf = lastSlash + 1;
6056 			if (!strcmp(leaf, ".."))
6057 				return B_NOT_ALLOWED;
6058 
6059 			// omit multiple slashes
6060 			while (lastSlash > path && lastSlash[-1] == '/')
6061 				lastSlash--;
6062 
6063 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6064 				break;
6067 			// "name/" -> "name", or "name/." -> "name"
6068 			lastSlash[0] = '\0';
6069 		}
6070 
6071 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6072 			return B_NOT_ALLOWED;
6073 	}
6074 
6075 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6076 	if (status != B_OK)
6077 		return status;
6078 
6079 	if (HAS_FS_CALL(directory, remove_dir))
6080 		status = FS_CALL(directory, remove_dir, name);
6081 	else
6082 		status = B_READ_ONLY_DEVICE;
6083 
6084 	put_vnode(directory);
6085 	return status;
6086 }
6087 
6088 
6089 static status_t
6090 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6091 	size_t length)
6092 {
6093 	struct vnode* vnode = descriptor->u.vnode;
6094 
6095 	if (HAS_FS_CALL(vnode, ioctl))
6096 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6097 
6098 	return B_DEV_INVALID_IOCTL;
6099 }
6100 
6101 
6102 static status_t
6103 common_fcntl(int fd, int op, size_t argument, bool kernel)
6104 {
6105 	struct flock flock;
6106 
6107 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6108 		fd, op, argument, kernel ? "kernel" : "user"));
6109 
6110 	struct io_context* context = get_current_io_context(kernel);
6111 
6112 	struct file_descriptor* descriptor = get_fd(context, fd);
6113 	if (descriptor == NULL)
6114 		return B_FILE_ERROR;
6115 
6116 	struct vnode* vnode = fd_vnode(descriptor);
6117 
6118 	status_t status = B_OK;
6119 
6120 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6121 		if (descriptor->type != FDTYPE_FILE)
6122 			status = B_BAD_VALUE;
6123 		else if (kernel)
6124 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6125 		else if (user_memcpy(&flock, (struct flock*)argument,
6126 				sizeof(struct flock)) != B_OK)
6127 			status = B_BAD_ADDRESS;
6128 		if (status != B_OK) {
6129 			put_fd(descriptor);
6130 			return status;
6131 		}
6132 	}
6133 
6134 	switch (op) {
6135 		case F_SETFD:
6136 		{
6137 			// Set file descriptor flags
6138 
6139 			// FD_CLOEXEC is the only flag available at this time
6140 			mutex_lock(&context->io_mutex);
6141 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6142 			mutex_unlock(&context->io_mutex);
6143 
6144 			status = B_OK;
6145 			break;
6146 		}
6147 
6148 		case F_GETFD:
6149 		{
6150 			// Get file descriptor flags
6151 			mutex_lock(&context->io_mutex);
6152 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6153 			mutex_unlock(&context->io_mutex);
6154 			break;
6155 		}
6156 
6157 		case F_SETFL:
6158 			// Set file descriptor open mode
6159 
6160 			// we only accept changes to O_APPEND and O_NONBLOCK
6161 			argument &= O_APPEND | O_NONBLOCK;
6162 			if (descriptor->ops->fd_set_flags != NULL) {
6163 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6164 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6165 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6166 					(int)argument);
6167 			} else
6168 				status = B_UNSUPPORTED;
6169 
6170 			if (status == B_OK) {
6171 				// update this descriptor's open_mode field
6172 				descriptor->open_mode = (descriptor->open_mode
6173 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6174 			}
6175 
6176 			break;
6177 
6178 		case F_GETFL:
6179 			// Get file descriptor open mode
6180 			status = descriptor->open_mode;
6181 			break;
6182 
6183 		case F_DUPFD:
6184 		case F_DUPFD_CLOEXEC:
6185 		{
6186 			status = new_fd_etc(context, descriptor, (int)argument);
6187 			if (status >= 0) {
6188 				mutex_lock(&context->io_mutex);
6189 				fd_set_close_on_exec(context, fd, op == F_DUPFD_CLOEXEC);
6190 				mutex_unlock(&context->io_mutex);
6191 
6192 				atomic_add(&descriptor->ref_count, 1);
6193 			}
6194 			break;
6195 		}
6196 
6197 		case F_GETLK:
6198 			if (vnode != NULL) {
6199 				struct flock normalizedLock;
6200 
6201 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6202 				status = normalize_flock(descriptor, &normalizedLock);
6203 				if (status != B_OK)
6204 					break;
6205 
6206 				if (HAS_FS_CALL(vnode, test_lock)) {
6207 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6208 						&normalizedLock);
6209 				} else
6210 					status = test_advisory_lock(vnode, &normalizedLock);
6211 				if (status == B_OK) {
6212 					if (normalizedLock.l_type == F_UNLCK) {
6213 						// no conflicting lock found, copy back the same struct
6214 						// we were given except change type to F_UNLCK
6215 						flock.l_type = F_UNLCK;
6216 						if (kernel) {
6217 							memcpy((struct flock*)argument, &flock,
6218 								sizeof(struct flock));
6219 						} else {
6220 							status = user_memcpy((struct flock*)argument,
6221 								&flock, sizeof(struct flock));
6222 						}
6223 					} else {
6224 						// a conflicting lock was found, copy back its range and
6225 						// type
6226 						if (normalizedLock.l_len == OFF_MAX)
6227 							normalizedLock.l_len = 0;
6228 
6229 						if (kernel) {
6230 							memcpy((struct flock*)argument,
6231 								&normalizedLock, sizeof(struct flock));
6232 						} else {
6233 							status = user_memcpy((struct flock*)argument,
6234 								&normalizedLock, sizeof(struct flock));
6235 						}
6236 					}
6237 				}
6238 			} else
6239 				status = B_BAD_VALUE;
6240 			break;
6241 
6242 		case F_SETLK:
6243 		case F_SETLKW:
6244 			status = normalize_flock(descriptor, &flock);
6245 			if (status != B_OK)
6246 				break;
6247 
6248 			if (vnode == NULL) {
6249 				status = B_BAD_VALUE;
6250 			} else if (flock.l_type == F_UNLCK) {
6251 				if (HAS_FS_CALL(vnode, release_lock)) {
6252 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6253 						&flock);
6254 				} else {
6255 					status = release_advisory_lock(vnode, context, NULL,
6256 						&flock);
6257 				}
6258 			} else {
6259 				// the open mode must match the lock type
6260 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6261 						&& flock.l_type == F_WRLCK)
6262 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6263 						&& flock.l_type == F_RDLCK))
6264 					status = B_FILE_ERROR;
6265 				else {
6266 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6267 						status = FS_CALL(vnode, acquire_lock,
6268 							descriptor->cookie, &flock, op == F_SETLKW);
6269 					} else {
6270 						status = acquire_advisory_lock(vnode, context, NULL,
6271 							&flock, op == F_SETLKW);
6272 					}
6273 				}
6274 			}
6275 			break;
6276 
6277 		// ToDo: add support for more ops?
6278 
6279 		default:
6280 			status = B_BAD_VALUE;
6281 	}
6282 
6283 	put_fd(descriptor);
6284 	return status;
6285 }
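
// Example (hypothetical user-land usage) of the F_SETFL handling above:
// only O_APPEND and O_NONBLOCK survive the masking, any other bits are
// silently dropped.
#if 0
	int flags = fcntl(fd, F_GETFL);
	fcntl(fd, F_SETFL, flags | O_NONBLOCK);	// honored
	fcntl(fd, F_SETFL, flags | O_RDWR);		// access mode change is masked off
#endif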
6286 
6287 
6288 static status_t
6289 common_sync(int fd, bool kernel)
6290 {
6291 	struct file_descriptor* descriptor;
6292 	struct vnode* vnode;
6293 	status_t status;
6294 
6295 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6296 
6297 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6298 	if (descriptor == NULL)
6299 		return B_FILE_ERROR;
6300 
6301 	if (HAS_FS_CALL(vnode, fsync))
6302 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6303 	else
6304 		status = B_UNSUPPORTED;
6305 
6306 	put_fd(descriptor);
6307 	return status;
6308 }
6309 
6310 
6311 static status_t
6312 common_lock_node(int fd, bool kernel)
6313 {
6314 	struct file_descriptor* descriptor;
6315 	struct vnode* vnode;
6316 
6317 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6318 	if (descriptor == NULL)
6319 		return B_FILE_ERROR;
6320 
6321 	status_t status = B_OK;
6322 
6323 	// We need to set the locking atomically - someone
6324 	// else might set one at the same time
6325 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6326 			(file_descriptor*)NULL) != NULL)
6327 		status = B_BUSY;
6328 
6329 	put_fd(descriptor);
6330 	return status;
6331 }
6332 
6333 
6334 static status_t
6335 common_unlock_node(int fd, bool kernel)
6336 {
6337 	struct file_descriptor* descriptor;
6338 	struct vnode* vnode;
6339 
6340 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6341 	if (descriptor == NULL)
6342 		return B_FILE_ERROR;
6343 
6344 	status_t status = B_OK;
6345 
6346 	// We need to set the locking atomically - someone
6347 	// else might set one at the same time
6348 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6349 			(file_descriptor*)NULL, descriptor) != descriptor)
6350 		status = B_BAD_VALUE;
6351 
6352 	put_fd(descriptor);
6353 	return status;
6354 }
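
// The node lock/unlock pair above is a small CAS-based ownership protocol:
// atomic_pointer_test_and_set(pointer, newValue, testAgainst) stores
// newValue only if *pointer still equals testAgainst, and always returns
// the previous value. Locking therefore succeeds only while
// mandatory_locked_by is NULL, and unlocking only while the caller's own
// descriptor holds the node.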
6355 
6356 
6357 static status_t
6358 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6359 	bool kernel)
6360 {
6361 	struct vnode* vnode;
6362 	status_t status;
6363 
6364 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6365 	if (status != B_OK)
6366 		return status;
6367 
6368 	if (HAS_FS_CALL(vnode, read_symlink)) {
6369 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6370 	} else
6371 		status = B_BAD_VALUE;
6372 
6373 	put_vnode(vnode);
6374 	return status;
6375 }
6376 
6377 
6378 static status_t
6379 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6380 	bool kernel)
6381 {
6382 	// path validity checks have to be in the calling function!
6383 	char name[B_FILE_NAME_LENGTH];
6384 	struct vnode* vnode;
6385 	status_t status;
6386 
6387 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6388 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6389 
6390 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6391 	if (status != B_OK)
6392 		return status;
6393 
6394 	if (HAS_FS_CALL(vnode, create_symlink))
6395 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6396 	else {
6397 		status = HAS_FS_CALL(vnode, write)
6398 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6399 	}
6400 
6401 	put_vnode(vnode);
6402 
6403 	return status;
6404 }
6405 
6406 
6407 static status_t
6408 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6409 	bool traverseLeafLink, bool kernel)
6410 {
6411 	// path validity checks have to be in the calling function!
6412 
6413 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6414 		toPath, kernel));
6415 
6416 	char name[B_FILE_NAME_LENGTH];
6417 	struct vnode* directory;
6418 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6419 		kernel);
6420 	if (status != B_OK)
6421 		return status;
6422 
6423 	struct vnode* vnode;
6424 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6425 		kernel);
6426 	if (status != B_OK)
6427 		goto err;
6428 
6429 	if (directory->mount != vnode->mount) {
6430 		status = B_CROSS_DEVICE_LINK;
6431 		goto err1;
6432 	}
6433 
6434 	if (HAS_FS_CALL(directory, link))
6435 		status = FS_CALL(directory, link, name, vnode);
6436 	else
6437 		status = B_READ_ONLY_DEVICE;
6438 
6439 err1:
6440 	put_vnode(vnode);
6441 err:
6442 	put_vnode(directory);
6443 
6444 	return status;
6445 }
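
// Note: the err/err1 labels above unwind in reverse order of acquisition --
// err1 releases the target vnode, err the directory -- so each failure path
// jumps exactly as far back as setup got.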
6446 
6447 
6448 static status_t
6449 common_unlink(int fd, char* path, bool kernel)
6450 {
6451 	char filename[B_FILE_NAME_LENGTH];
6452 	struct vnode* vnode;
6453 	status_t status;
6454 
6455 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6456 		kernel));
6457 
6458 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6459 	if (status < 0)
6460 		return status;
6461 
6462 	if (HAS_FS_CALL(vnode, unlink))
6463 		status = FS_CALL(vnode, unlink, filename);
6464 	else
6465 		status = B_READ_ONLY_DEVICE;
6466 
6467 	put_vnode(vnode);
6468 
6469 	return status;
6470 }
6471 
6472 
6473 static status_t
6474 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6475 {
6476 	struct vnode* vnode;
6477 	status_t status;
6478 
6479 	// TODO: honor effectiveUserGroup argument
6480 
6481 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6482 	if (status != B_OK)
6483 		return status;
6484 
6485 	if (HAS_FS_CALL(vnode, access))
6486 		status = FS_CALL(vnode, access, mode);
6487 	else
6488 		status = B_OK;
6489 
6490 	put_vnode(vnode);
6491 
6492 	return status;
6493 }
6494 
6495 
6496 static status_t
6497 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6498 {
6499 	struct vnode* fromVnode;
6500 	struct vnode* toVnode;
6501 	char fromName[B_FILE_NAME_LENGTH];
6502 	char toName[B_FILE_NAME_LENGTH];
6503 	status_t status;
6504 
6505 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6506 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6507 
6508 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6509 	if (status != B_OK)
6510 		return status;
6511 
6512 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6513 	if (status != B_OK)
6514 		goto err1;
6515 
6516 	if (fromVnode->device != toVnode->device) {
6517 		status = B_CROSS_DEVICE_LINK;
6518 		goto err2;
6519 	}
6520 
6521 	if (fromName[0] == '\0' || toName[0] == '\0'
6522 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6523 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6524 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6525 		status = B_BAD_VALUE;
6526 		goto err2;
6527 	}
6528 
6529 	if (HAS_FS_CALL(fromVnode, rename))
6530 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6531 	else
6532 		status = B_READ_ONLY_DEVICE;
6533 
6534 err2:
6535 	put_vnode(toVnode);
6536 err1:
6537 	put_vnode(fromVnode);
6538 
6539 	return status;
6540 }
6541 
6542 
6543 static status_t
6544 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6545 {
6546 	struct vnode* vnode = descriptor->u.vnode;
6547 
6548 	FUNCTION(("common_read_stat: stat %p\n", stat));
6549 
6550 	// TODO: remove this once all file systems properly set them!
6551 	stat->st_crtim.tv_nsec = 0;
6552 	stat->st_ctim.tv_nsec = 0;
6553 	stat->st_mtim.tv_nsec = 0;
6554 	stat->st_atim.tv_nsec = 0;
6555 
6556 	return vfs_stat_vnode(vnode, stat);
6557 }
6558 
6559 
6560 static status_t
6561 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6562 	int statMask)
6563 {
6564 	struct vnode* vnode = descriptor->u.vnode;
6565 
6566 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6567 		vnode, stat, statMask));
6568 
6569 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
6570 		return B_BAD_VALUE;
6571 
6572 	if (!HAS_FS_CALL(vnode, write_stat))
6573 		return B_READ_ONLY_DEVICE;
6574 
6575 	return FS_CALL(vnode, write_stat, stat, statMask);
6576 }
6577 
6578 
6579 static status_t
6580 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6581 	struct stat* stat, bool kernel)
6582 {
6583 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6584 		stat));
6585 
6586 	struct vnode* vnode;
6587 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6588 		NULL, kernel);
6589 	if (status != B_OK)
6590 		return status;
6591 
6592 	status = vfs_stat_vnode(vnode, stat);
6593 
6594 	put_vnode(vnode);
6595 	return status;
6596 }
6597 
6598 
6599 static status_t
6600 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6601 	const struct stat* stat, int statMask, bool kernel)
6602 {
6603 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6604 		"kernel %d\n", fd, path, stat, statMask, kernel));
6605 
6606 	struct vnode* vnode;
6607 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6608 		NULL, kernel);
6609 	if (status != B_OK)
6610 		return status;
6611 
6612 	if (HAS_FS_CALL(vnode, write_stat))
6613 		status = FS_CALL(vnode, write_stat, stat, statMask);
6614 	else
6615 		status = B_READ_ONLY_DEVICE;
6616 
6617 	put_vnode(vnode);
6618 
6619 	return status;
6620 }
6621 
6622 
6623 static int
6624 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6625 {
6626 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6627 		kernel));
6628 
6629 	struct vnode* vnode;
6630 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6631 		NULL, kernel);
6632 	if (status != B_OK)
6633 		return status;
6634 
6635 	status = open_attr_dir_vnode(vnode, kernel);
6636 	if (status < 0)
6637 		put_vnode(vnode);
6638 
6639 	return status;
6640 }
6641 
6642 
6643 static status_t
6644 attr_dir_close(struct file_descriptor* descriptor)
6645 {
6646 	struct vnode* vnode = descriptor->u.vnode;
6647 
6648 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6649 
6650 	if (HAS_FS_CALL(vnode, close_attr_dir))
6651 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6652 
6653 	return B_OK;
6654 }
6655 
6656 
6657 static void
6658 attr_dir_free_fd(struct file_descriptor* descriptor)
6659 {
6660 	struct vnode* vnode = descriptor->u.vnode;
6661 
6662 	if (vnode != NULL) {
6663 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6664 		put_vnode(vnode);
6665 	}
6666 }
6667 
6668 
6669 static status_t
6670 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6671 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6672 {
6673 	struct vnode* vnode = descriptor->u.vnode;
6674 
6675 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6676 
6677 	if (HAS_FS_CALL(vnode, read_attr_dir))
6678 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6679 			bufferSize, _count);
6680 
6681 	return B_UNSUPPORTED;
6682 }
6683 
6684 
6685 static status_t
6686 attr_dir_rewind(struct file_descriptor* descriptor)
6687 {
6688 	struct vnode* vnode = descriptor->u.vnode;
6689 
6690 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6691 
6692 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6693 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6694 
6695 	return B_UNSUPPORTED;
6696 }
6697 
6698 
6699 static int
6700 attr_create(int fd, char* path, const char* name, uint32 type,
6701 	int openMode, bool kernel)
6702 {
6703 	if (name == NULL || *name == '\0')
6704 		return B_BAD_VALUE;
6705 
6706 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6707 	struct vnode* vnode;
6708 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6709 		kernel);
6710 	if (status != B_OK)
6711 		return status;
6712 
6713 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6714 		status = B_LINK_LIMIT;
6715 		goto err;
6716 	}
6717 
6718 	if (!HAS_FS_CALL(vnode, create_attr)) {
6719 		status = B_READ_ONLY_DEVICE;
6720 		goto err;
6721 	}
6722 
6723 	void* cookie;
6724 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6725 	if (status != B_OK)
6726 		goto err;
6727 
6728 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6729 	if (fd >= 0)
6730 		return fd;
6731 
6732 	status = fd;
6733 
6734 	FS_CALL(vnode, close_attr, cookie);
6735 	FS_CALL(vnode, free_attr_cookie, cookie);
6736 
6737 	FS_CALL(vnode, remove_attr, name);
6738 
6739 err:
6740 	put_vnode(vnode);
6741 
6742 	return status;
6743 }
6744 
6745 
6746 static int
6747 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6748 {
6749 	if (name == NULL || *name == '\0')
6750 		return B_BAD_VALUE;
6751 
6752 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6753 	struct vnode* vnode;
6754 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6755 		kernel);
6756 	if (status != B_OK)
6757 		return status;
6758 
6759 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6760 		status = B_LINK_LIMIT;
6761 		goto err;
6762 	}
6763 
6764 	if (!HAS_FS_CALL(vnode, open_attr)) {
6765 		status = B_UNSUPPORTED;
6766 		goto err;
6767 	}
6768 
6769 	void* cookie;
6770 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6771 	if (status != B_OK)
6772 		goto err;
6773 
6774 	// now we only need a file descriptor for this attribute and we're done
6775 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6776 	if (fd >= 0)
6777 		return fd;
6778 
6779 	status = fd;
6780 
6781 	FS_CALL(vnode, close_attr, cookie);
6782 	FS_CALL(vnode, free_attr_cookie, cookie);
6783 
6784 err:
6785 	put_vnode(vnode);
6786 
6787 	return status;
6788 }
6789 
6790 
6791 static status_t
6792 attr_close(struct file_descriptor* descriptor)
6793 {
6794 	struct vnode* vnode = descriptor->u.vnode;
6795 
6796 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6797 
6798 	if (HAS_FS_CALL(vnode, close_attr))
6799 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6800 
6801 	return B_OK;
6802 }
6803 
6804 
6805 static void
6806 attr_free_fd(struct file_descriptor* descriptor)
6807 {
6808 	struct vnode* vnode = descriptor->u.vnode;
6809 
6810 	if (vnode != NULL) {
6811 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6812 		put_vnode(vnode);
6813 	}
6814 }
6815 
6816 
6817 static status_t
6818 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6819 	size_t* length)
6820 {
6821 	struct vnode* vnode = descriptor->u.vnode;
6822 
6823 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6824 		pos, length, *length));
6825 
6826 	if (!HAS_FS_CALL(vnode, read_attr))
6827 		return B_UNSUPPORTED;
6828 
6829 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6830 }
6831 
6832 
6833 static status_t
6834 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6835 	size_t* length)
6836 {
6837 	struct vnode* vnode = descriptor->u.vnode;
6838 
6839 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6840 		length));
6841 
6842 	if (!HAS_FS_CALL(vnode, write_attr))
6843 		return B_UNSUPPORTED;
6844 
6845 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6846 }
6847 
6848 
6849 static off_t
6850 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6851 {
6852 	off_t offset;
6853 
6854 	switch (seekType) {
6855 		case SEEK_SET:
6856 			offset = 0;
6857 			break;
6858 		case SEEK_CUR:
6859 			offset = descriptor->pos;
6860 			break;
6861 		case SEEK_END:
6862 		{
6863 			struct vnode* vnode = descriptor->u.vnode;
6864 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6865 				return B_UNSUPPORTED;
6866 
6867 			struct stat stat;
6868 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6869 				&stat);
6870 			if (status != B_OK)
6871 				return status;
6872 
6873 			offset = stat.st_size;
6874 			break;
6875 		}
6876 		default:
6877 			return B_BAD_VALUE;
6878 	}
6879 
6880 	// assumes off_t is 64 bits wide
6881 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6882 		return B_BUFFER_OVERFLOW;
6883 
6884 	pos += offset;
6885 	if (pos < 0)
6886 		return B_BAD_VALUE;
6887 
6888 	return descriptor->pos = pos;
6889 }
6890 
6891 
6892 static status_t
6893 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6894 {
6895 	struct vnode* vnode = descriptor->u.vnode;
6896 
6897 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6898 
6899 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6900 		return B_UNSUPPORTED;
6901 
6902 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6903 }
6904 
6905 
6906 static status_t
6907 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6908 	int statMask)
6909 {
6910 	struct vnode* vnode = descriptor->u.vnode;
6911 
6912 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6913 
6914 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6915 		return B_READ_ONLY_DEVICE;
6916 
6917 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6918 }
6919 
6920 
6921 static status_t
6922 attr_remove(int fd, const char* name, bool kernel)
6923 {
6924 	struct file_descriptor* descriptor;
6925 	struct vnode* vnode;
6926 	status_t status;
6927 
6928 	if (name == NULL || *name == '\0')
6929 		return B_BAD_VALUE;
6930 
6931 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6932 		kernel));
6933 
6934 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6935 	if (descriptor == NULL)
6936 		return B_FILE_ERROR;
6937 
6938 	if (HAS_FS_CALL(vnode, remove_attr))
6939 		status = FS_CALL(vnode, remove_attr, name);
6940 	else
6941 		status = B_READ_ONLY_DEVICE;
6942 
6943 	put_fd(descriptor);
6944 
6945 	return status;
6946 }
6947 
6948 
6949 static status_t
6950 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6951 	bool kernel)
6952 {
6953 	struct file_descriptor* fromDescriptor;
6954 	struct file_descriptor* toDescriptor;
6955 	struct vnode* fromVnode;
6956 	struct vnode* toVnode;
6957 	status_t status;
6958 
6959 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6960 		|| *toName == '\0')
6961 		return B_BAD_VALUE;
6962 
6963 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6964 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6965 
6966 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6967 	if (fromDescriptor == NULL)
6968 		return B_FILE_ERROR;
6969 
6970 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6971 	if (toDescriptor == NULL) {
6972 		status = B_FILE_ERROR;
6973 		goto err;
6974 	}
6975 
6976 	// are the files on the same volume?
6977 	if (fromVnode->device != toVnode->device) {
6978 		status = B_CROSS_DEVICE_LINK;
6979 		goto err1;
6980 	}
6981 
6982 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6983 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6984 	} else
6985 		status = B_READ_ONLY_DEVICE;
6986 
6987 err1:
6988 	put_fd(toDescriptor);
6989 err:
6990 	put_fd(fromDescriptor);
6991 
6992 	return status;
6993 }
6994 
6995 
6996 static int
6997 index_dir_open(dev_t mountID, bool kernel)
6998 {
6999 	struct fs_mount* mount;
7000 	void* cookie;
7001 
7002 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7003 		kernel));
7004 
7005 	status_t status = get_mount(mountID, &mount);
7006 	if (status != B_OK)
7007 		return status;
7008 
7009 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7010 		status = B_UNSUPPORTED;
7011 		goto error;
7012 	}
7013 
7014 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7015 	if (status != B_OK)
7016 		goto error;
7017 
7018 	// get fd for the index directory
7019 	int fd;
7020 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7021 	if (fd >= 0)
7022 		return fd;
7023 
7024 	// something went wrong
7025 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7026 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7027 
7028 	status = fd;
7029 
7030 error:
7031 	put_mount(mount);
7032 	return status;
7033 }
7034 
7035 
7036 static status_t
7037 index_dir_close(struct file_descriptor* descriptor)
7038 {
7039 	struct fs_mount* mount = descriptor->u.mount;
7040 
7041 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7042 
7043 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7044 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7045 
7046 	return B_OK;
7047 }
7048 
7049 
7050 static void
7051 index_dir_free_fd(struct file_descriptor* descriptor)
7052 {
7053 	struct fs_mount* mount = descriptor->u.mount;
7054 
7055 	if (mount != NULL) {
7056 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7057 		put_mount(mount);
7058 	}
7059 }
7060 
7061 
7062 static status_t
7063 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7064 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7065 {
7066 	struct fs_mount* mount = descriptor->u.mount;
7067 
7068 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7069 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7070 			bufferSize, _count);
7071 	}
7072 
7073 	return B_UNSUPPORTED;
7074 }
7075 
7076 
7077 static status_t
7078 index_dir_rewind(struct file_descriptor* descriptor)
7079 {
7080 	struct fs_mount* mount = descriptor->u.mount;
7081 
7082 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7083 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7084 
7085 	return B_UNSUPPORTED;
7086 }
7087 
7088 
7089 static status_t
7090 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7091 	bool kernel)
7092 {
7093 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7094 		mountID, name, kernel));
7095 
7096 	struct fs_mount* mount;
7097 	status_t status = get_mount(mountID, &mount);
7098 	if (status != B_OK)
7099 		return status;
7100 
7101 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7102 		status = B_READ_ONLY_DEVICE;
7103 		goto out;
7104 	}
7105 
7106 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7107 
7108 out:
7109 	put_mount(mount);
7110 	return status;
7111 }
7112 
7113 
7114 #if 0
7115 static status_t
7116 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7117 {
7118 	struct vnode* vnode = descriptor->u.vnode;
7119 
7120 	// ToDo: currently unused!
7121 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7122 	if (!HAS_FS_CALL(vnode, read_index_stat))
7123 		return B_UNSUPPORTED;
7124 
7125 	return B_UNSUPPORTED;
7126 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7127 }
7128 
7129 
7130 static void
7131 index_free_fd(struct file_descriptor* descriptor)
7132 {
7133 	struct vnode* vnode = descriptor->u.vnode;
7134 
7135 	if (vnode != NULL) {
7136 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7137 		put_vnode(vnode);
7138 	}
7139 }
7140 #endif
7141 
7142 
7143 static status_t
7144 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7145 	bool kernel)
7146 {
7147 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7148 		mountID, name, kernel));
7149 
7150 	struct fs_mount* mount;
7151 	status_t status = get_mount(mountID, &mount);
7152 	if (status != B_OK)
7153 		return status;
7154 
7155 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7156 		status = B_UNSUPPORTED;
7157 		goto out;
7158 	}
7159 
7160 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7161 
7162 out:
7163 	put_mount(mount);
7164 	return status;
7165 }
7166 
7167 
7168 static status_t
7169 index_remove(dev_t mountID, const char* name, bool kernel)
7170 {
7171 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7172 		mountID, name, kernel));
7173 
7174 	struct fs_mount* mount;
7175 	status_t status = get_mount(mountID, &mount);
7176 	if (status != B_OK)
7177 		return status;
7178 
7179 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7180 		status = B_READ_ONLY_DEVICE;
7181 		goto out;
7182 	}
7183 
7184 	status = FS_MOUNT_CALL(mount, remove_index, name);
7185 
7186 out:
7187 	put_mount(mount);
7188 	return status;
7189 }
7190 
7191 
7192 /*!	TODO: the query FS API is still pretty much the same as in R5.
7193 		It would be nice if queries got some more kernel support;
7194 		for example, query parsing should be moved into the kernel.
7196 */
7197 static int
7198 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7199 	int32 token, bool kernel)
7200 {
7201 	struct fs_mount* mount;
7202 	void* cookie;
7203 
7204 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7205 		device, query, kernel));
7206 
7207 	status_t status = get_mount(device, &mount);
7208 	if (status != B_OK)
7209 		return status;
7210 
7211 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7212 		status = B_UNSUPPORTED;
7213 		goto error;
7214 	}
7215 
7216 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7217 		&cookie);
7218 	if (status != B_OK)
7219 		goto error;
7220 
7221 	// get fd for the query
7222 	int fd;
7223 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7224 	if (fd >= 0)
7225 		return fd;
7226 
7227 	status = fd;
7228 
7229 	// something went wrong
7230 	FS_MOUNT_CALL(mount, close_query, cookie);
7231 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7232 
7233 error:
7234 	put_mount(mount);
7235 	return status;
7236 }
7237 
7238 
7239 static status_t
7240 query_close(struct file_descriptor* descriptor)
7241 {
7242 	struct fs_mount* mount = descriptor->u.mount;
7243 
7244 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7245 
7246 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7247 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7248 
7249 	return B_OK;
7250 }
7251 
7252 
7253 static void
7254 query_free_fd(struct file_descriptor* descriptor)
7255 {
7256 	struct fs_mount* mount = descriptor->u.mount;
7257 
7258 	if (mount != NULL) {
7259 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7260 		put_mount(mount);
7261 	}
7262 }
7263 
7264 
7265 static status_t
7266 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7267 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7268 {
7269 	struct fs_mount* mount = descriptor->u.mount;
7270 
7271 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7272 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7273 			bufferSize, _count);
7274 	}
7275 
7276 	return B_UNSUPPORTED;
7277 }
7278 
7279 
7280 static status_t
7281 query_rewind(struct file_descriptor* descriptor)
7282 {
7283 	struct fs_mount* mount = descriptor->u.mount;
7284 
7285 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7286 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7287 
7288 	return B_UNSUPPORTED;
7289 }
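
// Unlike the file and attribute descriptors earlier in this file, the index
// and query descriptors are mount-based: they store the fs_mount in
// descriptor->u.mount (rather than a vnode in u.vnode) and dispatch through
// the FS_MOUNT_CALL hooks.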
7290 
7291 
7292 //	#pragma mark - General File System functions
7293 
7294 
7295 static dev_t
7296 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7297 	const char* args, bool kernel)
7298 {
7299 	struct ::fs_mount* mount;
7300 	status_t status = B_OK;
7301 	fs_volume* volume = NULL;
7302 	int32 layer = 0;
7303 	Vnode* coveredNode = NULL;
7304 
7305 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7306 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7307 
7308 	// The path is always safe; we just have to make sure that fsName is
7309 	// reasonably valid - we can't make any assumptions about args, though.
7310 	// A NULL fsName is OK if a device was given and the FS is not virtual;
7311 	// we'll get the name from the DDM later.
7312 	if (fsName == NULL) {
7313 		if (device == NULL || (flags & B_MOUNT_VIRTUAL_DEVICE) != 0)
7314 			return B_BAD_VALUE;
7315 	} else if (fsName[0] == '\0')
7316 		return B_BAD_VALUE;
7317 
7318 	RecursiveLocker mountOpLocker(sMountOpLock);
7319 
7320 	// Helper to delete a newly created file device on failure.
7321 	// Not exactly beautiful, but helps to keep the code below cleaner.
7322 	struct FileDeviceDeleter {
7323 		FileDeviceDeleter() : id(-1) {}
7324 		~FileDeviceDeleter()
7325 		{
7326 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7327 		}
7328 
7329 		partition_id id;
7330 	} fileDeviceDeleter;
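	// On a successful mount the deleter is disarmed further below by
	// resetting fileDeviceDeleter.id to -1, so the file device survives.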
7331 
7332 	// If the file system is not a "virtual" one, the device argument should
7333 	// point to a real file/device (if given at all).
7334 	// get the partition
7335 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7336 	KPartition* partition = NULL;
7337 	KPath normalizedDevice;
7338 	bool newlyCreatedFileDevice = false;
7339 
7340 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7341 		// normalize the device path
7342 		status = normalizedDevice.SetTo(device, true);
7343 		if (status != B_OK)
7344 			return status;
7345 
7346 		// get a corresponding partition from the DDM
7347 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7348 		if (partition == NULL) {
7349 			// Partition not found: this either means that the user supplied
7350 			// an invalid path, or that the path refers to an image file. We
7351 			// try to let the DDM create a file device for the path.
7352 			partition_id deviceID = ddm->CreateFileDevice(
7353 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7354 			if (deviceID >= 0) {
7355 				partition = ddm->RegisterPartition(deviceID);
7356 				if (newlyCreatedFileDevice)
7357 					fileDeviceDeleter.id = deviceID;
7358 			}
7359 		}
7360 
7361 		if (!partition) {
7362 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7363 				normalizedDevice.Path()));
7364 			return B_ENTRY_NOT_FOUND;
7365 		}
7366 
7367 		device = normalizedDevice.Path();
7368 			// correct path to file device
7369 	}
7370 	PartitionRegistrar partitionRegistrar(partition, true);
7371 
7372 	// Write lock the partition's device. For the time being, we keep the lock
7373 	// until we're done mounting -- not nice, but it ensures that no one
7374 	// interferes.
7375 	// TODO: Just mark the partition busy while mounting!
7376 	KDiskDevice* diskDevice = NULL;
7377 	if (partition) {
7378 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7379 		if (!diskDevice) {
7380 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7381 			return B_ERROR;
7382 		}
7383 	}
7384 
7385 	DeviceWriteLocker writeLocker(diskDevice, true);
7386 		// this takes over the write lock acquired before
7387 
7388 	if (partition != NULL) {
7389 		// make sure that the partition is not busy
7390 		if (partition->IsBusy()) {
7391 			TRACE(("fs_mount(): Partition is busy.\n"));
7392 			return B_BUSY;
7393 		}
7394 
7395 		// if no FS name had been supplied, we get it from the partition
7396 		if (fsName == NULL) {
7397 			KDiskSystem* diskSystem = partition->DiskSystem();
7398 			if (!diskSystem) {
7399 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7400 					"recognize it.\n"));
7401 				return B_BAD_VALUE;
7402 			}
7403 
7404 			if (!diskSystem->IsFileSystem()) {
7405 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7406 					"partitioning system.\n"));
7407 				return B_BAD_VALUE;
7408 			}
7409 
7410 			// The disk system name will not change, and the KDiskSystem
7411 			// object will not go away while the disk device is locked (and
7412 			// the partition has a reference to it), so this is safe.
7413 			fsName = diskSystem->Name();
7414 		}
7415 	}
7416 
7417 	mount = new(std::nothrow) (struct ::fs_mount);
7418 	if (mount == NULL)
7419 		return B_NO_MEMORY;
7420 
7421 	mount->device_name = strdup(device);
7422 		// "device" can be NULL
7423 
7424 	status = mount->entry_cache.Init();
7425 	if (status != B_OK)
7426 		goto err1;
7427 
7428 	// initialize structure
7429 	mount->id = sNextMountID++;
7430 	mount->partition = NULL;
7431 	mount->root_vnode = NULL;
7432 	mount->covers_vnode = NULL;
7433 	mount->unmounting = false;
7434 	mount->owns_file_device = false;
7435 	mount->volume = NULL;
7436 
7437 	// build up the volume(s)
7438 	while (true) {
7439 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7440 		if (layerFSName == NULL) {
7441 			if (layer == 0) {
7442 				status = B_NO_MEMORY;
7443 				goto err1;
7444 			}
7445 
7446 			break;
7447 		}
7448 		MemoryDeleter layerFSNameDeleter(layerFSName);
7449 
7450 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7451 		if (volume == NULL) {
7452 			status = B_NO_MEMORY;
7453 			goto err1;
7454 		}
7455 
7456 		volume->id = mount->id;
7457 		volume->partition = partition != NULL ? partition->ID() : -1;
7458 		volume->layer = layer++;
7459 		volume->private_volume = NULL;
7460 		volume->ops = NULL;
7461 		volume->sub_volume = NULL;
7462 		volume->super_volume = NULL;
7463 		volume->file_system = NULL;
7464 		volume->file_system_name = NULL;
7465 
7466 		volume->file_system_name = get_file_system_name(layerFSName);
7467 		if (volume->file_system_name == NULL) {
7468 			status = B_NO_MEMORY;
7469 			free(volume);
7470 			goto err1;
7471 		}
7472 
7473 		volume->file_system = get_file_system(layerFSName);
7474 		if (volume->file_system == NULL) {
7475 			status = B_DEVICE_NOT_FOUND;
7476 			free(volume->file_system_name);
7477 			free(volume);
7478 			goto err1;
7479 		}
7480 
7481 		if (mount->volume == NULL)
7482 			mount->volume = volume;
7483 		else {
7484 			volume->super_volume = mount->volume;
7485 			mount->volume->sub_volume = volume;
7486 			mount->volume = volume;
7487 		}
7488 	}
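
	// From here on, mount->volume refers to the last layer created; for
	// layered file systems the volumes form a chain, each layer reaching the
	// one below via super_volume and the one above via sub_volume.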
7489 
7490 	// insert mount struct into list before we call FS's mount() function
7491 	// so that vnodes can be created for this mount
7492 	mutex_lock(&sMountMutex);
7493 	sMountsTable->Insert(mount);
7494 	mutex_unlock(&sMountMutex);
7495 
7496 	ino_t rootID;
7497 
7498 	if (!sRoot) {
7499 		// we haven't mounted anything yet
7500 		if (strcmp(path, "/") != 0) {
7501 			status = B_ERROR;
7502 			goto err2;
7503 		}
7504 
7505 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7506 			args, &rootID);
7507 		if (status != B_OK || mount->volume->ops == NULL)
7508 			goto err2;
7509 	} else {
7510 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7511 		if (status != B_OK)
7512 			goto err2;
7513 
7514 		mount->covers_vnode = coveredNode;
7515 
7516 		// make sure coveredNode is a directory
7517 		if (!S_ISDIR(coveredNode->Type())) {
7518 			status = B_NOT_A_DIRECTORY;
7519 			goto err3;
7520 		}
7521 
7522 		if (coveredNode->IsCovered()) {
7523 			// this is already a covered vnode
7524 			status = B_BUSY;
7525 			goto err3;
7526 		}
7527 
7528 		// mount it/them
7529 		fs_volume* volume = mount->volume;
7530 		while (volume) {
7531 			status = volume->file_system->mount(volume, device, flags, args,
7532 				&rootID);
7533 			if (status != B_OK || volume->ops == NULL) {
7534 				if (status == B_OK && volume->ops == NULL)
7535 					panic("fs_mount: mount() succeeded but ops is NULL!");
7536 				if (volume->sub_volume)
7537 					goto err4;
7538 				goto err3;
7539 			}
7540 
7541 			volume = volume->super_volume;
7542 		}
7543 
7544 		volume = mount->volume;
7545 		while (volume) {
7546 			if (volume->ops->all_layers_mounted != NULL)
7547 				volume->ops->all_layers_mounted(volume);
7548 			volume = volume->super_volume;
7549 		}
7550 	}
7551 
7552 	// the root node is supposed to be owned by the file system - it must
7553 	// exist at this point
7554 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7555 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7556 		panic("fs_mount: file system does not own its root node!\n");
7557 		status = B_ERROR;
7558 		goto err4;
7559 	}
7560 
7561 	// set up the links between the root vnode and the vnode it covers
7562 	rw_lock_write_lock(&sVnodeLock);
7563 	if (coveredNode != NULL) {
7564 		if (coveredNode->IsCovered()) {
7565 			// the vnode is covered now
7566 			status = B_BUSY;
7567 			rw_lock_write_unlock(&sVnodeLock);
7568 			goto err4;
7569 		}
7570 
7571 		mount->root_vnode->covers = coveredNode;
7572 		mount->root_vnode->SetCovering(true);
7573 
7574 		coveredNode->covered_by = mount->root_vnode;
7575 		coveredNode->SetCovered(true);
7576 	}
7577 	rw_lock_write_unlock(&sVnodeLock);
7578 
7579 	if (!sRoot) {
7580 		sRoot = mount->root_vnode;
7581 		mutex_lock(&sIOContextRootLock);
7582 		get_current_io_context(true)->root = sRoot;
7583 		mutex_unlock(&sIOContextRootLock);
7584 		inc_vnode_ref_count(sRoot);
7585 	}
7586 
7587 	// supply the partition (if any) with the mount cookie and mark it mounted
7588 	if (partition) {
7589 		partition->SetMountCookie(mount->volume->private_volume);
7590 		partition->SetVolumeID(mount->id);
7591 
7592 		// keep a partition reference as long as the partition is mounted
7593 		partitionRegistrar.Detach();
7594 		mount->partition = partition;
7595 		mount->owns_file_device = newlyCreatedFileDevice;
7596 		fileDeviceDeleter.id = -1;
7597 	}
7598 
7599 	notify_mount(mount->id,
7600 		coveredNode != NULL ? coveredNode->device : -1,
7601 		coveredNode ? coveredNode->id : -1);
7602 
7603 	return mount->id;
7604 
7605 err4:
7606 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7607 err3:
7608 	if (coveredNode != NULL)
7609 		put_vnode(coveredNode);
7610 err2:
7611 	mutex_lock(&sMountMutex);
7612 	sMountsTable->Remove(mount);
7613 	mutex_unlock(&sMountMutex);
7614 err1:
7615 	delete mount;
7616 
7617 	return status;
7618 }
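
// Note: the err1..err4 labels above unwind, in reverse order, exactly the
// state that was set up before the point of failure:
//
//	err4: undo the file system's mount()	-> FS_MOUNT_CALL unmount
//	err3: put the covered vnode				-> reference from path_to_vnode()
//	err2: remove the mount from sMountsTable
//	err1: delete the fs_mount object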
7619 
7620 
7621 static status_t
7622 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7623 {
7624 	struct fs_mount* mount;
7625 	status_t err;
7626 
7627 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7628 		mountID, kernel));
7629 
7630 	struct vnode* pathVnode = NULL;
7631 	if (path != NULL) {
7632 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7633 		if (err != B_OK)
7634 			return B_ENTRY_NOT_FOUND;
7635 	}
7636 
7637 	RecursiveLocker mountOpLocker(sMountOpLock);
7638 
7639 	// This lock is not strictly necessary, but it is taken in the KDEBUG
7640 	// case to keep the ASSERT in find_mount() working.
7641 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7642 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7643 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7644 	if (mount == NULL) {
7645 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7646 			pathVnode);
7647 	}
7648 
7649 	if (path != NULL) {
7650 		put_vnode(pathVnode);
7651 
7652 		if (mount->root_vnode != pathVnode) {
7653 			// not mountpoint
7654 			return B_BAD_VALUE;
7655 		}
7656 	}
7657 
7658 	// if the volume is associated with a partition, lock the device of the
7659 	// partition as long as we are unmounting
7660 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7661 	KPartition* partition = mount->partition;
7662 	KDiskDevice* diskDevice = NULL;
7663 	if (partition != NULL) {
7664 		if (partition->Device() == NULL) {
7665 			dprintf("fs_unmount(): There is no device!\n");
7666 			return B_ERROR;
7667 		}
7668 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7669 		if (!diskDevice) {
7670 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7671 			return B_ERROR;
7672 		}
7673 	}
7674 	DeviceWriteLocker writeLocker(diskDevice, true);
7675 
7676 	// make sure that the partition is not busy
7677 	if (partition != NULL) {
7678 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7679 			TRACE(("fs_unmount(): Partition is busy.\n"));
7680 			return B_BUSY;
7681 		}
7682 	}
7683 
7684 	// grab the vnode master mutex to keep someone from creating
7685 	// a vnode while we're figuring out if we can continue
7686 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7687 
7688 	bool disconnectedDescriptors = false;
7689 
7690 	while (true) {
7691 		bool busy = false;
7692 
7693 		// cycle through the list of vnodes associated with this mount and
7694 		// make sure none of them is busy or still referenced
7695 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7696 		while (struct vnode* vnode = iterator.Next()) {
7697 			if (vnode->IsBusy()) {
7698 				busy = true;
7699 				break;
7700 			}
7701 
7702 			// check the vnode's ref count -- subtract additional references for
7703 			// covering
7704 			int32 refCount = vnode->ref_count;
7705 			if (vnode->covers != NULL)
7706 				refCount--;
7707 			if (vnode->covered_by != NULL)
7708 				refCount--;
7709 
7710 			if (refCount != 0) {
7711 				// there are still vnodes in use on this mount, so we cannot
7712 				// unmount yet
7713 				busy = true;
7714 				break;
7715 			}
7716 		}
7717 
7718 		if (!busy)
7719 			break;
7720 
7721 		if ((flags & B_FORCE_UNMOUNT) == 0)
7722 			return B_BUSY;
7723 
7724 		if (disconnectedDescriptors) {
7725 			// wait a bit until the last access is finished, and then try again
7726 			vnodesWriteLocker.Unlock();
7727 			snooze(100000);
7728 			// TODO: if there is some kind of bug that prevents the ref counts
7729 			// from getting back to zero, this will fall into an endless loop...
7730 			vnodesWriteLocker.Lock();
7731 			continue;
7732 		}
7733 
7734 		// the file system is still busy - but we're forced to unmount it,
7735 		// so let's disconnect all open file descriptors
7736 
7737 		mount->unmounting = true;
7738 			// prevent new vnodes from being created
7739 
7740 		vnodesWriteLocker.Unlock();
7741 
7742 		disconnect_mount_or_vnode_fds(mount, NULL);
7743 		disconnectedDescriptors = true;
7744 
7745 		vnodesWriteLocker.Lock();
7746 	}
7747 
7748 	// We can safely continue. Mark all of the vnodes busy and this mount
7749 	// structure in unmounting state. Also undo the vnode covers/covered_by
7750 	// links.
7751 	mount->unmounting = true;
7752 
7753 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7754 	while (struct vnode* vnode = iterator.Next()) {
7755 		// Remove all covers/covered_by links from other mounts' nodes to this
7756 		// vnode and adjust the node ref count accordingly. We will release the
7757 		// references to the external vnodes below.
7758 		if (Vnode* coveredNode = vnode->covers) {
7759 			if (Vnode* coveringNode = vnode->covered_by) {
7760 				// We have both covered and covering vnodes, so just remove us
7761 				// from the chain.
7762 				coveredNode->covered_by = coveringNode;
7763 				coveringNode->covers = coveredNode;
7764 				vnode->ref_count -= 2;
7765 
7766 				vnode->covered_by = NULL;
7767 				vnode->covers = NULL;
7768 				vnode->SetCovering(false);
7769 				vnode->SetCovered(false);
7770 			} else {
7771 				// We only have a covered vnode. Remove its link to us.
7772 				coveredNode->covered_by = NULL;
7773 				coveredNode->SetCovered(false);
7774 				vnode->ref_count--;
7775 
7776 				// If the other node is an external vnode, we keep its
7777 				// link around so we can put the reference later on. Otherwise
7778 				// we get rid of it right now.
7779 				if (coveredNode->mount == mount) {
7780 					vnode->covers = NULL;
7781 					coveredNode->ref_count--;
7782 				}
7783 			}
7784 		} else if (Vnode* coveringNode = vnode->covered_by) {
7785 			// We only have a covering vnode. Remove its link to us.
7786 			coveringNode->covers = NULL;
7787 			coveringNode->SetCovering(false);
7788 			vnode->ref_count--;
7789 
7790 			// If the other node is an external vnode, we keep its
7791 			// link around so we can put the reference later on. Otherwise
7792 			// we get rid of it right now.
7793 			if (coveringNode->mount == mount) {
7794 				vnode->covered_by = NULL;
7795 				coveringNode->ref_count--;
7796 			}
7797 		}
7798 
7799 		vnode->SetBusy(true);
7800 		vnode_to_be_freed(vnode);
7801 	}
7802 
7803 	vnodesWriteLocker.Unlock();
7804 
7805 	// Free all vnodes associated with this mount.
7806 	// They will be removed from the mount list by free_vnode(), so
7807 	// we don't have to do that here.
7808 	while (struct vnode* vnode = mount->vnodes.Head()) {
7809 		// Put the references to external covered/covering vnodes we kept above.
7810 		if (Vnode* coveredNode = vnode->covers)
7811 			put_vnode(coveredNode);
7812 		if (Vnode* coveringNode = vnode->covered_by)
7813 			put_vnode(coveringNode);
7814 
7815 		free_vnode(vnode, false);
7816 	}
7817 
7818 	// remove the mount structure from the hash table
7819 	mutex_lock(&sMountMutex);
7820 	sMountsTable->Remove(mount);
7821 	mutex_unlock(&sMountMutex);
7822 
7823 	mountOpLocker.Unlock();
7824 
7825 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7826 	notify_unmount(mount->id);
7827 
7828 	// dereference the partition and mark it unmounted
7829 	if (partition) {
7830 		partition->SetVolumeID(-1);
7831 		partition->SetMountCookie(NULL);
7832 
7833 		if (mount->owns_file_device)
7834 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7835 		partition->Unregister();
7836 	}
7837 
7838 	delete mount;
7839 	return B_OK;
7840 }
7841 
7842 
7843 static status_t
7844 fs_sync(dev_t device)
7845 {
7846 	struct fs_mount* mount;
7847 	status_t status = get_mount(device, &mount);
7848 	if (status != B_OK)
7849 		return status;
7850 
7851 	struct vnode marker;
7852 	memset(&marker, 0, sizeof(marker));
7853 	marker.SetBusy(true);
7854 	marker.SetRemoved(true);
7855 
7856 	// First, synchronize all file caches
7857 
7858 	while (true) {
7859 		WriteLocker locker(sVnodeLock);
7860 			// Note: That's the easy way, which is probably OK for sync(),
7861 			// since it's a relatively rare call and doesn't need to allow for
7862 			// a lot of concurrency. Using a read lock would be possible, but
7863 			// also more involved, since we would have to lock the individual
7864 			// nodes and take care of the locking order, which we might not
7865 			// want to do while holding fs_mount::rlock.
7866 
7867 		// synchronize access to vnode list
7868 		recursive_lock_lock(&mount->rlock);
7869 
7870 		struct vnode* vnode;
7871 		if (!marker.IsRemoved()) {
7872 			vnode = mount->vnodes.GetNext(&marker);
7873 			mount->vnodes.Remove(&marker);
7874 			marker.SetRemoved(true);
7875 		} else
7876 			vnode = mount->vnodes.First();
7877 
7878 		while (vnode != NULL && (vnode->cache == NULL
7879 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7880 			// TODO: we could track writes (and writable mapped vnodes)
7881 			//	and have a simple flag that we could test for here
7882 			vnode = mount->vnodes.GetNext(vnode);
7883 		}
7884 
7885 		if (vnode != NULL) {
7886 			// insert marker vnode again
7887 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7888 			marker.SetRemoved(false);
7889 		}
7890 
7891 		recursive_lock_unlock(&mount->rlock);
7892 
7893 		if (vnode == NULL)
7894 			break;
7895 
7896 		vnode = lookup_vnode(mount->id, vnode->id);
7897 		if (vnode == NULL || vnode->IsBusy())
7898 			continue;
7899 
7900 		if (vnode->ref_count == 0) {
7901 			// this vnode has been unused before
7902 			vnode_used(vnode);
7903 		}
7904 		inc_vnode_ref_count(vnode);
7905 
7906 		locker.Unlock();
7907 
7908 		if (vnode->cache != NULL && !vnode->IsRemoved())
7909 			vnode->cache->WriteModified();
7910 
7911 		put_vnode(vnode);
7912 	}
7913 
7914 	// Let the file systems do their synchronizing work
7915 	if (HAS_FS_MOUNT_CALL(mount, sync))
7916 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7917 
7918 	// Finally, flush the underlying device's write cache (if possible).
7919 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7920 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7921 
7922 	put_mount(mount);
7923 	return status;
7924 }
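
// Note: the marker vnode above implements a common pattern for iterating a
// list that may change while its lock is temporarily dropped. In sketch form
// (hypothetical list/helper names):
//
//	list.Insert(list.GetNext(current), &marker);
//		// remember the position right behind "current"
//	unlock();
//	process(current);			// may block; the list may change meanwhile
//	lock();
//	current = list.GetNext(&marker);	// resume behind the marker
//	list.Remove(&marker);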
7925 
7926 
7927 static status_t
7928 fs_read_info(dev_t device, struct fs_info* info)
7929 {
7930 	struct fs_mount* mount;
7931 	status_t status = get_mount(device, &mount);
7932 	if (status != B_OK)
7933 		return status;
7934 
7935 	memset(info, 0, sizeof(struct fs_info));
7936 
7937 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7938 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7939 
7940 	// fill in info the file system doesn't (have to) know about
7941 	if (status == B_OK) {
7942 		info->dev = mount->id;
7943 		info->root = mount->root_vnode->id;
7944 
7945 		fs_volume* volume = mount->volume;
7946 		while (volume->super_volume != NULL)
7947 			volume = volume->super_volume;
7948 
7949 		strlcpy(info->fsh_name, volume->file_system_name,
7950 			sizeof(info->fsh_name));
7951 		if (mount->device_name != NULL) {
7952 			strlcpy(info->device_name, mount->device_name,
7953 				sizeof(info->device_name));
7954 		}
7955 	}
7956 
7957 	// if the call is not supported by the file system, there are still
7958 	// the parts that we filled out ourselves
7959 
7960 	put_mount(mount);
7961 	return status;
7962 }
7963 
7964 
7965 static status_t
7966 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7967 {
7968 	struct fs_mount* mount;
7969 	status_t status = get_mount(device, &mount);
7970 	if (status != B_OK)
7971 		return status;
7972 
7973 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7974 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7975 	else
7976 		status = B_READ_ONLY_DEVICE;
7977 
7978 	put_mount(mount);
7979 	return status;
7980 }
7981 
7982 
7983 static dev_t
7984 fs_next_device(int32* _cookie)
7985 {
7986 	struct fs_mount* mount = NULL;
7987 	dev_t device = *_cookie;
7988 
7989 	mutex_lock(&sMountMutex);
7990 
7991 	// Since device IDs are assigned sequentially, this algorithm
7992 	// works well enough. It makes sure that the device list
7993 	// returned is sorted, and that no device is skipped when an
7994 	// already visited device got unmounted.
7995 
7996 	while (device < sNextMountID) {
7997 		mount = find_mount(device++);
7998 		if (mount != NULL && mount->volume->private_volume != NULL)
7999 			break;
8000 	}
8001 
8002 	*_cookie = device;
8003 
8004 	if (mount != NULL)
8005 		device = mount->id;
8006 	else
8007 		device = B_BAD_VALUE;
8008 
8009 	mutex_unlock(&sMountMutex);
8010 
8011 	return device;
8012 }
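
// Example (a sketch of the intended use): volumes are enumerated with a
// cookie that starts at 0; the loop ends when B_BAD_VALUE (negative) is
// returned:
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0) {
//		// ... device is the ID of a mounted volume ...
//	}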
8013 
8014 
8015 ssize_t
8016 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8017 	void *buffer, size_t readBytes)
8018 {
8019 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8020 	if (attrFD < 0)
8021 		return attrFD;
8022 
8023 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8024 
8025 	_kern_close(attrFD);
8026 
8027 	return bytesRead;
8028 }
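
// Example (a sketch, assuming a node that carries a "BEOS:TYPE" attribute):
// reading an attribute through the wrapper above:
//
//	char mimeType[B_MIME_TYPE_LENGTH];
//	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
//		mimeType, sizeof(mimeType));
//	if (bytesRead < 0)
//		return bytesRead;	// an error code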
8029 
8030 
8031 static status_t
8032 get_cwd(char* buffer, size_t size, bool kernel)
8033 {
8034 	// Get current working directory from io context
8035 	struct io_context* context = get_current_io_context(kernel);
8036 	status_t status;
8037 
8038 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8039 
8040 	mutex_lock(&context->io_mutex);
8041 
8042 	struct vnode* vnode = context->cwd;
8043 	if (vnode)
8044 		inc_vnode_ref_count(vnode);
8045 
8046 	mutex_unlock(&context->io_mutex);
8047 
8048 	if (vnode) {
8049 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8050 		put_vnode(vnode);
8051 	} else
8052 		status = B_ERROR;
8053 
8054 	return status;
8055 }
8056 
8057 
8058 static status_t
8059 set_cwd(int fd, char* path, bool kernel)
8060 {
8061 	struct io_context* context;
8062 	struct vnode* vnode = NULL;
8063 	struct vnode* oldDirectory;
8064 	status_t status;
8065 
8066 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8067 
8068 	// Get vnode for passed path, and bail if it failed
8069 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8070 	if (status < 0)
8071 		return status;
8072 
8073 	if (!S_ISDIR(vnode->Type())) {
8074 		// nope, can't cwd to here
8075 		status = B_NOT_A_DIRECTORY;
8076 		goto err;
8077 	}
8078 
8079 	// We need to have the permission to enter the directory, too
8080 	if (HAS_FS_CALL(vnode, access)) {
8081 		status = FS_CALL(vnode, access, X_OK);
8082 		if (status != B_OK)
8083 			goto err;
8084 	}
8085 
8086 	// Get current io context and lock
8087 	context = get_current_io_context(kernel);
8088 	mutex_lock(&context->io_mutex);
8089 
8090 	// save the old current working directory first
8091 	oldDirectory = context->cwd;
8092 	context->cwd = vnode;
8093 
8094 	mutex_unlock(&context->io_mutex);
8095 
8096 	if (oldDirectory)
8097 		put_vnode(oldDirectory);
8098 
8099 	return B_NO_ERROR;
8100 
8101 err:
8102 	put_vnode(vnode);
8103 	return status;
8104 }
8105 
8106 
8107 static status_t
8108 user_copy_name(char* to, const char* from, size_t length)
8109 {
8110 	ssize_t len = user_strlcpy(to, from, length);
8111 	if (len < 0)
8112 		return len;
8113 	if (len >= (ssize_t)length)
8114 		return B_NAME_TOO_LONG;
8115 	return B_OK;
8116 }
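
// Note: unlike a bare user_strlcpy(), user_copy_name() treats truncation as
// an error. A sketch of the resulting call pattern, used throughout the
// syscalls below:
//
//	char name[B_FILE_NAME_LENGTH];
//	status_t status = user_copy_name(name, userName, sizeof(name));
//	if (status != B_OK)
//		return status;	// B_BAD_ADDRESS or B_NAME_TOO_LONG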
8117 
8118 
8119 //	#pragma mark - kernel mirrored syscalls
8120 
8121 
8122 dev_t
8123 _kern_mount(const char* path, const char* device, const char* fsName,
8124 	uint32 flags, const char* args, size_t argsLength)
8125 {
8126 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8127 	if (pathBuffer.InitCheck() != B_OK)
8128 		return B_NO_MEMORY;
8129 
8130 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8131 }
8132 
8133 
8134 status_t
8135 _kern_unmount(const char* path, uint32 flags)
8136 {
8137 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8138 	if (pathBuffer.InitCheck() != B_OK)
8139 		return B_NO_MEMORY;
8140 
8141 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8142 }
8143 
8144 
8145 status_t
8146 _kern_read_fs_info(dev_t device, struct fs_info* info)
8147 {
8148 	if (info == NULL)
8149 		return B_BAD_VALUE;
8150 
8151 	return fs_read_info(device, info);
8152 }
8153 
8154 
8155 status_t
8156 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8157 {
8158 	if (info == NULL)
8159 		return B_BAD_VALUE;
8160 
8161 	return fs_write_info(device, info, mask);
8162 }
8163 
8164 
8165 status_t
8166 _kern_sync(void)
8167 {
8168 	// Note: _kern_sync() is also called from _user_sync()
8169 	int32 cookie = 0;
8170 	dev_t device;
8171 	while ((device = next_dev(&cookie)) >= 0) {
8172 		status_t status = fs_sync(device);
8173 		if (status != B_OK && status != B_BAD_VALUE) {
8174 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8175 				strerror(status));
8176 		}
8177 	}
8178 
8179 	return B_OK;
8180 }
8181 
8182 
8183 dev_t
8184 _kern_next_device(int32* _cookie)
8185 {
8186 	return fs_next_device(_cookie);
8187 }
8188 
8189 
8190 status_t
8191 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8192 	size_t infoSize)
8193 {
8194 	if (infoSize != sizeof(fd_info))
8195 		return B_BAD_VALUE;
8196 
8197 	// get the team
8198 	Team* team = Team::Get(teamID);
8199 	if (team == NULL)
8200 		return B_BAD_TEAM_ID;
8201 	BReference<Team> teamReference(team, true);
8202 
8203 	// now that we have a team reference, its I/O context won't go away
8204 	io_context* context = team->io_context;
8205 	MutexLocker contextLocker(context->io_mutex);
8206 
8207 	uint32 slot = *_cookie;
8208 
8209 	struct file_descriptor* descriptor;
8210 	while (slot < context->table_size
8211 		&& (descriptor = context->fds[slot]) == NULL) {
8212 		slot++;
8213 	}
8214 
8215 	if (slot >= context->table_size)
8216 		return B_ENTRY_NOT_FOUND;
8217 
8218 	info->number = slot;
8219 	info->open_mode = descriptor->open_mode;
8220 
8221 	struct vnode* vnode = fd_vnode(descriptor);
8222 	if (vnode != NULL) {
8223 		info->device = vnode->device;
8224 		info->node = vnode->id;
8225 	} else if (descriptor->u.mount != NULL) {
8226 		info->device = descriptor->u.mount->id;
8227 		info->node = -1;
8228 	}
8229 
8230 	*_cookie = slot + 1;
8231 	return B_OK;
8232 }
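
// Example (a sketch of the intended use): iterating a team's open file
// descriptors; the cookie is the next slot to inspect and B_ENTRY_NOT_FOUND
// ends the scan:
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
//			== B_OK) {
//		// ... info.number, info.device, info.node describe one open FD ...
//	}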
8233 
8234 
8235 int
8236 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8237 	int perms)
8238 {
8239 	if ((openMode & O_CREAT) != 0) {
8240 		return file_create_entry_ref(device, inode, name, openMode, perms,
8241 			true);
8242 	}
8243 
8244 	return file_open_entry_ref(device, inode, name, openMode, true);
8245 }
8246 
8247 
8248 /*!	\brief Opens a node specified by a FD + path pair.
8249 
8250 	At least one of \a fd and \a path must be specified.
8251 	If only \a fd is given, the function opens the node identified by this
8252 	FD. If only a path is given, this path is opened. If both are given and
8253 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8254 	of the directory (!) identified by \a fd.
8255 
8256 	\param fd The FD. May be < 0.
8257 	\param path The absolute or relative path. May be \c NULL.
8258 	\param openMode The open mode.
8259 	\return A FD referring to the newly opened node, or an error code,
8260 			if an error occurs.
8261 */
8262 int
8263 _kern_open(int fd, const char* path, int openMode, int perms)
8264 {
8265 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8266 	if (pathBuffer.InitCheck() != B_OK)
8267 		return B_NO_MEMORY;
8268 
8269 	if ((openMode & O_CREAT) != 0)
8270 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8271 
8272 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8273 }
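
// Example (a sketch, with hypothetical paths): the FD + path semantics
// described above. A relative path is reckoned off the directory FD, while
// an absolute path makes the FD irrelevant:
//
//	int dirFD = _kern_open_dir(-1, "/boot/home");
//	int fd1 = _kern_open(dirFD, "config/settings", O_RDONLY, 0);
//		// opens /boot/home/config/settings
//	int fd2 = _kern_open(dirFD, "/boot/README", O_RDONLY, 0);
//		// dirFD is ignored; opens /boot/README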
8274 
8275 
8276 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8277 
8278 	The supplied name may be \c NULL, in which case directory identified
8279 	by \a device and \a inode will be opened. Otherwise \a device and
8280 	\a inode identify the parent directory of the directory to be opened
8281 	and \a name its entry name.
8282 
8283 	\param device If \a name is specified the ID of the device the parent
8284 		   directory of the directory to be opened resides on, otherwise
8285 		   the device of the directory itself.
8286 	\param inode If \a name is specified the node ID of the parent
8287 		   directory of the directory to be opened, otherwise node ID of the
8288 		   directory itself.
8289 	\param name The entry name of the directory to be opened. If \c NULL,
8290 		   the \a device + \a inode pair identify the node to be opened.
8291 	\return The FD of the newly opened directory or an error code, if
8292 			something went wrong.
8293 */
8294 int
8295 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8296 {
8297 	return dir_open_entry_ref(device, inode, name, true);
8298 }
8299 
8300 
8301 /*!	\brief Opens a directory specified by a FD + path pair.
8302 
8303 	At least one of \a fd and \a path must be specified.
8304 	If only \a fd is given, the function opens the directory identified by this
8305 	FD. If only a path is given, this path is opened. If both are given and
8306 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8307 	of the directory (!) identified by \a fd.
8308 
8309 	\param fd The FD. May be < 0.
8310 	\param path The absolute or relative path. May be \c NULL.
8311 	\return A FD referring to the newly opened directory, or an error code,
8312 			if an error occurs.
8313 */
8314 int
8315 _kern_open_dir(int fd, const char* path)
8316 {
8317 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8318 	if (pathBuffer.InitCheck() != B_OK)
8319 		return B_NO_MEMORY;
8320 
8321 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8322 }
8323 
8324 
8325 status_t
8326 _kern_fcntl(int fd, int op, size_t argument)
8327 {
8328 	return common_fcntl(fd, op, argument, true);
8329 }
8330 
8331 
8332 status_t
8333 _kern_fsync(int fd)
8334 {
8335 	return common_sync(fd, true);
8336 }
8337 
8338 
8339 status_t
8340 _kern_lock_node(int fd)
8341 {
8342 	return common_lock_node(fd, true);
8343 }
8344 
8345 
8346 status_t
8347 _kern_unlock_node(int fd)
8348 {
8349 	return common_unlock_node(fd, true);
8350 }
8351 
8352 
8353 status_t
8354 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8355 	int perms)
8356 {
8357 	return dir_create_entry_ref(device, inode, name, perms, true);
8358 }
8359 
8360 
8361 /*!	\brief Creates a directory specified by a FD + path pair.
8362 
8363 	\a path must always be specified (it contains the name of the new directory
8364 	at least). If only a path is given, this path identifies the location at
8365 	which the directory shall be created. If both \a fd and \a path are given
8366 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8367 	of the directory (!) identified by \a fd.
8368 
8369 	\param fd The FD. May be < 0.
8370 	\param path The absolute or relative path. Must not be \c NULL.
8371 	\param perms The access permissions the new directory shall have.
8372 	\return \c B_OK, if the directory has been created successfully, another
8373 			error code otherwise.
8374 */
8375 status_t
8376 _kern_create_dir(int fd, const char* path, int perms)
8377 {
8378 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8379 	if (pathBuffer.InitCheck() != B_OK)
8380 		return B_NO_MEMORY;
8381 
8382 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8383 }
8384 
8385 
8386 status_t
8387 _kern_remove_dir(int fd, const char* path)
8388 {
8389 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8390 	if (pathBuffer.InitCheck() != B_OK)
8391 		return B_NO_MEMORY;
8392 
8393 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8394 }
8395 
8396 
8397 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8398 
8399 	At least one of \a fd and \a path must be specified.
8400 	If only \a fd is given, the symlink to be read is the node
8401 	identified by this FD. If only a path is given, this path identifies the
8402 	symlink to be read. If both are given and the path is absolute, \a fd is
8403 	ignored; a relative path is reckoned off of the directory (!) identified
8404 	by \a fd.
8405 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8406 	will still be updated to reflect the required buffer size.
8407 
8408 	\param fd The FD. May be < 0.
8409 	\param path The absolute or relative path. May be \c NULL.
8410 	\param buffer The buffer into which the contents of the symlink shall be
8411 		   written.
8412 	\param _bufferSize A pointer to the size of the supplied buffer.
8413 	\return The length of the link on success or an appropriate error code
8414 */
8415 status_t
8416 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8417 {
8418 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8419 	if (pathBuffer.InitCheck() != B_OK)
8420 		return B_NO_MEMORY;
8421 
8422 	return common_read_link(fd, pathBuffer.LockBuffer(),
8423 		buffer, _bufferSize, true);
8424 }
8425 
8426 
8427 /*!	\brief Creates a symlink specified by a FD + path pair.
8428 
8429 	\a path must always be specified (it contains the name of the new symlink
8430 	at least). If only a path is given, this path identifies the location at
8431 	which the symlink shall be created. If both \a fd and \a path are given and
8432 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8433 	of the directory (!) identified by \a fd.
8434 
8435 	\param fd The FD. May be < 0.
8436 	\param toPath The path the symlink shall point to. Must not be \c NULL.
8437 	\param mode The access permissions the new symlink shall have.
8438 	\return \c B_OK, if the symlink has been created successfully, another
8439 			error code otherwise.
8440 */
8441 status_t
8442 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8443 {
8444 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8445 	if (pathBuffer.InitCheck() != B_OK)
8446 		return B_NO_MEMORY;
8447 
8448 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8449 		toPath, mode, true);
8450 }
8451 
8452 
8453 status_t
8454 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8455 	bool traverseLeafLink)
8456 {
8457 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8458 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8459 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8460 		return B_NO_MEMORY;
8461 
8462 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8463 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8464 }
8465 
8466 
8467 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8468 
8469 	\a path must always be specified (it contains at least the name of the entry
8470 	to be deleted). If only a path is given, this path identifies the entry
8471 	directly. If both \a fd and \a path are given and the path is absolute,
8472 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8473 	identified by \a fd.
8474 
8475 	\param fd The FD. May be < 0.
8476 	\param path The absolute or relative path. Must not be \c NULL.
8477 	\return \c B_OK, if the entry has been removed successfully, another
8478 			error code otherwise.
8479 */
8480 status_t
8481 _kern_unlink(int fd, const char* path)
8482 {
8483 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8484 	if (pathBuffer.InitCheck() != B_OK)
8485 		return B_NO_MEMORY;
8486 
8487 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8488 }
8489 
8490 
8491 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8492 		   by another FD + path pair.
8493 
8494 	\a oldPath and \a newPath must always be specified (they contain at least
8495 	the name of the entry). If only a path is given, this path identifies the
8496 	entry directly. If both a FD and a path are given and the path is absolute,
8497 	the FD is ignored; a relative path is reckoned off of the directory (!)
8498 	identified by the respective FD.
8499 
8500 	\param oldFD The FD of the old location. May be < 0.
8501 	\param oldPath The absolute or relative path of the old location. Must not
8502 		   be \c NULL.
8503 	\param newFD The FD of the new location. May be < 0.
8504 	\param newPath The absolute or relative path of the new location. Must not
8505 		   be \c NULL.
8506 	\return \c B_OK, if the entry has been moved successfully, another
8507 			error code otherwise.
8508 */
8509 status_t
8510 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8511 {
8512 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8513 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8514 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8515 		return B_NO_MEMORY;
8516 
8517 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8518 		newFD, newPathBuffer.LockBuffer(), true);
8519 }
8520 
8521 
8522 status_t
8523 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8524 {
8525 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8526 	if (pathBuffer.InitCheck() != B_OK)
8527 		return B_NO_MEMORY;
8528 
8529 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8530 		true);
8531 }
8532 
8533 
8534 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8535 
8536 	If only \a fd is given, the stat operation associated with the type
8537 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8538 	given, this path identifies the entry for whose node to retrieve the
8539 	stat data. If both \a fd and \a path are given and the path is absolute,
8540 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8541 	identified by \a fd and specifies the entry whose stat data shall be
8542 	retrieved.
8543 
8544 	\param fd The FD. May be < 0.
8545 	\param path The absolute or relative path. May be \c NULL.
8546 	\param traverseLeafLink If \a path is given, \c true specifies that the
8547 		   function shall not stick to symlinks, but traverse them.
8548 	\param stat The buffer the stat data shall be written into.
8549 	\param statSize The size of the supplied stat buffer.
8550 	\return \c B_OK, if the stat data have been read successfully, another
8551 			error code otherwise.
8552 */
8553 status_t
8554 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8555 	struct stat* stat, size_t statSize)
8556 {
8557 	struct stat completeStat;
8558 	struct stat* originalStat = NULL;
8559 	status_t status;
8560 
8561 	if (statSize > sizeof(struct stat))
8562 		return B_BAD_VALUE;
8563 
8564 	// this supports different stat extensions
8565 	if (statSize < sizeof(struct stat)) {
8566 		originalStat = stat;
8567 		stat = &completeStat;
8568 	}
8569 
8570 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8571 
8572 	if (status == B_OK && originalStat != NULL)
8573 		memcpy(originalStat, stat, statSize);
8574 
8575 	return status;
8576 }
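
// Note: the statSize mechanism above exists for binary compatibility. A
// caller compiled against an older, smaller struct stat receives just the
// leading statSize bytes of the complete data. A sketch (old_stat is
// hypothetical, assumed to be a prefix of the current struct stat):
//
//	struct old_stat oldStat;
//	status_t status = _kern_read_stat(fd, NULL, false,
//		(struct stat*)&oldStat, sizeof(oldStat));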
8577 
8578 
8579 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8580 
8581 	If only \a fd is given, the stat operation associated with the type
8582 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8583 	given, this path identifies the entry for whose node to write the
8584 	stat data. If both \a fd and \a path are given and the path is absolute,
8585 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8586 	identified by \a fd and specifies the entry whose stat data shall be
8587 	written.
8588 
8589 	\param fd The FD. May be < 0.
8590 	\param path The absolute or relative path. May be \c NULL.
8591 	\param traverseLeafLink If \a path is given, \c true specifies that the
8592 		   function shall not stick to symlinks, but traverse them.
8593 	\param stat The buffer containing the stat data to be written.
8594 	\param statSize The size of the supplied stat buffer.
8595 	\param statMask A mask specifying which parts of the stat data shall be
8596 		   written.
8597 	\return \c B_OK, if the stat data have been written successfully,
8598 			another error code otherwise.
8599 */
8600 status_t
8601 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8602 	const struct stat* stat, size_t statSize, int statMask)
8603 {
8604 	struct stat completeStat;
8605 
8606 	if (statSize > sizeof(struct stat))
8607 		return B_BAD_VALUE;
8608 
8609 	// this supports different stat extensions
8610 	if (statSize < sizeof(struct stat)) {
8611 		memset((uint8*)&completeStat + statSize, 0,
8612 			sizeof(struct stat) - statSize);
8613 		memcpy(&completeStat, stat, statSize);
8614 		stat = &completeStat;
8615 	}
8616 
8617 	status_t status;
8618 
8619 	if (path != NULL) {
8620 		// path given: write the stat of the node referred to by (fd, path)
8621 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8622 		if (pathBuffer.InitCheck() != B_OK)
8623 			return B_NO_MEMORY;
8624 
8625 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8626 			traverseLeafLink, stat, statMask, true);
8627 	} else {
8628 		// no path given: get the FD and use the FD operation
8629 		struct file_descriptor* descriptor
8630 			= get_fd(get_current_io_context(true), fd);
8631 		if (descriptor == NULL)
8632 			return B_FILE_ERROR;
8633 
8634 		if (descriptor->ops->fd_write_stat)
8635 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8636 		else
8637 			status = B_UNSUPPORTED;
8638 
8639 		put_fd(descriptor);
8640 	}
8641 
8642 	return status;
8643 }
8644 
8645 
8646 int
8647 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8648 {
8649 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8650 	if (pathBuffer.InitCheck() != B_OK)
8651 		return B_NO_MEMORY;
8652 
8653 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8654 }
8655 
8656 
8657 int
8658 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8659 	int openMode)
8660 {
8661 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8662 	if (pathBuffer.InitCheck() != B_OK)
8663 		return B_NO_MEMORY;
8664 
8665 	if ((openMode & O_CREAT) != 0) {
8666 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8667 			true);
8668 	}
8669 
8670 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8671 }
8672 
8673 
8674 status_t
8675 _kern_remove_attr(int fd, const char* name)
8676 {
8677 	return attr_remove(fd, name, true);
8678 }
8679 
8680 
8681 status_t
8682 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8683 	const char* toName)
8684 {
8685 	return attr_rename(fromFile, fromName, toFile, toName, true);
8686 }
8687 
8688 
8689 int
8690 _kern_open_index_dir(dev_t device)
8691 {
8692 	return index_dir_open(device, true);
8693 }
8694 
8695 
8696 status_t
8697 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8698 {
8699 	return index_create(device, name, type, flags, true);
8700 }
8701 
8702 
8703 status_t
8704 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8705 {
8706 	return index_name_read_stat(device, name, stat, true);
8707 }
8708 
8709 
8710 status_t
8711 _kern_remove_index(dev_t device, const char* name)
8712 {
8713 	return index_remove(device, name, true);
8714 }
8715 
8716 
8717 status_t
8718 _kern_getcwd(char* buffer, size_t size)
8719 {
8720 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8721 
8722 	// Call vfs to get current working directory
8723 	return get_cwd(buffer, size, true);
8724 }
8725 
8726 
8727 status_t
8728 _kern_setcwd(int fd, const char* path)
8729 {
8730 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8731 	if (pathBuffer.InitCheck() != B_OK)
8732 		return B_NO_MEMORY;
8733 
8734 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8735 }
8736 
8737 
8738 //	#pragma mark - userland syscalls
8739 
8740 
8741 dev_t
8742 _user_mount(const char* userPath, const char* userDevice,
8743 	const char* userFileSystem, uint32 flags, const char* userArgs,
8744 	size_t argsLength)
8745 {
8746 	char fileSystem[B_FILE_NAME_LENGTH];
8747 	KPath path, device;
8748 	char* args = NULL;
8749 	status_t status;
8750 
8751 	if (!IS_USER_ADDRESS(userPath)
8752 		|| !IS_USER_ADDRESS(userFileSystem)
8753 		|| !IS_USER_ADDRESS(userDevice))
8754 		return B_BAD_ADDRESS;
8755 
8756 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8757 		return B_NO_MEMORY;
8758 
8759 	status = user_copy_name(path.LockBuffer(), userPath,
8760 		B_PATH_NAME_LENGTH);
8761 	if (status != B_OK)
8762 		return status;
8763 
8764 	if (userFileSystem != NULL) {
8765 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8766 		if (status != B_OK)
8767 			return status;
8768 	}
8769 
8770 	if (userDevice != NULL) {
8771 		status = user_copy_name(device.LockBuffer(), userDevice,
8772 			B_PATH_NAME_LENGTH);
8773 		if (status != B_OK)
8774 			return status;
8775 	}
8776 
8777 	if (userArgs != NULL && argsLength > 0) {
8778 		if (!IS_USER_ADDRESS(userArgs))
8779 			return B_BAD_ADDRESS;
8780 
8781 		// this is a safety restriction
8782 		if (argsLength >= 65536)
8783 			return B_NAME_TOO_LONG;
8784 
8785 		args = (char*)malloc(argsLength + 1);
8786 		if (args == NULL)
8787 			return B_NO_MEMORY;
8788 
8789 		status = user_copy_name(args, userArgs, argsLength + 1);
8790 		if (status != B_OK) {
8791 			free(args);
8792 			return status;
8793 		}
8794 	}
8795 	path.UnlockBuffer();
8796 	device.UnlockBuffer();
8797 
8798 	status = fs_mount(path.LockBuffer(),
8799 		userDevice != NULL ? device.Path() : NULL,
8800 		userFileSystem ? fileSystem : NULL, flags, args, false);
8801 
8802 	free(args);
8803 	return status;
8804 }
8805 
8806 
8807 status_t
8808 _user_unmount(const char* userPath, uint32 flags)
8809 {
8810 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8811 
8812 	if (!IS_USER_ADDRESS(userPath))
8813 		return B_BAD_ADDRESS;
8814 
8815 	if (pathBuffer.InitCheck() != B_OK)
8816 		return B_NO_MEMORY;
8817 
8818 	char* path = pathBuffer.LockBuffer();
8819 
8820 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8821 	if (status != B_OK)
8822 		return status;
8823 
8824 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8825 }
8826 
8827 
8828 status_t
8829 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8830 {
8831 	struct fs_info info;
8832 	status_t status;
8833 
8834 	if (userInfo == NULL)
8835 		return B_BAD_VALUE;
8836 
8837 	if (!IS_USER_ADDRESS(userInfo))
8838 		return B_BAD_ADDRESS;
8839 
8840 	status = fs_read_info(device, &info);
8841 	if (status != B_OK)
8842 		return status;
8843 
8844 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8845 		return B_BAD_ADDRESS;
8846 
8847 	return B_OK;
8848 }
8849 
8850 
8851 status_t
8852 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8853 {
8854 	struct fs_info info;
8855 
8856 	if (userInfo == NULL)
8857 		return B_BAD_VALUE;
8858 
8859 	if (!IS_USER_ADDRESS(userInfo)
8860 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8861 		return B_BAD_ADDRESS;
8862 
8863 	return fs_write_info(device, &info, mask);
8864 }
8865 
8866 
8867 dev_t
8868 _user_next_device(int32* _userCookie)
8869 {
8870 	int32 cookie;
8871 	dev_t device;
8872 
8873 	if (!IS_USER_ADDRESS(_userCookie)
8874 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8875 		return B_BAD_ADDRESS;
8876 
8877 	device = fs_next_device(&cookie);
8878 
8879 	if (device >= B_OK) {
8880 		// update user cookie
8881 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8882 			return B_BAD_ADDRESS;
8883 	}
8884 
8885 	return device;
8886 }
8887 
8888 
8889 status_t
8890 _user_sync(void)
8891 {
8892 	return _kern_sync();
8893 }
8894 
8895 
8896 status_t
8897 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8898 	size_t infoSize)
8899 {
8900 	struct fd_info info;
8901 	uint32 cookie;
8902 
8903 	// only root can do this (or should root's group be enough?)
8904 	if (geteuid() != 0)
8905 		return B_NOT_ALLOWED;
8906 
8907 	if (infoSize != sizeof(fd_info))
8908 		return B_BAD_VALUE;
8909 
8910 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8911 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8912 		return B_BAD_ADDRESS;
8913 
8914 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8915 	if (status != B_OK)
8916 		return status;
8917 
8918 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8919 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8920 		return B_BAD_ADDRESS;
8921 
8922 	return status;
8923 }
8924 
8925 
8926 status_t
8927 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8928 	char* userPath, size_t pathLength)
8929 {
8930 	if (!IS_USER_ADDRESS(userPath))
8931 		return B_BAD_ADDRESS;
8932 
8933 	KPath path(B_PATH_NAME_LENGTH + 1);
8934 	if (path.InitCheck() != B_OK)
8935 		return B_NO_MEMORY;
8936 
8937 	// copy the leaf name onto the stack
8938 	char stackLeaf[B_FILE_NAME_LENGTH];
8939 	if (leaf != NULL) {
8940 		if (!IS_USER_ADDRESS(leaf))
8941 			return B_BAD_ADDRESS;
8942 
8943 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8944 		if (status != B_OK)
8945 			return status;
8946 
8947 		leaf = stackLeaf;
8948 	}
8949 
8950 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8951 		false, path.LockBuffer(), path.BufferSize());
8952 	if (status != B_OK)
8953 		return status;
8954 
8955 	path.UnlockBuffer();
8956 
8957 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8958 	if (length < 0)
8959 		return length;
8960 	if (length >= (int)pathLength)
8961 		return B_BUFFER_OVERFLOW;
8962 
8963 	return B_OK;
8964 }
8965 
8966 
8967 status_t
8968 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8969 {
8970 	if (userPath == NULL || buffer == NULL)
8971 		return B_BAD_VALUE;
8972 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8973 		return B_BAD_ADDRESS;
8974 
8975 	// copy path from userland
8976 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8977 	if (pathBuffer.InitCheck() != B_OK)
8978 		return B_NO_MEMORY;
8979 	char* path = pathBuffer.LockBuffer();
8980 
8981 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8982 	if (status != B_OK)
8983 		return status;
8984 
8985 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8986 		false);
8987 	if (error != B_OK)
8988 		return error;
8989 
8990 	// copy back to userland
8991 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8992 	if (len < 0)
8993 		return len;
8994 	if (len >= B_PATH_NAME_LENGTH)
8995 		return B_BUFFER_OVERFLOW;
8996 
8997 	return B_OK;
8998 }
8999 
9000 
9001 int
9002 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9003 	int openMode, int perms)
9004 {
9005 	char name[B_FILE_NAME_LENGTH];
9006 
9007 	if (userName == NULL || device < 0 || inode < 0)
9008 		return B_BAD_VALUE;
9009 	if (!IS_USER_ADDRESS(userName))
9010 		return B_BAD_ADDRESS;
9011 	status_t status = user_copy_name(name, userName, sizeof(name));
9012 	if (status != B_OK)
9013 		return status;
9014 
9015 	if ((openMode & O_CREAT) != 0) {
9016 		return file_create_entry_ref(device, inode, name, openMode, perms,
9017 			false);
9018 	}
9019 
9020 	return file_open_entry_ref(device, inode, name, openMode, false);
9021 }
9022 
9023 
9024 int
9025 _user_open(int fd, const char* userPath, int openMode, int perms)
9026 {
9027 	KPath path(B_PATH_NAME_LENGTH + 1);
9028 	if (path.InitCheck() != B_OK)
9029 		return B_NO_MEMORY;
9030 
9031 	char* buffer = path.LockBuffer();
9032 
9033 	if (!IS_USER_ADDRESS(userPath))
9034 		return B_BAD_ADDRESS;
9035 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9036 	if (status != B_OK)
9037 		return status;
9038 
9039 	if ((openMode & O_CREAT) != 0)
9040 		return file_create(fd, buffer, openMode, perms, false);
9041 
9042 	return file_open(fd, buffer, openMode, false);
9043 }
9044 
9045 
9046 int
9047 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9048 {
9049 	if (userName != NULL) {
9050 		char name[B_FILE_NAME_LENGTH];
9051 
9052 		if (!IS_USER_ADDRESS(userName))
9053 			return B_BAD_ADDRESS;
9054 		status_t status = user_copy_name(name, userName, sizeof(name));
9055 		if (status != B_OK)
9056 			return status;
9057 
9058 		return dir_open_entry_ref(device, inode, name, false);
9059 	}
9060 	return dir_open_entry_ref(device, inode, NULL, false);
9061 }
9062 
9063 
9064 int
9065 _user_open_dir(int fd, const char* userPath)
9066 {
9067 	if (userPath == NULL)
9068 		return dir_open(fd, NULL, false);
9069 
9070 	KPath path(B_PATH_NAME_LENGTH + 1);
9071 	if (path.InitCheck() != B_OK)
9072 		return B_NO_MEMORY;
9073 
9074 	char* buffer = path.LockBuffer();
9075 
9076 	if (!IS_USER_ADDRESS(userPath))
9077 		return B_BAD_ADDRESS;
9078 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9079 	if (status != B_OK)
9080 		return status;
9081 
9082 	return dir_open(fd, buffer, false);
9083 }
9084 
9085 
9086 /*!	\brief Opens a directory's parent directory and returns the entry name
9087 		   of the former.
9088 
9089 	Aside from returning the directory's entry name, this method is
9090 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9091 	equivalent, if \a userName is \c NULL.
9092 
9093 	If a name buffer is supplied and the name does not fit the buffer, the
9094 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9095 
9096 	\param fd A FD referring to a directory.
9097 	\param userName Buffer the directory's entry name shall be written into.
9098 		   May be \c NULL.
9099 	\param nameLength Size of the name buffer.
9100 	\return The file descriptor of the opened parent directory, if everything
9101 			went fine, an error code otherwise.
9102 */
9103 int
9104 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9105 {
9106 	bool kernel = false;
9107 
9108 	if (userName && !IS_USER_ADDRESS(userName))
9109 		return B_BAD_ADDRESS;
9110 
9111 	// open the parent dir
9112 	int parentFD = dir_open(fd, (char*)"..", kernel);
9113 	if (parentFD < 0)
9114 		return parentFD;
9115 	FDCloser fdCloser(parentFD, kernel);
9116 
9117 	if (userName) {
9118 		// get the vnodes
9119 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9120 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9121 		VNodePutter parentVNodePutter(parentVNode);
9122 		VNodePutter dirVNodePutter(dirVNode);
9123 		if (!parentVNode || !dirVNode)
9124 			return B_FILE_ERROR;
9125 
9126 		// get the vnode name
9127 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9128 		struct dirent* buffer = (struct dirent*)_buffer;
9129 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9130 			sizeof(_buffer), get_current_io_context(false));
9131 		if (status != B_OK)
9132 			return status;
9133 
9134 		// copy the name to the userland buffer
9135 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9136 		if (len < 0)
9137 			return len;
9138 		if (len >= (int)nameLength)
9139 			return B_BUFFER_OVERFLOW;
9140 	}
9141 
9142 	return fdCloser.Detach();
9143 }
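
// Example (a sketch of the intended use from userland, via the matching
// _kern_open_parent_dir() syscall stub): after the call, "name" holds the
// entry name of dirFD inside the returned parent directory:
//
//	char name[B_FILE_NAME_LENGTH];
//	int parentFD = _kern_open_parent_dir(dirFD, name, sizeof(name));
//	if (parentFD < 0)
//		return parentFD;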
9144 
9145 
9146 status_t
9147 _user_fcntl(int fd, int op, size_t argument)
9148 {
9149 	status_t status = common_fcntl(fd, op, argument, false);
9150 	if (op == F_SETLKW)
9151 		syscall_restart_handle_post(status);
9152 
9153 	return status;
9154 }
9155 
9156 
9157 status_t
9158 _user_fsync(int fd)
9159 {
9160 	return common_sync(fd, false);
9161 }
9162 
9163 
9164 status_t
9165 _user_flock(int fd, int operation)
9166 {
9167 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9168 
9169 	// Check if the operation is valid
9170 	switch (operation & ~LOCK_NB) {
9171 		case LOCK_UN:
9172 		case LOCK_SH:
9173 		case LOCK_EX:
9174 			break;
9175 
9176 		default:
9177 			return B_BAD_VALUE;
9178 	}
9179 
9180 	struct file_descriptor* descriptor;
9181 	struct vnode* vnode;
9182 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9183 	if (descriptor == NULL)
9184 		return B_FILE_ERROR;
9185 
9186 	if (descriptor->type != FDTYPE_FILE) {
9187 		put_fd(descriptor);
9188 		return B_BAD_VALUE;
9189 	}
9190 
9191 	struct flock flock;
9192 	flock.l_start = 0;
9193 	flock.l_len = OFF_MAX;
9194 	flock.l_whence = 0;
9195 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9196 
9197 	status_t status;
9198 	if ((operation & LOCK_UN) != 0) {
9199 		if (HAS_FS_CALL(vnode, release_lock))
9200 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9201 		else
9202 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9203 	} else {
9204 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9205 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9206 				(operation & LOCK_NB) == 0);
9207 		} else {
9208 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9209 				(operation & LOCK_NB) == 0);
9210 		}
9211 	}
9212 
9213 	syscall_restart_handle_post(status);
9214 
9215 	put_fd(descriptor);
9216 	return status;
9217 }
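
// Note: as the code above shows, flock() operations are translated into a
// whole-file advisory lock -- LOCK_SH becomes F_RDLCK, LOCK_EX becomes
// F_WRLCK, and LOCK_NB makes the acquisition non-blocking. A sketch of the
// userland view:
//
//	flock(fd, LOCK_EX);				// blocks until the exclusive lock is held
//	flock(fd, LOCK_SH | LOCK_NB);	// shared lock; fails instead of waiting
//	flock(fd, LOCK_UN);				// releases the lock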
9218 
9219 
9220 status_t
9221 _user_lock_node(int fd)
9222 {
9223 	return common_lock_node(fd, false);
9224 }
9225 
9226 
9227 status_t
9228 _user_unlock_node(int fd)
9229 {
9230 	return common_unlock_node(fd, false);
9231 }
9232 
9233 
9234 status_t
9235 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9236 	int perms)
9237 {
9238 	char name[B_FILE_NAME_LENGTH];
9239 	status_t status;
9240 
9241 	if (!IS_USER_ADDRESS(userName))
9242 		return B_BAD_ADDRESS;
9243 
9244 	status = user_copy_name(name, userName, sizeof(name));
9245 	if (status != B_OK)
9246 		return status;
9247 
9248 	return dir_create_entry_ref(device, inode, name, perms, false);
9249 }
9250 
9251 
9252 status_t
9253 _user_create_dir(int fd, const char* userPath, int perms)
9254 {
9255 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9256 	if (pathBuffer.InitCheck() != B_OK)
9257 		return B_NO_MEMORY;
9258 
9259 	char* path = pathBuffer.LockBuffer();
9260 
9261 	if (!IS_USER_ADDRESS(userPath))
9262 		return B_BAD_ADDRESS;
9263 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9264 	if (status != B_OK)
9265 		return status;
9266 
9267 	return dir_create(fd, path, perms, false);
9268 }
9269 
9270 
9271 status_t
9272 _user_remove_dir(int fd, const char* userPath)
9273 {
9274 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9275 	if (pathBuffer.InitCheck() != B_OK)
9276 		return B_NO_MEMORY;
9277 
9278 	char* path = pathBuffer.LockBuffer();
9279 
9280 	if (userPath != NULL) {
9281 		if (!IS_USER_ADDRESS(userPath))
9282 			return B_BAD_ADDRESS;
9283 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9284 		if (status != B_OK)
9285 			return status;
9286 	}
9287 
9288 	return dir_remove(fd, userPath ? path : NULL, false);
9289 }
9290 
9291 
9292 status_t
9293 _user_read_link(int fd, const char* userPath, char* userBuffer,
9294 	size_t* userBufferSize)
9295 {
9296 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9297 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9298 		return B_NO_MEMORY;
9299 
9300 	size_t bufferSize;
9301 
9302 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9303 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9304 		return B_BAD_ADDRESS;
9305 
9306 	char* path = pathBuffer.LockBuffer();
9307 	char* buffer = linkBuffer.LockBuffer();
9308 
9309 	if (userPath) {
9310 		if (!IS_USER_ADDRESS(userPath))
9311 			return B_BAD_ADDRESS;
9312 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9313 		if (status != B_OK)
9314 			return status;
9315 
9316 		if (bufferSize > B_PATH_NAME_LENGTH)
9317 			bufferSize = B_PATH_NAME_LENGTH;
9318 	}
9319 
9320 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9321 		&bufferSize, false);
9322 
9323 	// we also update the bufferSize in case of errors
9324 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9325 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9326 		return B_BAD_ADDRESS;
9327 
9328 	if (status != B_OK)
9329 		return status;
9330 
9331 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9332 		return B_BAD_ADDRESS;
9333 
9334 	return B_OK;
9335 }
9336 
9337 
9338 status_t
9339 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9340 	int mode)
9341 {
9342 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9343 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9344 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9345 		return B_NO_MEMORY;
9346 
9347 	char* path = pathBuffer.LockBuffer();
9348 	char* toPath = toPathBuffer.LockBuffer();
9349 
9350 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9351 		return B_BAD_ADDRESS;
9352 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9353 	if (status != B_OK)
9354 		return status;
9355 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9356 	if (status != B_OK)
9357 		return status;
9358 
9359 	return common_create_symlink(fd, path, toPath, mode, false);
9360 }
9361 
9362 
9363 status_t
9364 _user_create_link(int pathFD, const char* userPath, int toFD,
9365 	const char* userToPath, bool traverseLeafLink)
9366 {
9367 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9368 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9369 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9370 		return B_NO_MEMORY;
9371 
9372 	char* path = pathBuffer.LockBuffer();
9373 	char* toPath = toPathBuffer.LockBuffer();
9374 
9375 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9376 		return B_BAD_ADDRESS;
9377 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9378 	if (status != B_OK)
9379 		return status;
9380 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9381 	if (status != B_OK)
9382 		return status;
9383 
9384 	status = check_path(toPath);
9385 	if (status != B_OK)
9386 		return status;
9387 
9388 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9389 		false);
9390 }
9391 
9392 
9393 status_t
9394 _user_unlink(int fd, const char* userPath)
9395 {
9396 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9397 	if (pathBuffer.InitCheck() != B_OK)
9398 		return B_NO_MEMORY;
9399 
9400 	char* path = pathBuffer.LockBuffer();
9401 
9402 	if (!IS_USER_ADDRESS(userPath))
9403 		return B_BAD_ADDRESS;
9404 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9405 	if (status != B_OK)
9406 		return status;
9407 
9408 	return common_unlink(fd, path, false);
9409 }
9410 
9411 
9412 status_t
9413 _user_rename(int oldFD, const char* userOldPath, int newFD,
9414 	const char* userNewPath)
9415 {
9416 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9417 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9418 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9419 		return B_NO_MEMORY;
9420 
9421 	char* oldPath = oldPathBuffer.LockBuffer();
9422 	char* newPath = newPathBuffer.LockBuffer();
9423 
9424 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9425 		return B_BAD_ADDRESS;
9426 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9427 	if (status != B_OK)
9428 		return status;
9429 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9430 	if (status != B_OK)
9431 		return status;
9432 
9433 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9434 }
9435 
9436 
9437 status_t
9438 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9439 {
9440 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9441 	if (pathBuffer.InitCheck() != B_OK)
9442 		return B_NO_MEMORY;
9443 
9444 	char* path = pathBuffer.LockBuffer();
9445 
9446 	if (!IS_USER_ADDRESS(userPath))
9447 		return B_BAD_ADDRESS;
9448 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9449 	if (status != B_OK)
9450 		return status;
9451 
9452 	// split into directory vnode and filename path
9453 	char filename[B_FILE_NAME_LENGTH];
9454 	struct vnode* dir;
9455 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9456 	if (status != B_OK)
9457 		return status;
9458 
9459 	VNodePutter _(dir);
9460 
9461 	// the underlying FS needs to support creating FIFOs
9462 	if (!HAS_FS_CALL(dir, create_special_node))
9463 		return B_UNSUPPORTED;
9464 
9465 	// create the entry -- the FIFO sub node is set up automatically
9466 	fs_vnode superVnode;
9467 	ino_t nodeID;
9468 	status = FS_CALL(dir, create_special_node, filename, NULL,
9469 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9470 
9471 	// create_special_node() acquired a reference for us that we don't need.
9472 	if (status == B_OK)
9473 		put_vnode(dir->mount->volume, nodeID);
9474 
9475 	return status;
9476 }
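
/*!	Presumably reached through the mkfifo() wrapper in libroot; the
	permission bits are masked with S_IUMSK above, and the FIFO sub node is
	attached automatically when the node is created. A minimal userland
	sketch (plain POSIX usage):

		#include <stdio.h>
		#include <sys/stat.h>

		if (mkfifo("/tmp/my_fifo", 0600) != 0)
			perror("mkfifo");
*/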
9477 
9478 
9479 status_t
9480 _user_create_pipe(int* userFDs)
9481 {
9482 	// rootfs should support creating FIFOs, but let's be sure
9483 	if (!HAS_FS_CALL(sRoot, create_special_node))
9484 		return B_UNSUPPORTED;
9485 
9486 	// create the node -- the FIFO sub node is set up automatically
9487 	fs_vnode superVnode;
9488 	ino_t nodeID;
9489 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9490 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9491 	if (status != B_OK)
9492 		return status;
9493 
9494 	// We've got one reference to the node and need another one.
9495 	struct vnode* vnode;
9496 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9497 	if (status != B_OK) {
9498 		// this should not happen
9499 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9500 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9501 		return status;
9502 	}
9503 
9504 	// Everything looks good so far. Open one FD for reading and one for
9505 	// writing.
9506 	int fds[2];
9507 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9508 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9509 
9510 	FDCloser closer0(fds[0], false);
9511 	FDCloser closer1(fds[1], false);
9512 
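	// open_vnode() returns a new file descriptor on success and a negative
	// error code on failure; propagate the first error, if any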
9513 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9514 
9515 	// copy FDs to userland
9516 	if (status == B_OK) {
9517 		if (!IS_USER_ADDRESS(userFDs)
9518 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9519 			status = B_BAD_ADDRESS;
9520 		}
9521 	}
9522 
9523 	// keep the FDs if everything went fine
9524 	if (status == B_OK) {
9525 		closer0.Detach();
9526 		closer1.Detach();
9527 	}
9528 
9529 	return status;
9530 }
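
/*!	This is the backend of the POSIX pipe() call. A minimal sketch of the
	calling side (plain POSIX usage, not the libroot source):

		#include <unistd.h>

		int fds[2];
		if (pipe(fds) == 0) {
			// fds[0] is the read end and fds[1] the write end,
			// matching the O_RDONLY/O_WRONLY order above
			write(fds[1], "x", 1);
			close(fds[0]);
			close(fds[1]);
		}
*/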
9531 
9532 
9533 status_t
9534 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9535 {
9536 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9537 	if (pathBuffer.InitCheck() != B_OK)
9538 		return B_NO_MEMORY;
9539 
9540 	char* path = pathBuffer.LockBuffer();
9541 
9542 	if (!IS_USER_ADDRESS(userPath))
9543 		return B_BAD_ADDRESS;
9544 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9545 	if (status != B_OK)
9546 		return status;
9547 
9548 	return common_access(fd, path, mode, effectiveUserGroup, false);
9549 }
9550 
9551 
9552 status_t
9553 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9554 	struct stat* userStat, size_t statSize)
9555 {
9556 	struct stat stat;
9557 	status_t status;
9558 
9559 	if (statSize > sizeof(struct stat))
9560 		return B_BAD_VALUE;
9561 
9562 	if (!IS_USER_ADDRESS(userStat))
9563 		return B_BAD_ADDRESS;
9564 
9565 	if (userPath != NULL) {
9566 		// path given: get the stat of the node referred to by (fd, path)
9567 		if (!IS_USER_ADDRESS(userPath))
9568 			return B_BAD_ADDRESS;
9569 
9570 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9571 		if (pathBuffer.InitCheck() != B_OK)
9572 			return B_NO_MEMORY;
9573 
9574 		char* path = pathBuffer.LockBuffer();
9575 
9576 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9577 		if (status != B_OK)
9578 			return status;
9579 
9580 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9581 	} else {
9582 		// no path given: get the FD and use the FD operation
9583 		struct file_descriptor* descriptor
9584 			= get_fd(get_current_io_context(false), fd);
9585 		if (descriptor == NULL)
9586 			return B_FILE_ERROR;
9587 
9588 		if (descriptor->ops->fd_read_stat)
9589 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9590 		else
9591 			status = B_UNSUPPORTED;
9592 
9593 		put_fd(descriptor);
9594 	}
9595 
9596 	if (status != B_OK)
9597 		return status;
9598 
9599 	return user_memcpy(userStat, &stat, statSize);
9600 }
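
/*!	The statSize argument lets callers compiled against an older, smaller
	struct stat receive only the bytes they know about; sizes larger than
	the kernel's struct are rejected outright. A hedged sketch of what a
	libroot-style wrapper would pass (hypothetical code, not the actual
	libroot source):

		int stat(const char* path, struct stat* st)
		{
			// the real wrapper also maps the error into errno
			return _kern_read_stat(-1, path, true, st,
				sizeof(struct stat));
		}
*/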
9601 
9602 
9603 status_t
9604 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9605 	const struct stat* userStat, size_t statSize, int statMask)
9606 {
9607 	if (statSize > sizeof(struct stat))
9608 		return B_BAD_VALUE;
9609 
9610 	struct stat stat;
9611 
9612 	if (!IS_USER_ADDRESS(userStat)
9613 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9614 		return B_BAD_ADDRESS;
9615 
9616 	// clear any fields the caller's (smaller) struct stat did not provide
9617 	if (statSize < sizeof(struct stat))
9618 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9619 
9620 	status_t status;
9621 
9622 	if (userPath != NULL) {
9623 		// path given: write the stat of the node referred to by (fd, path)
9624 		if (!IS_USER_ADDRESS(userPath))
9625 			return B_BAD_ADDRESS;
9626 
9627 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9628 		if (pathBuffer.InitCheck() != B_OK)
9629 			return B_NO_MEMORY;
9630 
9631 		char* path = pathBuffer.LockBuffer();
9632 
9633 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9634 		if (status != B_OK)
9635 			return status;
9636 
9637 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9638 			statMask, false);
9639 	} else {
9640 		// no path given: get the FD and use the FD operation
9641 		struct file_descriptor* descriptor
9642 			= get_fd(get_current_io_context(false), fd);
9643 		if (descriptor == NULL)
9644 			return B_FILE_ERROR;
9645 
9646 		if (descriptor->ops->fd_write_stat) {
9647 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9648 				statMask);
9649 		} else
9650 			status = B_UNSUPPORTED;
9651 
9652 		put_fd(descriptor);
9653 	}
9654 
9655 	return status;
9656 }
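
/*!	statMask selects which fields of the copied-in stat are actually
	applied (B_STAT_MODE, B_STAT_UID, B_STAT_SIZE, and friends), which is
	how this single syscall can back chmod(), chown(), truncate(), and the
	time-setting functions.
*/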
9657 
9658 
9659 int
9660 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9661 {
9662 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9663 	if (pathBuffer.InitCheck() != B_OK)
9664 		return B_NO_MEMORY;
9665 
9666 	char* path = pathBuffer.LockBuffer();
9667 
9668 	if (userPath != NULL) {
9669 		if (!IS_USER_ADDRESS(userPath))
9670 			return B_BAD_ADDRESS;
9671 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9672 		if (status != B_OK)
9673 			return status;
9674 	}
9675 
9676 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9677 }
9678 
9679 
9680 ssize_t
9681 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9682 	size_t readBytes)
9683 {
9684 	char attribute[B_FILE_NAME_LENGTH];
9685 
9686 	if (userAttribute == NULL)
9687 		return B_BAD_VALUE;
9688 	if (!IS_USER_ADDRESS(userAttribute))
9689 		return B_BAD_ADDRESS;
9690 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9691 	if (status != B_OK)
9692 		return status;
9693 
9694 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9695 	if (attr < 0)
9696 		return attr;
9697 
9698 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9699 	_user_close(attr);
9700 
9701 	return bytes;
9702 }
9703 
9704 
9705 ssize_t
9706 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9707 	const void* buffer, size_t writeBytes)
9708 {
9709 	char attribute[B_FILE_NAME_LENGTH];
9710 
9711 	if (userAttribute == NULL)
9712 		return B_BAD_VALUE;
9713 	if (!IS_USER_ADDRESS(userAttribute))
9714 		return B_BAD_ADDRESS;
9715 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9716 	if (status != B_OK)
9717 		return status;
9718 
9719 	// Try to support the BeOS typical truncation as well as the position
9720 	// Try to support the BeOS-typical truncation semantics as well as the
9721 	// position argument.
9722 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9723 	if (attr < 0)
9724 		return attr;
9725 
9726 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9727 	_user_close(attr);
9728 
9729 	return bytes;
9730 }
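
/*!	This is the backend of the public fs_write_attr() API. Writing at
	position 0 truncates the attribute first (the historical BeOS
	behavior), while a nonzero position leaves existing contents in place,
	as the O_TRUNC logic above implements. A minimal usage sketch:

		#include <string.h>
		#include <fs_attr.h>
		#include <TypeConstants.h>

		const char* note = "draft";
		// pos == 0, so any previous value is truncated away
		fs_write_attr(fd, "MAIL:status", B_STRING_TYPE, 0, note,
			strlen(note) + 1);
*/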
9731 
9732 
9733 status_t
9734 _user_stat_attr(int fd, const char* userAttribute,
9735 	struct attr_info* userAttrInfo)
9736 {
9737 	char attribute[B_FILE_NAME_LENGTH];
9738 
9739 	if (userAttribute == NULL || userAttrInfo == NULL)
9740 		return B_BAD_VALUE;
9741 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9742 		return B_BAD_ADDRESS;
9743 	status_t status = user_copy_name(attribute, userAttribute,
9744 		sizeof(attribute));
9745 	if (status != B_OK)
9746 		return status;
9747 
9748 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9749 	if (attr < 0)
9750 		return attr;
9751 
9752 	struct file_descriptor* descriptor
9753 		= get_fd(get_current_io_context(false), attr);
9754 	if (descriptor == NULL) {
9755 		_user_close(attr);
9756 		return B_FILE_ERROR;
9757 	}
9758 
9759 	struct stat stat;
9760 	if (descriptor->ops->fd_read_stat)
9761 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9762 	else
9763 		status = B_UNSUPPORTED;
9764 
9765 	put_fd(descriptor);
9766 	_user_close(attr);
9767 
9768 	if (status == B_OK) {
9769 		attr_info info;
9770 		info.type = stat.st_type;
9771 		info.size = stat.st_size;
9772 
9773 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9774 			return B_BAD_ADDRESS;
9775 	}
9776 
9777 	return status;
9778 }
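
/*!	Backend of fs_stat_attr(), which reduces a full stat of the attribute
	to the two fields attr_info carries. A minimal usage sketch:

		#include <stdio.h>
		#include <fs_attr.h>

		attr_info info;
		if (fs_stat_attr(fd, "MAIL:status", &info) == 0)
			printf("type %#" B_PRIx32 ", size %" B_PRIdOFF "\n",
				info.type, info.size);
*/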
9779 
9780 
9781 int
9782 _user_open_attr(int fd, const char* userPath, const char* userName,
9783 	uint32 type, int openMode)
9784 {
9785 	char name[B_FILE_NAME_LENGTH];
9786 
9787 	if (!IS_USER_ADDRESS(userName))
9788 		return B_BAD_ADDRESS;
9789 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9790 	if (status != B_OK)
9791 		return status;
9792 
9793 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9794 	if (pathBuffer.InitCheck() != B_OK)
9795 		return B_NO_MEMORY;
9796 
9797 	char* path = pathBuffer.LockBuffer();
9798 
9799 	if (userPath != NULL) {
9800 		if (!IS_USER_ADDRESS(userPath))
9801 			return B_BAD_ADDRESS;
9802 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9803 		if (status != B_OK)
9804 			return status;
9805 	}
9806 
9807 	if ((openMode & O_CREAT) != 0) {
9808 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9809 			false);
9810 	}
9811 
9812 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9813 }
9814 
9815 
9816 status_t
9817 _user_remove_attr(int fd, const char* userName)
9818 {
9819 	char name[B_FILE_NAME_LENGTH];
9820 
9821 	if (!IS_USER_ADDRESS(userName))
9822 		return B_BAD_ADDRESS;
9823 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9824 	if (status != B_OK)
9825 		return status;
9826 
9827 	return attr_remove(fd, name, false);
9828 }
9829 
9830 
9831 status_t
9832 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9833 	const char* userToName)
9834 {
9835 	if (!IS_USER_ADDRESS(userFromName)
9836 		|| !IS_USER_ADDRESS(userToName))
9837 		return B_BAD_ADDRESS;
9838 
9839 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9840 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9841 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9842 		return B_NO_MEMORY;
9843 
9844 	char* fromName = fromNameBuffer.LockBuffer();
9845 	char* toName = toNameBuffer.LockBuffer();
9846 
9847 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9848 	if (status != B_OK)
9849 		return status;
9850 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9851 	if (status != B_OK)
9852 		return status;
9853 
9854 	return attr_rename(fromFile, fromName, toFile, toName, false);
9855 }
9856 
9857 
9858 int
9859 _user_open_index_dir(dev_t device)
9860 {
9861 	return index_dir_open(device, false);
9862 }
9863 
9864 
9865 status_t
9866 _user_create_index(dev_t device, const char* userName, uint32 type,
9867 	uint32 flags)
9868 {
9869 	char name[B_FILE_NAME_LENGTH];
9870 
9871 	if (!IS_USER_ADDRESS(userName))
9872 		return B_BAD_ADDRESS;
9873 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9874 	if (status != B_OK)
9875 		return status;
9876 
9877 	return index_create(device, name, type, flags, false);
9878 }
9879 
9880 
9881 status_t
9882 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9883 {
9884 	char name[B_FILE_NAME_LENGTH];
9885 	struct stat stat;
9886 	status_t status;
9887 
9888 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9889 		return B_BAD_ADDRESS;
9890 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9891 	if (status != B_OK)
9892 		return status;
9893 
9894 	status = index_name_read_stat(device, name, &stat, false);
9895 	if (status == B_OK) {
9896 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9897 			return B_BAD_ADDRESS;
9898 	}
9899 
9900 	return status;
9901 }
9902 
9903 
9904 status_t
9905 _user_remove_index(dev_t device, const char* userName)
9906 {
9907 	char name[B_FILE_NAME_LENGTH];
9908 
9909 	if (!IS_USER_ADDRESS(userName))
9910 		return B_BAD_ADDRESS;
9911 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9912 	if (status != B_OK)
9913 		return status;
9914 
9915 	return index_remove(device, name, false);
9916 }
9917 
9918 
9919 status_t
9920 _user_getcwd(char* userBuffer, size_t size)
9921 {
9922 	if (size == 0)
9923 		return B_BAD_VALUE;
9924 	if (!IS_USER_ADDRESS(userBuffer))
9925 		return B_BAD_ADDRESS;
9926 
9927 	if (size > kMaxPathLength)
9928 		size = kMaxPathLength;
9929 
9930 	KPath pathBuffer(size);
9931 	if (pathBuffer.InitCheck() != B_OK)
9932 		return B_NO_MEMORY;
9933 
9934 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9935 
9936 	char* path = pathBuffer.LockBuffer();
9937 
9938 	status_t status = get_cwd(path, size, false);
9939 	if (status != B_OK)
9940 		return status;
9941 
9942 	// Copy back the result
9943 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9944 		return B_BAD_ADDRESS;
9945 
9946 	return status;
9947 }
9948 
9949 
9950 status_t
9951 _user_setcwd(int fd, const char* userPath)
9952 {
9953 	TRACE(("user_setcwd: path = %p\n", userPath));
9954 
9955 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9956 	if (pathBuffer.InitCheck() != B_OK)
9957 		return B_NO_MEMORY;
9958 
9959 	char* path = pathBuffer.LockBuffer();
9960 
9961 	if (userPath != NULL) {
9962 		if (!IS_USER_ADDRESS(userPath))
9963 			return B_BAD_ADDRESS;
9964 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9965 		if (status != B_OK)
9966 			return status;
9967 	}
9968 
9969 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9970 }
9971 
9972 
9973 status_t
9974 _user_change_root(const char* userPath)
9975 {
9976 	// only root is allowed to chroot()
9977 	if (geteuid() != 0)
9978 		return B_NOT_ALLOWED;
9979 
9980 	// alloc path buffer
9981 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9982 	if (pathBuffer.InitCheck() != B_OK)
9983 		return B_NO_MEMORY;
9984 
9985 	// copy userland path to kernel
9986 	char* path = pathBuffer.LockBuffer();
9987 	if (userPath != NULL) {
9988 		if (!IS_USER_ADDRESS(userPath))
9989 			return B_BAD_ADDRESS;
9990 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9991 		if (status != B_OK)
9992 			return status;
9993 	}
9994 
9995 	// get the vnode
9996 	struct vnode* vnode;
9997 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9998 	if (status != B_OK)
9999 		return status;
10000 
10001 	// set the new root
10002 	struct io_context* context = get_current_io_context(false);
10003 	mutex_lock(&sIOContextRootLock);
10004 	struct vnode* oldRoot = context->root;
10005 	context->root = vnode;
10006 	mutex_unlock(&sIOContextRootLock);
10007 
10008 	put_vnode(oldRoot);
10009 
10010 	return B_OK;
10011 }
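
/*!	Note that only the calling team's io_context is affected: the root is
	swapped under sIOContextRootLock so that concurrent path resolution
	sees either the old or the new root vnode, never a torn update, and
	the reference to the previous root is released afterwards.
*/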
10012 
10013 
10014 int
10015 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10016 	uint32 flags, port_id port, int32 token)
10017 {
10018 	char* query;
10019 
10020 	if (device < 0 || userQuery == NULL || queryLength == 0)
10021 		return B_BAD_VALUE;
10022 
10023 	if (!IS_USER_ADDRESS(userQuery))
10024 		return B_BAD_ADDRESS;
10025 
10026 	// this is a safety restriction (cap query strings at 64 KiB)
10027 	if (queryLength >= 65536)
10028 		return B_NAME_TOO_LONG;
10029 
10030 	query = (char*)malloc(queryLength + 1);
10031 	if (query == NULL)
10032 		return B_NO_MEMORY;
10033 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
10034 		free(query);
10035 		return B_BAD_ADDRESS;
10036 	}
10037 
10038 	int fd = query_open(device, query, flags, port, token, false);
10039 
10040 	free(query);
10041 	return fd;
10042 }
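
/*!	The query string uses the file system's query language (BFS-style
	attribute predicates). A minimal userland sketch via the public API
	from fs_query.h:

		#include <stdio.h>
		#include <fs_query.h>

		DIR* query = fs_open_query(device, "name==\"*.cpp\"", 0);
		if (query != NULL) {
			struct dirent* entry;
			while ((entry = fs_read_query(query)) != NULL)
				puts(entry->d_name);
			fs_close_query(query);
		}
*/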
10043 
10044 
10045 #include "vfs_request_io.cpp"
10046