xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
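

// A short illustration of how these wrappers are meant to be used (a sketch
// only; the same pattern appears verbatim in normalize_flock() further
// below): a hook is first probed with HAS_FS_CALL() and only then invoked
// through FS_CALL().
//
//	struct stat stat;
//	if (!HAS_FS_CALL(vnode, read_stat))
//		return B_UNSUPPORTED;
//	status_t status = FS_CALL(vnode, read_stat, &stat);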


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (used by getcwd()); it does not
	// depend on PATH_MAX.


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is ensured that it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock for the duration of their
	operation. That is, locking it ensures that no FS is mounted or unmounted.
	In particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes: BUSY_VNODE_RETRIES retries with a
// BUSY_VNODE_DELAY microsecond snooze each, i.e. about 10 seconds in total.
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static ssize_t file_readv(struct file_descriptor* descriptor, off_t pos,
	const struct iovec *vecs, int count);
static ssize_t file_writev(struct file_descriptor* descriptor, off_t pos,
	const struct iovec *vecs, int count);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, bool kernel,
	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_close,
	file_free_fd,
	file_read,
	file_write,
	file_readv,
	file_writev,
	file_seek,
	common_ioctl,
	NULL,		// set_flags()
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sDirectoryOps = {
	dir_close,
	dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sAttributeDirectoryOps = {
	attr_dir_close,
	attr_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
};

static struct fd_ops sAttributeOps = {
	attr_close,
	attr_free_fd,
	attr_read,
	attr_write,
	NULL,		// readv()
	NULL,		// writev()
	attr_seek,
	common_ioctl,
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
};

static struct fd_ops sIndexDirectoryOps = {
	index_dir_close,
	index_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// dir_close()
	NULL,		// free_fd()
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
};
#endif

static struct fd_ops sQueryOps = {
	query_close,
	query_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};


namespace {

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

} // namespace
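

// Typical FDCloser usage, as a minimal sketch (create_some_fd() is a
// hypothetical stand-in for whatever produces the descriptor): the FD is
// closed automatically on every early-return path, and Detach() hands
// ownership back to the caller on success.
//
//	int fd = create_some_fd();	// hypothetical
//	if (fd < 0)
//		return fd;
//	FDCloser fdCloser(fd, kernel);
//	// ... work that may fail and return early ...
//	return fdCloser.Detach();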


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note: you must hold sMountLock when calling this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount != NULL)
		put_vnode(mount->root_vnode);
}
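

// get_mount() and put_mount() come in pairs: get_mount() acquires a
// reference to the mount's root vnode, which keeps the mount from being
// unmounted until put_mount() drops that reference again. A minimal
// usage sketch:
//
//	struct fs_mount* mount;
//	if (get_mount(id, &mount) == B_OK) {
//		// ... the mount can be used safely here ...
//		put_mount(mount);
//	}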


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName != NULL ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
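

// Example with illustrative layer names: for fsNames = "ntfs:write_overlay",
// get_file_system_name_for_layer(fsNames, 0) returns "ntfs",
// get_file_system_name_for_layer(fsNames, 1) returns "write_overlay", and
// any higher layer yields NULL.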


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
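

// The retry pattern used by callers (cf. get_vnode() below), as a sketch:
// initialize the retry budget once, then consult retry_busy_vnode() each
// time the node is found busy, giving up once it returns false.
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnode != NULL && vnode->IsBusy()) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;
//		// ... look the vnode up again ...
//	}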


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function
	write locks \c sVnodeLock and keeps it locked for the caller when
	returning. On error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (vnode->mount == NULL || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count could drop to 0 in
	// the first place. Deleting the file cache now will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache != NULL)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function is allowed to wait (and retry)
		   should the vnode currently be busy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode != NULL && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode != NULL) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type = 0;
		uint32 flags = 0;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && (vnode->private_node == NULL || vnode->ops == NULL)) {
			KDEBUG_ONLY(panic("filesystem get_vnode returned 0 with unset fields"));
			status = B_BAD_VALUE;
		}

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}
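

// References obtained via get_vnode() must be balanced with put_vnode().
// A minimal sketch of the usual pattern:
//
//	struct vnode* vnode;
//	status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
//	if (status != B_OK)
//		return status;
//	// ... use the vnode ...
//	put_vnode(vnode);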


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with it.
	Note: you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success; if the vnode got such an object
	from someone else in the meantime, you'll still get that one
	locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}
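

// The locking protocol in a nutshell (a sketch): after
// create_advisory_locking() returns B_OK, the caller holds the per-vnode
// locking -- its "lock" semaphore was either just created with a zero count
// or acquired via get_advisory_locking() -- and put_advisory_locking()
// releases it again.
//
//	status_t status = create_advisory_locking(vnode);
//	if (status == B_OK) {
//		// vnode->advisory_locking is attached and locked by us here
//		put_advisory_locking(vnode->advisory_locking);
//	}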


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
				secondLock->shared = lock->shared;

				// only now cut the end of the first lock, so that the
				// second lock keeps the original end
				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			delete lock;
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, which seems
	to be in line with what the BSDs do).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1817 
1818 
1819 /*!	Normalizes the \a flock structure to make it easier to compare the
1820 	structure with others. The l_start and l_len fields are set to absolute
1821 	values according to the l_whence field.
1822 */
1823 static status_t
1824 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1825 {
1826 	switch (flock->l_whence) {
1827 		case SEEK_SET:
1828 			break;
1829 		case SEEK_CUR:
1830 			flock->l_start += descriptor->pos;
1831 			break;
1832 		case SEEK_END:
1833 		{
1834 			struct vnode* vnode = descriptor->u.vnode;
1835 			struct stat stat;
1836 			status_t status;
1837 
1838 			if (!HAS_FS_CALL(vnode, read_stat))
1839 				return B_UNSUPPORTED;
1840 
1841 			status = FS_CALL(vnode, read_stat, &stat);
1842 			if (status != B_OK)
1843 				return status;
1844 
1845 			flock->l_start += stat.st_size;
1846 			break;
1847 		}
1848 		default:
1849 			return B_BAD_VALUE;
1850 	}
1851 
1852 	if (flock->l_start < 0)
1853 		flock->l_start = 0;
1854 	if (flock->l_len == 0)
1855 		flock->l_len = OFF_MAX;
1856 
1857 	// don't let the offset and length overflow
1858 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1859 		flock->l_len = OFF_MAX - flock->l_start;
1860 
1861 	if (flock->l_len < 0) {
1862 		// a negative length reverses the region
1863 		flock->l_start += flock->l_len;
1864 		flock->l_len = -flock->l_len;
1865 	}
1866 
1867 	return B_OK;
1868 }
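
// Worked example (illustrative only): assuming descriptor->pos == 100, a
// request of { l_whence = SEEK_CUR, l_start = 50, l_len = -30 } is first made
// absolute (l_start == 150), then the negative length reverses the region,
// yielding { l_start = 120, l_len = 30 }, i.e. bytes [120, 149]. A length of
// 0 would instead be extended to OFF_MAX ("to the end of the file").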
1869 
1870 
1871 static void
1872 replace_vnode_if_disconnected(struct fs_mount* mount,
1873 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1874 	struct vnode* fallBack, bool lockRootLock)
1875 {
1876 	struct vnode* givenVnode = vnode;
1877 	bool vnodeReplaced = false;
1878 
1879 	ReadLocker vnodeReadLocker(sVnodeLock);
1880 
1881 	if (lockRootLock)
1882 		mutex_lock(&sIOContextRootLock);
1883 
1884 	while (vnode != NULL && vnode->mount == mount
1885 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1886 		if (vnode->covers != NULL) {
1887 			// redirect the vnode to the covered vnode
1888 			vnode = vnode->covers;
1889 		} else
1890 			vnode = fallBack;
1891 
1892 		vnodeReplaced = true;
1893 	}
1894 
1895 	// If we've replaced the node, grab a reference for the new one.
1896 	if (vnodeReplaced && vnode != NULL)
1897 		inc_vnode_ref_count(vnode);
1898 
1899 	if (lockRootLock)
1900 		mutex_unlock(&sIOContextRootLock);
1901 
1902 	vnodeReadLocker.Unlock();
1903 
1904 	if (vnodeReplaced)
1905 		put_vnode(givenVnode);
1906 }
1907 
1908 
1909 /*!	Disconnects all file descriptors that are associated with the
1910 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1911 	\a mount object.
1912 
1913 	Note that after this function has been called, there might still be
1914 	ongoing accesses - those already in progress won't be interrupted.
1915 	However, any subsequent access will fail.
1916 
1917 	This is not a cheap function and should be used with care and rarely.
1918 	TODO: there is currently no means to stop a blocking read/write!
1919 */
1920 static void
1921 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1922 	struct vnode* vnodeToDisconnect)
1923 {
1924 	// iterate over all teams and peek into their file descriptors
1925 	TeamListIterator teamIterator;
1926 	while (Team* team = teamIterator.Next()) {
1927 		BReference<Team> teamReference(team, true);
1928 		TeamLocker teamLocker(team);
1929 
1930 		// lock the I/O context
1931 		io_context* context = team->io_context;
1932 		if (context == NULL)
1933 			continue;
1934 		MutexLocker contextLocker(context->io_mutex);
1935 
1936 		teamLocker.Unlock();
1937 
1938 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1939 			sRoot, true);
1940 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1941 			sRoot, false);
1942 
1943 		for (uint32 i = 0; i < context->table_size; i++) {
1944 			struct file_descriptor* descriptor = context->fds[i];
1945 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1946 				continue;
1947 
1948 			inc_fd_ref_count(descriptor);
1949 
1950 			// if this descriptor points at this mount, we
1951 			// need to disconnect it to be able to unmount
1952 			struct vnode* vnode = fd_vnode(descriptor);
1953 			if (vnodeToDisconnect != NULL) {
1954 				if (vnode == vnodeToDisconnect)
1955 					disconnect_fd(descriptor);
1956 			} else if ((vnode != NULL && vnode->mount == mount)
1957 				|| (vnode == NULL && descriptor->u.mount == mount))
1958 				disconnect_fd(descriptor);
1959 
1960 			put_fd(descriptor);
1961 		}
1962 	}
1963 }
1964 
1965 
1966 /*!	\brief Gets the root node of the current IO context.
1967 	If \a kernel is \c true, the kernel IO context will be used.
1968 	The caller obtains a reference to the returned node.
1969 */
1970 struct vnode*
1971 get_root_vnode(bool kernel)
1972 {
1973 	if (!kernel) {
1974 		// Get the root directory from the io context
1975 		struct io_context* context = get_current_io_context(kernel);
1976 
1977 		mutex_lock(&sIOContextRootLock);
1978 
1979 		struct vnode* root = context->root;
1980 		if (root != NULL)
1981 			inc_vnode_ref_count(root);
1982 
1983 		mutex_unlock(&sIOContextRootLock);
1984 
1985 		if (root != NULL)
1986 			return root;
1987 
1988 		// That should never happen.
1989 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1990 			"have a root\n", team_get_current_team_id());
1991 	}
1992 
1993 	inc_vnode_ref_count(sRoot);
1994 	return sRoot;
1995 }
1996 
1997 
1998 /*!	\brief Gets the directory path and leaf name for a given path.
1999 
2000 	The supplied \a path is transformed to refer to the directory part of
2001 	the entry identified by the original path, and into the buffer \a filename
2002 	the leaf name of the original entry is written.
2003 	Neither the returned path nor the leaf name can be expected to be
2004 	canonical.
2005 
2006 	\param path The path to be analyzed. Must be able to store at least one
2007 		   additional character.
2008 	\param filename The buffer into which the leaf name will be written.
2009 		   Must be of size B_FILE_NAME_LENGTH at least.
2010 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2011 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2012 		   if the given path name is empty.
2013 */
2014 static status_t
2015 get_dir_path_and_leaf(char* path, char* filename)
2016 {
2017 	if (*path == '\0')
2018 		return B_ENTRY_NOT_FOUND;
2019 
2020 	char* last = strrchr(path, '/');
2021 		// '/' is not allowed in file names!
2022 
2023 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2024 
2025 	if (last == NULL) {
2026 		// this path is a single segment with no '/' in it
2027 		// e.g. "foo"
2028 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2029 			return B_NAME_TOO_LONG;
2030 
2031 		strcpy(path, ".");
2032 	} else {
2033 		last++;
2034 		if (last[0] == '\0') {
2035 			// special case: the path ends in one or more '/' - remove them
2036 			while (*--last == '/' && last != path);
2037 			last[1] = '\0';
2038 
2039 			if (last == path && last[0] == '/') {
2040 				// This path points to the root of the file system
2041 				strcpy(filename, ".");
2042 				return B_OK;
2043 			}
2044 			for (; last != path && *(last - 1) != '/'; last--);
2045 				// rewind to the start of the leaf before the '/'
2046 		}
2047 
2048 		// normal leaf: replace the leaf portion of the path with a '.'
2049 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2050 			return B_NAME_TOO_LONG;
2051 
2052 		last[0] = '.';
2053 		last[1] = '\0';
2054 	}
2055 	return B_OK;
2056 }
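
// Example (illustrative only) of the in-place split performed above:
//
//	char path[B_PATH_NAME_LENGTH] = "/a/b/";	// trailing slashes are fine
//	char leaf[B_FILE_NAME_LENGTH];
//	get_dir_path_and_leaf(path, leaf);
//	// path is now "/a/.", leaf is "b"; a single segment like "foo"
//	// yields path "." and leaf "foo", and "/" yields leaf ".".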
2057 
2058 
2059 static status_t
2060 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2061 	bool traverse, bool kernel, VnodePutter& _vnode)
2062 {
2063 	char clonedName[B_FILE_NAME_LENGTH + 1];
2064 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2065 		return B_NAME_TOO_LONG;
2066 
2067 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2068 	struct vnode* directory;
2069 
2070 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2071 	if (status < 0)
2072 		return status;
2073 
2074 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2075 		_vnode, NULL);
2076 }
2077 
2078 
2079 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2080 	and returns the respective vnode.
2081 	On success a reference to the vnode is acquired for the caller.
2082 */
2083 static status_t
2084 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2085 {
2086 	ino_t id;
2087 	bool missing;
2088 
2089 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2090 		return missing ? B_ENTRY_NOT_FOUND
2091 			: get_vnode(dir->device, id, _vnode, true, false);
2092 	}
2093 
2094 	status_t status = FS_CALL(dir, lookup, name, &id);
2095 	if (status != B_OK)
2096 		return status;
2097 
2098 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2099 	// have a reference and just need to look the node up.
2100 	rw_lock_read_lock(&sVnodeLock);
2101 	*_vnode = lookup_vnode(dir->device, id);
2102 	rw_lock_read_unlock(&sVnodeLock);
2103 
2104 	if (*_vnode == NULL) {
2105 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2106 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2107 		return B_ENTRY_NOT_FOUND;
2108 	}
2109 
2110 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2111 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2112 //		(*_vnode)->mount->id, (*_vnode)->id);
2113 
2114 	return B_OK;
2115 }
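
// Minimal usage sketch (hypothetical caller): the returned vnode carries a
// reference that must be released again:
//
//	struct vnode* child;
//	if (lookup_dir_entry(dir, "name", &child) == B_OK) {
//		// ... use child ...
//		put_vnode(child);
//	}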
2116 
2117 
2118 /*!	Returns the vnode for the relative \a path starting at the specified \a vnode.
2119 
2120 	\param[in,out] path The relative path being searched. Must not be NULL.
2121 	If the function returns successfully, \a path contains the name of the last path
2122 	component. This function clobbers the buffer pointed to by \a path only
2123 	if it contains more than one component.
2124 
2125 	If the function fails and \a leafName is not NULL, \a _vnode contains the last
2126 	existing directory; the caller has the responsibility to call put_vnode() on it.
2127 
2128 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2129 	it is successful or not!
2130 
2131 	\param[out] _vnode If the function returns B_OK: points to the found node.
2132 	\param[out] _vnode If the function returns something else and \a leafName is not NULL: set to
2133 		the last existing directory in the path. The caller has the responsibility to release it
2134 		using put_vnode().
2135 	\param[out] _vnode If the function returns something else and \a leafName is NULL: not used.
2136 */
2137 static status_t
2138 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2139 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2140 	ino_t* _parentID, char* leafName)
2141 {
2142 	FUNCTION(("vnode_path_to_vnode(start = %p, path = %s)\n", start, path));
2143 	ASSERT(!_vnode.IsSet());
2144 
2145 	VnodePutter vnode(start);
2146 
2147 	if (path == NULL)
2148 		return B_BAD_VALUE;
2149 	if (*path == '\0')
2150 		return B_ENTRY_NOT_FOUND;
2151 
2152 	status_t status = B_OK;
2153 	ino_t lastParentID = vnode->id;
2154 	while (true) {
2155 		char* nextPath;
2156 
2157 		TRACE(("vnode_path_to_vnode: top of loop. path = %p ('%s')\n", path,
2158 			path));
2159 
2160 		// done?
2161 		if (path[0] == '\0')
2162 			break;
2163 
2164 		// walk to find the next path component ("path" will point to a single
2165 		// path component), and filter out multiple slashes
2166 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2167 				nextPath++);
2168 
2169 		bool directoryFound = false;
2170 		if (*nextPath == '/') {
2171 			directoryFound = true;
2172 			*nextPath = '\0';
2173 			do
2174 				nextPath++;
2175 			while (*nextPath == '/');
2176 		}
2177 
2178 		// If we encounter ".." at a covering vnode, move to the covered
2179 		// vnode, so that the ".." is passed to the underlying file system.
2180 		// Also prevent escaping the root of the IO context.
2181 		if (strcmp("..", path) == 0) {
2182 			if (vnode.Get() == ioContext->root) {
2183 				// Attempted prison break! Keep it contained.
2184 				path = nextPath;
2185 				continue;
2186 			}
2187 
2188 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2189 				vnode.SetTo(coveredVnode);
2190 		}
2191 
2192 		// check if vnode is really a directory
2193 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2194 			status = B_NOT_A_DIRECTORY;
2195 
2196 		// Check if we have the right to search the current directory vnode.
2197 		// If a file system doesn't have the access() function, we assume that
2198 		// searching a directory is always allowed
2199 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2200 			status = FS_CALL(vnode.Get(), access, X_OK);
2201 
2202 		// Tell the filesystem to get the vnode of this path component (if we
2203 		// got the permission from the call above)
2204 		VnodePutter nextVnode;
2205 		if (status == B_OK) {
2206 			struct vnode* temp = NULL;
2207 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2208 			nextVnode.SetTo(temp);
2209 		}
2210 
2211 		if (status != B_OK) {
2212 			if (leafName != NULL) {
2213 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2214 				_vnode.SetTo(vnode.Detach());
2215 			}
2216 			return status;
2217 		}
2218 
2219 		// If the new node is a symbolic link, resolve it (if we've been told
2220 		// to do it)
2221 		if (S_ISLNK(nextVnode->Type())
2222 			&& (traverseLeafLink || directoryFound)) {
2223 			size_t bufferSize;
2224 			char* buffer;
2225 
2226 			TRACE(("traverse link\n"));
2227 
2228 			if (count + 1 > B_MAX_SYMLINKS)
2229 				return B_LINK_LIMIT;
2230 
2231 			bufferSize = B_PATH_NAME_LENGTH;
2232 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2233 			if (buffer == NULL)
2234 				return B_NO_MEMORY;
2235 
2236 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2237 				bufferSize--;
2238 				status = FS_CALL(nextVnode.Get(), read_symlink, buffer, &bufferSize);
2239 				// null-terminate
2240 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2241 					buffer[bufferSize] = '\0';
2242 			} else
2243 				status = B_BAD_VALUE;
2244 
2245 			if (status != B_OK) {
2246 				object_cache_free(sPathNameCache, buffer, 0);
2247 				return status;
2248 			}
2249 			nextVnode.Unset();
2250 
2251 			// Check if we start from the root directory or the current
2252 			// directory ("vnode" still points to that one).
2253 			// Cut off all leading slashes if it's the root directory
2254 			path = buffer;
2255 			bool absoluteSymlink = false;
2256 			if (path[0] == '/') {
2257 				// we don't need the old directory anymore
2258 				vnode.Unset();
2259 
2260 				while (*++path == '/')
2261 					;
2262 
2263 				mutex_lock(&sIOContextRootLock);
2264 				vnode.SetTo(ioContext->root);
2265 				inc_vnode_ref_count(vnode.Get());
2266 				mutex_unlock(&sIOContextRootLock);
2267 
2268 				absoluteSymlink = true;
2269 			}
2270 
2271 			inc_vnode_ref_count(vnode.Get());
2272 				// balance the next recursion - we will decrement the
2273 				// ref_count of the vnode, no matter if we succeeded or not
2274 
2275 			if (absoluteSymlink && *path == '\0') {
2276 				// symlink was just "/"
2277 				nextVnode.SetTo(vnode.Get());
2278 			} else {
2279 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2280 					ioContext, nextVnode, &lastParentID, leafName);
2281 			}
2282 
2283 			object_cache_free(sPathNameCache, buffer, 0);
2284 
2285 			if (status != B_OK) {
2286 				if (leafName != NULL)
2287 					_vnode.SetTo(nextVnode.Detach());
2288 				return status;
2289 			}
2290 		} else
2291 			lastParentID = vnode->id;
2292 
2293 		// decrease the ref count on the old dir we just looked up into
2294 		vnode.Unset();
2295 
2296 		path = nextPath;
2297 		vnode.SetTo(nextVnode.Detach());
2298 
2299 		// see if we hit a covered node
2300 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2301 			vnode.SetTo(coveringNode);
2302 	}
2303 
2304 	_vnode.SetTo(vnode.Detach());
2305 	if (_parentID)
2306 		*_parentID = lastParentID;
2307 
2308 	return B_OK;
2309 }
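
// Illustrative sketch (hypothetical caller): create-style code passes a
// leafName buffer, so that a failed final lookup still yields the parent:
//
//	VnodePutter dir;
//	char leaf[B_FILE_NAME_LENGTH];
//	status_t error = vnode_path_to_vnode(start, path, false, 0, ioContext,
//		dir, NULL, leaf);
//	if (error == B_ENTRY_NOT_FOUND && dir.IsSet()) {
//		// "dir" is the last existing directory, "leaf" the missing
//		// entry's name - just what an entry creation hook needs.
//	}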
2310 
2311 
2312 static status_t
2313 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2314 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2315 {
2316 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2317 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2318 }
2319 
2320 
2321 static status_t
2322 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2323 	ino_t* _parentID, bool kernel)
2324 {
2325 	struct vnode* start = NULL;
2326 
2327 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2328 
2329 	if (!path)
2330 		return B_BAD_VALUE;
2331 
2332 	if (*path == '\0')
2333 		return B_ENTRY_NOT_FOUND;
2334 
2335 	// figure out if we need to start at root or at cwd
2336 	if (*path == '/') {
2337 		if (sRoot == NULL) {
2338 			// we're a bit early, aren't we?
2339 			return B_ERROR;
2340 		}
2341 
2342 		while (*++path == '/')
2343 			;
2344 		start = get_root_vnode(kernel);
2345 
2346 		if (*path == '\0') {
2347 			_vnode.SetTo(start);
2348 			return B_OK;
2349 		}
2350 
2351 	} else {
2352 		struct io_context* context = get_current_io_context(kernel);
2353 
2354 		mutex_lock(&context->io_mutex);
2355 		start = context->cwd;
2356 		if (start != NULL)
2357 			inc_vnode_ref_count(start);
2358 		mutex_unlock(&context->io_mutex);
2359 
2360 		if (start == NULL)
2361 			return B_ERROR;
2362 	}
2363 
2364 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2365 		_parentID);
2366 }
2367 
2368 
2369 /*! Returns the vnode for the next to last segment of the path, and stores
2370 	the last portion in \a filename.
2371 	The path buffer must be able to store at least one additional character.
2372 */
2373 static status_t
2374 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2375 	bool kernel)
2376 {
2377 	status_t status = get_dir_path_and_leaf(path, filename);
2378 	if (status != B_OK)
2379 		return status;
2380 
2381 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2382 }
2383 
2384 
2385 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2386 		   to by a FD + path pair.
2387 
2388 	\a path must be given in either case. \a fd might be omitted, in which
2389 	case \a path is either an absolute path or one relative to the current
2390 	directory. If both are supplied and \a path is relative, it is reckoned off
2391 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2392 	ignored.
2393 
2394 	The caller has the responsibility to call put_vnode() on the returned
2395 	directory vnode.
2396 
2397 	\param fd The FD. May be < 0.
2398 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2399 	       is modified by this function. It must have at least room for a
2400 	       string one character longer than the path it contains.
2401 	\param _vnode A pointer to a variable the directory vnode shall be written
2402 		   into.
2403 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2404 		   the leaf name of the specified entry will be written.
2405 	\param kernel \c true, if invoked from inside the kernel, \c false if
2406 		   invoked from userland.
2407 	\return \c B_OK, if everything went fine, another error code otherwise.
2408 */
2409 static status_t
2410 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2411 	char* filename, bool kernel)
2412 {
2413 	if (!path)
2414 		return B_BAD_VALUE;
2415 	if (*path == '\0')
2416 		return B_ENTRY_NOT_FOUND;
2417 	if (fd == AT_FDCWD || fd == -1 || *path == '/')
2418 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2419 
2420 	status_t status = get_dir_path_and_leaf(path, filename);
2421 	if (status != B_OK)
2422 		return status;
2423 
2424 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2425 }
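
// Example (illustrative only): with AT_FDCWD and a relative path the lookup
// starts at the current working directory, mirroring the POSIX *at() calls:
//
//	char path[B_PATH_NAME_LENGTH] = "sub/dir/entry";
//	VnodePutter dir;
//	char leaf[B_FILE_NAME_LENGTH];
//	fd_and_path_to_dir_vnode(AT_FDCWD, path, dir, leaf, kernel);
//	// dir now refers to "sub/dir" (relative to the CWD), leaf is "entry".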
2426 
2427 
2428 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2429 		   to by a vnode + path pair.
2430 
2431 	\a path must be given in either case. \a vnode might be omitted, in which
2432 	case \a path is either an absolute path or one relative to the current
2433 	directory. If both are supplied and \a path is relative, it is reckoned off
2434 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2435 	ignored.
2436 
2437 	The caller has the responsibility to call put_vnode() on the returned
2438 	directory vnode.
2439 
2440 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2441 	it is successful or not.
2442 
2443 	\param vnode The vnode. May be \c NULL.
2444 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2445 	       is modified by this function. It must have at least room for a
2446 	       string one character longer than the path it contains.
2447 	\param _vnode A pointer to a variable the directory vnode shall be written
2448 		   into.
2449 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2450 		   the leaf name of the specified entry will be written.
2451 	\param kernel \c true, if invoked from inside the kernel, \c false if
2452 		   invoked from userland.
2453 	\return \c B_OK, if everything went fine, another error code otherwise.
2454 */
2455 static status_t
2456 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2457 	VnodePutter& _vnode, char* filename, bool kernel)
2458 {
2459 	VnodePutter vnodePutter(vnode);
2460 
2461 	if (!path)
2462 		return B_BAD_VALUE;
2463 	if (*path == '\0')
2464 		return B_ENTRY_NOT_FOUND;
2465 	if (vnode == NULL || path[0] == '/')
2466 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2467 
2468 	status_t status = get_dir_path_and_leaf(path, filename);
2469 	if (status != B_OK)
2470 		return status;
2471 
2472 	vnodePutter.Detach();
2473 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2474 }
2475 
2476 
2477 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2478 */
2479 static status_t
2480 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2481 	size_t bufferSize, struct io_context* ioContext)
2482 {
2483 	if (bufferSize < sizeof(struct dirent))
2484 		return B_BAD_VALUE;
2485 
2486 	// See if the vnode is covering another vnode and move to the covered
2487 	// vnode so we get the underlying file system
2488 	VnodePutter vnodePutter;
2489 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2490 		vnode = coveredVnode;
2491 		vnodePutter.SetTo(vnode);
2492 	}
2493 
2494 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2495 		// The FS supports getting the name of a vnode.
2496 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2497 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2498 			return B_OK;
2499 	}
2500 
2501 	// The FS doesn't support getting the name of a vnode. So we search the
2502 	// parent directory for the vnode, if the caller let us.
2503 
2504 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2505 		return B_UNSUPPORTED;
2506 
2507 	void* cookie;
2508 
2509 	status_t status = FS_CALL(parent, open_dir, &cookie);
2510 	if (status >= B_OK) {
2511 		while (true) {
2512 			uint32 num = 1;
2513 			// We use the FS hook directly instead of dir_read(), since we don't
2514 			// want the entries to be fixed. We have already resolved vnode to
2515 			// the covered node.
2516 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2517 				&num);
2518 			if (status != B_OK)
2519 				break;
2520 			if (num == 0) {
2521 				status = B_ENTRY_NOT_FOUND;
2522 				break;
2523 			}
2524 
2525 			if (vnode->id == buffer->d_ino) {
2526 				// found correct entry!
2527 				break;
2528 			}
2529 		}
2530 
2531 		FS_CALL(parent, close_dir, cookie);
2532 		FS_CALL(parent, free_dir_cookie, cookie);
2533 	}
2534 	return status;
2535 }
2536 
2537 
2538 static status_t
2539 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2540 	size_t nameSize, bool kernel)
2541 {
2542 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2543 	struct dirent* dirent = (struct dirent*)buffer;
2544 
2545 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2546 		get_current_io_context(kernel));
2547 	if (status != B_OK)
2548 		return status;
2549 
2550 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2551 		return B_BUFFER_OVERFLOW;
2552 
2553 	return B_OK;
2554 }
2555 
2556 
2557 /*!	Gets the full path to a given directory vnode.
2558 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2559 	file system doesn't support this call, it will fall back to iterating
2560 	through the parent directory to get the name of the child.
2561 
2562 	To protect against circular loops, it supports a maximum tree depth
2563 	of 256 levels.
2564 
2565 	Note that the path may no longer be correct by the time this function
2566 	returns! It doesn't use any locking to guarantee that the returned path
2567 	is still valid: the path to a file can change at any time.
2568 
2569 	It might be a good idea, though, to check in the calling function whether
2570 	the returned path exists (it's not done here for efficiency reasons).
2571 */
2572 static status_t
2573 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2574 	bool kernel)
2575 {
2576 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2577 
2578 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2579 		return B_BAD_VALUE;
2580 
2581 	if (!S_ISDIR(vnode->Type()))
2582 		return B_NOT_A_DIRECTORY;
2583 
2584 	char* path = buffer;
2585 	int32 insert = bufferSize;
2586 	int32 maxLevel = 256;
2587 	int32 length;
2588 	status_t status = B_OK;
2589 	struct io_context* ioContext = get_current_io_context(kernel);
2590 
2591 	// we don't use get_vnode() here because this call is more
2592 	// efficient and does all we need from get_vnode()
2593 	inc_vnode_ref_count(vnode);
2594 
2595 	path[--insert] = '\0';
2596 		// the path is filled right to left
2597 
2598 	while (true) {
2599 		// If the node is the context's root, bail out. Otherwise resolve mount
2600 		// points.
2601 		if (vnode == ioContext->root)
2602 			break;
2603 
2604 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2605 			put_vnode(vnode);
2606 			vnode = coveredVnode;
2607 		}
2608 
2609 		// lookup the parent vnode
2610 		struct vnode* parentVnode;
2611 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2612 		if (status != B_OK)
2613 			goto out;
2614 
2615 		if (parentVnode == vnode) {
2616 			// The caller apparently got their hands on a node outside of their
2617 			// context's root. Now we've hit the global root.
2618 			put_vnode(parentVnode);
2619 			break;
2620 		}
2621 
2622 		// get the node's name
2623 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2624 			// also used for fs_read_dir()
2625 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2626 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2627 			sizeof(nameBuffer), ioContext);
2628 
2629 		// release the current vnode, we only need its parent from now on
2630 		put_vnode(vnode);
2631 		vnode = parentVnode;
2632 
2633 		if (status != B_OK)
2634 			goto out;
2635 
2636 		// TODO: add an explicit check for loops in about 10 levels to do
2637 		// real loop detection
2638 
2639 		// don't go deeper than 'maxLevel' to protect against circular loops
2640 		if (maxLevel-- < 0) {
2641 			status = B_LINK_LIMIT;
2642 			goto out;
2643 		}
2644 
2645 		// add the name in front of the current path
2646 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2647 		length = strlen(name);
2648 		insert -= length;
2649 		if (insert <= 0) {
2650 			status = B_RESULT_NOT_REPRESENTABLE;
2651 			goto out;
2652 		}
2653 		memcpy(path + insert, name, length);
2654 		path[--insert] = '/';
2655 	}
2656 
2657 	// the root dir will result in an empty path: fix it
2658 	if (path[insert] == '\0')
2659 		path[--insert] = '/';
2660 
2661 	TRACE(("  path is: %s\n", path + insert));
2662 
2663 	// move the path to the start of the buffer
2664 	length = bufferSize - insert;
2665 	memmove(buffer, path + insert, length);
2666 
2667 out:
2668 	put_vnode(vnode);
2669 	return status;
2670 }
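
// Worked example (illustrative only): for a directory at /boot/home the path
// is assembled right to left in the buffer: first "home" is prepended (plus a
// leading '/'), then "boot", until the context root terminates the loop; the
// finished "/boot/home" is then memmove()d to the start of the buffer.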
2671 
2672 
2673 /*!	Checks the length of every path component, and adds a '.'
2674 	if the path ends in a slash.
2675 	The given path buffer must be able to store at least one
2676 	additional character.
2677 */
2678 static status_t
2679 check_path(char* to)
2680 {
2681 	int32 length = 0;
2682 
2683 	// check length of every path component
2684 
2685 	while (*to) {
2686 		char* begin;
2687 		if (*to == '/')
2688 			to++, length++;
2689 
2690 		begin = to;
2691 		while (*to != '/' && *to)
2692 			to++, length++;
2693 
2694 		if (to - begin > B_FILE_NAME_LENGTH)
2695 			return B_NAME_TOO_LONG;
2696 	}
2697 
2698 	if (length == 0)
2699 		return B_ENTRY_NOT_FOUND;
2700 
2701 	// complete path if there is a slash at the end
2702 
2703 	if (*(to - 1) == '/') {
2704 		if (length > B_PATH_NAME_LENGTH - 2)
2705 			return B_NAME_TOO_LONG;
2706 
2707 		to[0] = '.';
2708 		to[1] = '\0';
2709 	}
2710 
2711 	return B_OK;
2712 }
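
// Example (illustrative only):
//
//	char path[B_PATH_NAME_LENGTH] = "bin/";
//	check_path(path);	// B_OK; path is now "bin/."
//	// "" yields B_ENTRY_NOT_FOUND; a component longer than
//	// B_FILE_NAME_LENGTH yields B_NAME_TOO_LONG.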
2713 
2714 
2715 static struct file_descriptor*
2716 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2717 {
2718 	struct file_descriptor* descriptor
2719 		= get_fd(get_current_io_context(kernel), fd);
2720 	if (descriptor == NULL)
2721 		return NULL;
2722 
2723 	struct vnode* vnode = fd_vnode(descriptor);
2724 	if (vnode == NULL) {
2725 		put_fd(descriptor);
2726 		return NULL;
2727 	}
2728 
2729 	// ToDo: when we can close a file descriptor at any point, investigate
2730 	//	if this is still valid to do (accessing the vnode without ref_count
2731 	//	or locking)
2732 	*_vnode = vnode;
2733 	return descriptor;
2734 }
2735 
2736 
2737 static struct vnode*
2738 get_vnode_from_fd(int fd, bool kernel)
2739 {
2740 	struct file_descriptor* descriptor;
2741 	struct vnode* vnode;
2742 
2743 	descriptor = get_fd(get_current_io_context(kernel), fd);
2744 	if (descriptor == NULL)
2745 		return NULL;
2746 
2747 	vnode = fd_vnode(descriptor);
2748 	if (vnode != NULL)
2749 		inc_vnode_ref_count(vnode);
2750 
2751 	put_fd(descriptor);
2752 	return vnode;
2753 }
2754 
2755 
2756 /*!	Gets the vnode from an FD + path combination. If \a fd is \c AT_FDCWD
2757 	or -1, only the path will be considered. In this case, the \a path must
2758 	not be NULL.
2759 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2760 	and should be NULL for files.
2761 */
2762 static status_t
2763 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2764 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2765 {
2766 	if (fd < 0 && !path)
2767 		return B_BAD_VALUE;
2768 
2769 	if (path != NULL && *path == '\0')
2770 		return B_ENTRY_NOT_FOUND;
2771 
2772 	if ((fd == AT_FDCWD || fd == -1) || (path != NULL && path[0] == '/')) {
2773 		// no FD or absolute path
2774 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2775 	}
2776 
2777 	// FD only, or FD + relative path
2778 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2779 	if (vnode == NULL)
2780 		return B_FILE_ERROR;
2781 
2782 	if (path != NULL) {
2783 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2784 			_vnode, _parentID);
2785 	}
2786 
2787 	// there is no relative path to take into account
2788 
2789 	_vnode.SetTo(vnode);
2790 	if (_parentID)
2791 		*_parentID = -1;
2792 
2793 	return B_OK;
2794 }
2795 
2796 
2797 struct vnode*
2798 fd_vnode(struct file_descriptor* descriptor)
2799 {
2800 	if (descriptor->ops == &sFileOps
2801 			|| descriptor->ops == &sDirectoryOps
2802 			|| descriptor->ops == &sAttributeOps
2803 			|| descriptor->ops == &sAttributeDirectoryOps)
2804 		return descriptor->u.vnode;
2805 
2806 	return NULL;
2807 }
2808 
2809 
2810 bool
2811 fd_is_file(struct file_descriptor* descriptor)
2812 {
2813 	return descriptor->ops == &sFileOps;
2814 }
2815 
2816 
2817 static int
2818 get_new_fd(struct fd_ops* ops, struct fs_mount* mount, struct vnode* vnode,
2819 	void* cookie, int openMode, bool kernel)
2820 {
2821 	struct file_descriptor* descriptor;
2822 	int fd;
2823 
2824 	// If the vnode is locked, we don't allow creating a new file/directory
2825 	// file_descriptor for it
2826 	if (vnode && vnode->mandatory_locked_by != NULL
2827 		&& (ops == &sFileOps || ops == &sDirectoryOps))
2828 		return B_BUSY;
2829 
2830 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2831 		return B_BAD_VALUE;
2832 
2833 	descriptor = alloc_fd();
2834 	if (!descriptor)
2835 		return B_NO_MEMORY;
2836 
2837 	if (vnode)
2838 		descriptor->u.vnode = vnode;
2839 	else
2840 		descriptor->u.mount = mount;
2841 	descriptor->cookie = cookie;
2842 
2843 	descriptor->ops = ops;
2844 	descriptor->open_mode = openMode;
2845 
2846 	if (descriptor->ops->fd_seek != NULL) {
2847 		// some kinds of files are not seekable
2848 		switch (vnode->Type() & S_IFMT) {
2849 			case S_IFIFO:
2850 			case S_IFSOCK:
2851 				ASSERT(descriptor->pos == -1);
2852 				break;
2853 
2854 			// The Open Group Base Specs don't mention any file types besides pipes,
2855 			// FIFOs, and sockets specially, so we allow seeking all others.
2856 			default:
2857 				descriptor->pos = 0;
2858 				break;
2859 		}
2860 	}
2861 
2862 	io_context* context = get_current_io_context(kernel);
2863 	fd = new_fd(context, descriptor);
2864 	if (fd < 0) {
2865 		descriptor->ops = NULL;
2866 		put_fd(descriptor);
2867 		return B_NO_MORE_FDS;
2868 	}
2869 
2870 	mutex_lock(&context->io_mutex);
2871 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2872 	mutex_unlock(&context->io_mutex);
2873 
2874 	return fd;
2875 }
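
// Illustrative sketch (hypothetical open path, error handling elided): after
// an FS hook has produced a cookie, it is bound to a new descriptor like so:
//
//	int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
//	if (fd >= 0) {
//		// descriptor installed; FIFOs/sockets keep pos == -1
//		// (unseekable), regular files start at offset 0, and
//		// O_CLOEXEC has been applied atomically above.
//	}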
2876 
2877 
2878 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2879 	vfs_normalize_path(). See there for more documentation.
2880 */
2881 static status_t
2882 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2883 {
2884 	VnodePutter dir;
2885 	status_t error;
2886 
2887 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2888 		// get dir vnode + leaf name
2889 		char leaf[B_FILE_NAME_LENGTH];
2890 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2891 		if (error != B_OK)
2892 			return error;
2893 		strcpy(path, leaf);
2894 
2895 		// get file vnode, if we shall resolve links
2896 		bool fileExists = false;
2897 		VnodePutter fileVnode;
2898 		if (traverseLink) {
2899 			inc_vnode_ref_count(dir.Get());
2900 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2901 					NULL) == B_OK) {
2902 				fileExists = true;
2903 			}
2904 		}
2905 
2906 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2907 			// we're done -- construct the path
2908 			bool hasLeaf = true;
2909 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2910 				// special cases "." and ".." -- get the dir, forget the leaf
2911 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2912 					dir, NULL);
2913 				if (error != B_OK)
2914 					return error;
2915 				hasLeaf = false;
2916 			}
2917 
2918 			// get the directory path
2919 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2920 			if (error != B_OK)
2921 				return error;
2922 
2923 			// append the leaf name
2924 			if (hasLeaf) {
2925 				// insert a directory separator if this is not the file system
2926 				// root
2927 				if ((strcmp(path, "/") != 0
2928 					&& strlcat(path, "/", pathSize) >= pathSize)
2929 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2930 					return B_NAME_TOO_LONG;
2931 				}
2932 			}
2933 
2934 			return B_OK;
2935 		}
2936 
2937 		// read link
2938 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2939 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2940 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2941 			if (error != B_OK)
2942 				return error;
2943 			if (bufferSize < B_PATH_NAME_LENGTH)
2944 				path[bufferSize] = '\0';
2945 		} else
2946 			return B_BAD_VALUE;
2947 	}
2948 
2949 	return B_LINK_LIMIT;
2950 }
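
// Example (illustrative only, assuming the directories exist):
//
//	char path[B_PATH_NAME_LENGTH] = "/boot//home/../home/.";
//	normalize_path(path, sizeof(path), true, true);
//	// path is now "/boot/home"; a symlink leaf would have been
//	// resolved as well, since traverseLink is true.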
2951 
2952 
2953 static status_t
2954 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2955 	struct io_context* ioContext)
2956 {
2957 	// Make sure the IO context root is not bypassed.
2958 	if (parent == ioContext->root) {
2959 		*_device = parent->device;
2960 		*_node = parent->id;
2961 		return B_OK;
2962 	}
2963 
2964 	inc_vnode_ref_count(parent);
2965 		// vnode_path_to_vnode() puts the node
2966 
2967 	// ".." is guaranteed not to be clobbered by this call
2968 	VnodePutter vnode;
2969 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2970 		ioContext, vnode, NULL);
2971 	if (status == B_OK) {
2972 		*_device = vnode->device;
2973 		*_node = vnode->id;
2974 	}
2975 
2976 	return status;
2977 }
2978 
2979 
2980 #ifdef ADD_DEBUGGER_COMMANDS
2981 
2982 
2983 static void
2984 _dump_advisory_locking(advisory_locking* locking)
2985 {
2986 	if (locking == NULL)
2987 		return;
2988 
2989 	kprintf("   lock:        %" B_PRId32, locking->lock);
2990 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2991 
2992 	int32 index = 0;
2993 	LockList::Iterator iterator = locking->locks.GetIterator();
2994 	while (iterator.HasNext()) {
2995 		struct advisory_lock* lock = iterator.Next();
2996 
2997 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2998 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2999 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3000 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3001 	}
3002 }
3003 
3004 
3005 static void
3006 _dump_mount(struct fs_mount* mount)
3007 {
3008 	kprintf("MOUNT: %p\n", mount);
3009 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3010 	kprintf(" device_name:   %s\n", mount->device_name);
3011 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3012 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3013 	kprintf(" partition:     %p\n", mount->partition);
3014 	kprintf(" lock:          %p\n", &mount->lock);
3015 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3016 		mount->owns_file_device ? " owns_file_device" : "");
3017 
3018 	fs_volume* volume = mount->volume;
3019 	while (volume != NULL) {
3020 		kprintf(" volume %p:\n", volume);
3021 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3022 		kprintf("  private_volume:   %p\n", volume->private_volume);
3023 		kprintf("  ops:              %p\n", volume->ops);
3024 		kprintf("  file_system:      %p\n", volume->file_system);
3025 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3026 		volume = volume->super_volume;
3027 	}
3028 
3029 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3030 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3031 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3032 	set_debug_variable("_partition", (addr_t)mount->partition);
3033 }
3034 
3035 
3036 static bool
3037 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3038 	const char* name)
3039 {
3040 	bool insertSlash = buffer[bufferSize] != '\0';
3041 	size_t nameLength = strlen(name);
3042 
3043 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3044 		return false;
3045 
3046 	if (insertSlash)
3047 		buffer[--bufferSize] = '/';
3048 
3049 	bufferSize -= nameLength;
3050 	memcpy(buffer + bufferSize, name, nameLength);
3051 
3052 	return true;
3053 }
3054 
3055 
3056 static bool
3057 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3058 	ino_t nodeID)
3059 {
3060 	if (bufferSize == 0)
3061 		return false;
3062 
3063 	bool insertSlash = buffer[bufferSize] != '\0';
3064 	if (insertSlash)
3065 		buffer[--bufferSize] = '/';
3066 
3067 	size_t size = snprintf(buffer, bufferSize,
3068 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3069 	if (size > bufferSize) {
3070 		if (insertSlash)
3071 			bufferSize++;
3072 		return false;
3073 	}
3074 
3075 	if (size < bufferSize)
3076 		memmove(buffer + bufferSize - size, buffer, size);
3077 
3078 	bufferSize -= size;
3079 	return true;
3080 }
3081 
3082 
3083 static char*
3084 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3085 	bool& _truncated)
3086 {
3087 	// null-terminate the path
3088 	buffer[--bufferSize] = '\0';
3089 
3090 	while (true) {
3091 		while (vnode->covers != NULL)
3092 			vnode = vnode->covers;
3093 
3094 		if (vnode == sRoot) {
3095 			_truncated = bufferSize == 0;
3096 			if (!_truncated)
3097 				buffer[--bufferSize] = '/';
3098 			return buffer + bufferSize;
3099 		}
3100 
3101 		// resolve the name
3102 		ino_t dirID;
3103 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3104 			vnode->id, dirID);
3105 		if (name == NULL) {
3106 			// Failed to resolve the name -- prepend "<dev,node>/".
3107 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3108 				vnode->mount->id, vnode->id);
3109 			return buffer + bufferSize;
3110 		}
3111 
3112 		// prepend the name
3113 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3114 			_truncated = true;
3115 			return buffer + bufferSize;
3116 		}
3117 
3118 		// resolve the directory node
3119 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3120 		if (nextVnode == NULL) {
3121 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3122 				vnode->mount->id, dirID);
3123 			return buffer + bufferSize;
3124 		}
3125 
3126 		vnode = nextVnode;
3127 	}
3128 }
3129 
3130 
3131 static void
3132 _dump_vnode(struct vnode* vnode, bool printPath)
3133 {
3134 	kprintf("VNODE: %p\n", vnode);
3135 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3136 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3137 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3138 	kprintf(" private_node:  %p\n", vnode->private_node);
3139 	kprintf(" mount:         %p\n", vnode->mount);
3140 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3141 	kprintf(" covers:        %p\n", vnode->covers);
3142 	kprintf(" cache:         %p\n", vnode->cache);
3143 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3144 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3145 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3146 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3147 
3148 	_dump_advisory_locking(vnode->advisory_locking);
3149 
3150 	if (printPath) {
3151 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3152 		if (buffer != NULL) {
3153 			bool truncated;
3154 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3155 				B_PATH_NAME_LENGTH, truncated);
3156 			if (path != NULL) {
3157 				kprintf(" path:          ");
3158 				if (truncated)
3159 					kputs("<truncated>/");
3160 				kputs(path);
3161 				kputs("\n");
3162 			} else
3163 				kprintf("Failed to resolve vnode path.\n");
3164 
3165 			debug_free(buffer);
3166 		} else
3167 			kprintf("Failed to allocate memory for constructing the path.\n");
3168 	}
3169 
3170 	set_debug_variable("_node", (addr_t)vnode->private_node);
3171 	set_debug_variable("_mount", (addr_t)vnode->mount);
3172 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3173 	set_debug_variable("_covers", (addr_t)vnode->covers);
3174 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3175 }
3176 
3177 
3178 static int
3179 dump_mount(int argc, char** argv)
3180 {
3181 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3182 		kprintf("usage: %s [id|address]\n", argv[0]);
3183 		return 0;
3184 	}
3185 
3186 	ulong val = parse_expression(argv[1]);
3187 	uint32 id = val;
3188 
3189 	struct fs_mount* mount = sMountsTable->Lookup(id);
3190 	if (mount == NULL) {
3191 		if (IS_USER_ADDRESS(id)) {
3192 			kprintf("fs_mount not found\n");
3193 			return 0;
3194 		}
3195 		mount = (fs_mount*)val;
3196 	}
3197 
3198 	_dump_mount(mount);
3199 	return 0;
3200 }
3201 
3202 
3203 static int
3204 dump_mounts(int argc, char** argv)
3205 {
3206 	if (argc != 1) {
3207 		kprintf("usage: %s\n", argv[0]);
3208 		return 0;
3209 	}
3210 
3211 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3212 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3213 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3214 
3215 	struct fs_mount* mount;
3216 
3217 	MountTable::Iterator iterator(sMountsTable);
3218 	while (iterator.HasNext()) {
3219 		mount = iterator.Next();
3220 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3221 			mount->root_vnode->covers, mount->volume->private_volume,
3222 			mount->volume->file_system_name);
3223 
3224 		fs_volume* volume = mount->volume;
3225 		while (volume->super_volume != NULL) {
3226 			volume = volume->super_volume;
3227 			kprintf("                                     %p %s\n",
3228 				volume->private_volume, volume->file_system_name);
3229 		}
3230 	}
3231 
3232 	return 0;
3233 }
3234 
3235 
3236 static int
3237 dump_vnode(int argc, char** argv)
3238 {
3239 	bool printPath = false;
3240 	int argi = 1;
3241 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3242 		printPath = true;
3243 		argi++;
3244 	}
3245 
3246 	if (argi >= argc || argi + 2 < argc || strcmp(argv[argi], "--help") == 0) {
3247 		print_debugger_command_usage(argv[0]);
3248 		return 0;
3249 	}
3250 
3251 	struct vnode* vnode = NULL;
3252 
3253 	if (argi + 1 == argc) {
3254 		vnode = (struct vnode*)parse_expression(argv[argi]);
3255 		if (IS_USER_ADDRESS(vnode)) {
3256 			kprintf("invalid vnode address\n");
3257 			return 0;
3258 		}
3259 		_dump_vnode(vnode, printPath);
3260 		return 0;
3261 	}
3262 
3263 	dev_t device = parse_expression(argv[argi]);
3264 	ino_t id = parse_expression(argv[argi + 1]);
3265 
3266 	VnodeTable::Iterator iterator(sVnodeTable);
3267 	while (iterator.HasNext()) {
3268 		vnode = iterator.Next();
3269 		if (vnode->id != id || vnode->device != device)
3270 			continue;
3271 
3272 		_dump_vnode(vnode, printPath);
3273 	}
3274 
3275 	return 0;
3276 }
3277 
3278 
3279 static int
3280 dump_vnodes(int argc, char** argv)
3281 {
3282 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3283 		kprintf("usage: %s [device]\n", argv[0]);
3284 		return 0;
3285 	}
3286 
3287 	// restrict dumped nodes to a certain device if requested
3288 	dev_t device = parse_expression(argv[1]);
3289 
3290 	struct vnode* vnode;
3291 
3292 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3293 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3294 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3295 
3296 	VnodeTable::Iterator iterator(sVnodeTable);
3297 	while (iterator.HasNext()) {
3298 		vnode = iterator.Next();
3299 		if (vnode->device != device)
3300 			continue;
3301 
3302 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3303 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3304 			vnode->private_node, vnode->advisory_locking,
3305 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3306 			vnode->IsUnpublished() ? "u" : "-");
3307 	}
3308 
3309 	return 0;
3310 }
3311 
3312 
3313 static int
3314 dump_vnode_caches(int argc, char** argv)
3315 {
3316 	struct vnode* vnode;
3317 
3318 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3319 		kprintf("usage: %s [device]\n", argv[0]);
3320 		return 0;
3321 	}
3322 
3323 	// restrict dumped nodes to a certain device if requested
3324 	dev_t device = -1;
3325 	if (argc > 1)
3326 		device = parse_expression(argv[1]);
3327 
3328 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3329 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3330 
3331 	VnodeTable::Iterator iterator(sVnodeTable);
3332 	while (iterator.HasNext()) {
3333 		vnode = iterator.Next();
3334 		if (vnode->cache == NULL)
3335 			continue;
3336 		if (device != -1 && vnode->device != device)
3337 			continue;
3338 
3339 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3340 			vnode, vnode->device, vnode->id, vnode->cache,
3341 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3342 			vnode->cache->page_count);
3343 	}
3344 
3345 	return 0;
3346 }
3347 
3348 
3349 int
3350 dump_io_context(int argc, char** argv)
3351 {
3352 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3353 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3354 		return 0;
3355 	}
3356 
3357 	struct io_context* context = NULL;
3358 
3359 	if (argc > 1) {
3360 		ulong num = parse_expression(argv[1]);
3361 		if (IS_KERNEL_ADDRESS(num))
3362 			context = (struct io_context*)num;
3363 		else {
3364 			Team* team = team_get_team_struct_locked(num);
3365 			if (team == NULL) {
3366 				kprintf("could not find team with ID %lu\n", num);
3367 				return 0;
3368 			}
3369 			context = (struct io_context*)team->io_context;
3370 		}
3371 	} else
3372 		context = get_current_io_context(true);
3373 
3374 	kprintf("I/O CONTEXT: %p\n", context);
3375 	kprintf(" root vnode:\t%p\n", context->root);
3376 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3377 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3378 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3379 
3380 	if (context->num_used_fds) {
3381 		kprintf("   no.    %*s  ref  open  mode         pos    %*s\n",
3382 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3383 	}
3384 
3385 	for (uint32 i = 0; i < context->table_size; i++) {
3386 		struct file_descriptor* fd = context->fds[i];
3387 		if (fd == NULL)
3388 			continue;
3389 
3390 		kprintf("  %3" B_PRIu32 ":  %p  %3" B_PRId32 "  %4"
3391 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3392 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3393 			fd->pos, fd->cookie,
3394 			(fd_vnode(fd) != NULL) ? "vnode" : "mount",
3395 			fd->u.vnode);
3396 	}
3397 
3398 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3399 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3400 
3401 	set_debug_variable("_cwd", (addr_t)context->cwd);
3402 
3403 	return 0;
3404 }
3405 
3406 
3407 int
3408 dump_vnode_usage(int argc, char** argv)
3409 {
3410 	if (argc != 1) {
3411 		kprintf("usage: %s\n", argv[0]);
3412 		return 0;
3413 	}
3414 
3415 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3416 		sUnusedVnodes, kMaxUnusedVnodes);
3417 
3418 	uint32 count = sVnodeTable->CountElements();
3419 
3420 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3421 		count - sUnusedVnodes);
3422 	return 0;
3423 }
3424 
3425 #endif	// ADD_DEBUGGER_COMMANDS
3426 
3427 
3428 /*!	Clears memory specified by an iovec array.
3429 */
3430 static void
3431 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3432 {
3433 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3434 		size_t length = std::min(vecs[i].iov_len, bytes);
3435 		memset(vecs[i].iov_base, 0, length);
3436 		bytes -= length;
3437 	}
3438 }
3439 
3440 
3441 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3442 	and calls the file system hooks to read/write the request to disk.
3443 */
3444 static status_t
3445 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3446 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3447 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3448 	bool doWrite)
3449 {
3450 	if (fileVecCount == 0) {
3451 		// There are no file vecs at this offset, so we're obviously trying
3452 		// to access the file outside of its bounds
3453 		return B_BAD_VALUE;
3454 	}
3455 
3456 	size_t numBytes = *_numBytes;
3457 	uint32 fileVecIndex;
3458 	size_t vecOffset = *_vecOffset;
3459 	uint32 vecIndex = *_vecIndex;
3460 	status_t status;
3461 	size_t size;
3462 
3463 	if (!doWrite && vecOffset == 0) {
3464 		// now directly read the data from the device
3465 		// the first file_io_vec can be read directly
3466 		// TODO: we could also write directly
3467 
3468 		if (fileVecs[0].length < (off_t)numBytes)
3469 			size = fileVecs[0].length;
3470 		else
3471 			size = numBytes;
3472 
3473 		if (fileVecs[0].offset >= 0) {
3474 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3475 				&vecs[vecIndex], vecCount - vecIndex, &size);
3476 		} else {
3477 			// sparse read
3478 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3479 			status = B_OK;
3480 		}
3481 		if (status != B_OK)
3482 			return status;
3483 
3484 		ASSERT((off_t)size <= fileVecs[0].length);
3485 
3486 		// If the file portion was contiguous, we're already done now
3487 		if (size == numBytes)
3488 			return B_OK;
3489 
3490 		// if we reached the end of the file, we can return as well
3491 		if ((off_t)size != fileVecs[0].length) {
3492 			*_numBytes = size;
3493 			return B_OK;
3494 		}
3495 
3496 		fileVecIndex = 1;
3497 
3498 		// first, find out where we have to continue in our iovecs
3499 		for (; vecIndex < vecCount; vecIndex++) {
3500 			if (size < vecs[vecIndex].iov_len)
3501 				break;
3502 
3503 			size -= vecs[vecIndex].iov_len;
3504 		}
3505 
3506 		vecOffset = size;
3507 	} else {
3508 		fileVecIndex = 0;
3509 		size = 0;
3510 	}
3511 
3512 	// Too bad, let's process the rest of the file_io_vecs
3513 
3514 	size_t totalSize = size;
3515 	size_t bytesLeft = numBytes - size;
3516 
3517 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3518 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3519 		off_t fileOffset = fileVec.offset;
3520 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3521 
3522 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3523 			fileLeft));
3524 
3525 		// process the complete fileVec
3526 		while (fileLeft > 0) {
3527 			iovec tempVecs[MAX_TEMP_IO_VECS];
3528 			uint32 tempCount = 0;
3529 
3530 			// size tracks how much of what is left of the current fileVec
3531 			// (fileLeft) has been assigned to tempVecs
3532 			size = 0;
3533 
3534 			// assign what is left of the current fileVec to the tempVecs
3535 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3536 					&& tempCount < MAX_TEMP_IO_VECS;) {
3537 				// try to satisfy one iovec per iteration (or as much as
3538 				// possible)
3539 
3540 				// bytes left of the current iovec
3541 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3542 				if (vecLeft == 0) {
3543 					vecOffset = 0;
3544 					vecIndex++;
3545 					continue;
3546 				}
3547 
3548 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3549 					vecIndex, vecOffset, size));
3550 
3551 				// actually available bytes
3552 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3553 
3554 				tempVecs[tempCount].iov_base
3555 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3556 				tempVecs[tempCount].iov_len = tempVecSize;
3557 				tempCount++;
3558 
3559 				size += tempVecSize;
3560 				vecOffset += tempVecSize;
3561 			}
3562 
3563 			size_t bytes = size;
3564 
3565 			if (fileOffset == -1) {
3566 				if (doWrite) {
3567 					panic("sparse write attempt: vnode %p", vnode);
3568 					status = B_IO_ERROR;
3569 				} else {
3570 					// sparse read
3571 					zero_iovecs(tempVecs, tempCount, bytes);
3572 					status = B_OK;
3573 				}
3574 			} else if (doWrite) {
3575 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3576 					tempVecs, tempCount, &bytes);
3577 			} else {
3578 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3579 					tempVecs, tempCount, &bytes);
3580 			}
3581 			if (status != B_OK)
3582 				return status;
3583 
3584 			totalSize += bytes;
3585 			bytesLeft -= size;
3586 			if (fileOffset >= 0)
3587 				fileOffset += size;
3588 			fileLeft -= size;
3589 			//dprintf("-> file left = %Lu\n", fileLeft);
3590 
3591 			if (size != bytes || vecIndex >= vecCount) {
3592 				// there are no more bytes or iovecs, let's bail out
3593 				*_numBytes = totalSize;
3594 				return B_OK;
3595 			}
3596 		}
3597 	}
3598 
3599 	*_vecIndex = vecIndex;
3600 	*_vecOffset = vecOffset;
3601 	*_numBytes = totalSize;
3602 	return B_OK;
3603 }
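
// Worked example (illustrative only): a 6 KiB read described by two file
// vecs, of which the second is sparse:
//
//	file_io_vec fileVecs[] = { { 4096, 4096 }, { -1, 2048 } };
//	// The first 4096 bytes are read straight from device offset 4096
//	// via read_pages(); the remaining 2048 bytes have offset -1 and
//	// are therefore zeroed in the iovecs (sparse read) instead.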
3604 
3605 
3606 static bool
3607 is_user_in_group(gid_t gid)
3608 {
3609 	if (gid == getegid())
3610 		return true;
3611 
3612 	gid_t groups[NGROUPS_MAX];
3613 	int groupCount = getgroups(NGROUPS_MAX, groups);
3614 	for (int i = 0; i < groupCount; i++) {
3615 		if (gid == groups[i])
3616 			return true;
3617 	}
3618 
3619 	return false;
3620 }
3621 
3622 
3623 static status_t
3624 free_io_context(io_context* context)
3625 {
3626 	uint32 i;
3627 
3628 	TIOC(FreeIOContext(context));
3629 
3630 	if (context->root)
3631 		put_vnode(context->root);
3632 
3633 	if (context->cwd)
3634 		put_vnode(context->cwd);
3635 
3636 	mutex_lock(&context->io_mutex);
3637 
3638 	for (i = 0; i < context->table_size; i++) {
3639 		if (struct file_descriptor* descriptor = context->fds[i]) {
3640 			close_fd(context, descriptor);
3641 			put_fd(descriptor);
3642 		}
3643 	}
3644 
3645 	mutex_destroy(&context->io_mutex);
3646 
3647 	remove_node_monitors(context);
3648 	free(context->fds);
3649 	free(context);
3650 
3651 	return B_OK;
3652 }
3653 
3654 
3655 static status_t
3656 resize_monitor_table(struct io_context* context, const int newSize)
3657 {
3658 	status_t status = B_OK;
3659 
3660 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3661 		return B_BAD_VALUE;
3662 
3663 	mutex_lock(&context->io_mutex);
3664 
3665 	if ((size_t)newSize < context->num_monitors) {
3666 		status = B_BUSY;
3667 		goto out;
3668 	}
3669 	context->max_monitors = newSize;
3670 
3671 out:
3672 	mutex_unlock(&context->io_mutex);
3673 	return status;
3674 }
3675 
3676 
3677 //	#pragma mark - public API for file systems
3678 
3679 
3680 extern "C" status_t
3681 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3682 	fs_vnode_ops* ops)
3683 {
3684 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3685 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3686 
3687 	if (privateNode == NULL)
3688 		return B_BAD_VALUE;
3689 
3690 	int32 tries = BUSY_VNODE_RETRIES;
3691 restart:
3692 	// create the node
3693 	bool nodeCreated;
3694 	struct vnode* vnode;
3695 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3696 		nodeCreated);
3697 	if (status != B_OK)
3698 		return status;
3699 
3700 	WriteLocker nodeLocker(sVnodeLock, true);
3701 		// create_new_vnode_and_lock() has locked for us
3702 
3703 	if (!nodeCreated && vnode->IsBusy()) {
3704 		nodeLocker.Unlock();
3705 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3706 			return B_BUSY;
3707 		goto restart;
3708 	}
3709 
3710 	// file system integrity check:
3711 	// test if the vnode already exists and bail out if this is the case!
3712 	if (!nodeCreated) {
3713 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3714 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3715 			vnode->private_node);
3716 		return B_ERROR;
3717 	}
3718 
3719 	vnode->private_node = privateNode;
3720 	vnode->ops = ops;
3721 	vnode->SetUnpublished(true);
3722 
3723 	TRACE(("returns: %s\n", strerror(status)));
3724 
3725 	return status;
3726 }
3727 
3728 
3729 extern "C" status_t
3730 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3731 	fs_vnode_ops* ops, int type, uint32 flags)
3732 {
3733 	FUNCTION(("publish_vnode()\n"));
3734 
3735 	int32 tries = BUSY_VNODE_RETRIES;
3736 restart:
3737 	WriteLocker locker(sVnodeLock);
3738 
3739 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3740 
3741 	bool nodeCreated = false;
3742 	if (vnode == NULL) {
3743 		if (privateNode == NULL)
3744 			return B_BAD_VALUE;
3745 
3746 		// create the node
3747 		locker.Unlock();
3748 			// create_new_vnode_and_lock() will re-lock for us on success
3749 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3750 			nodeCreated);
3751 		if (status != B_OK)
3752 			return status;
3753 
3754 		locker.SetTo(sVnodeLock, true);
3755 	}
3756 
3757 	if (nodeCreated) {
3758 		vnode->private_node = privateNode;
3759 		vnode->ops = ops;
3760 		vnode->SetUnpublished(true);
3761 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3762 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3763 		// already known, but not published
3764 	} else if (vnode->IsBusy()) {
3765 		locker.Unlock();
3766 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3767 			return B_BUSY;
3768 		goto restart;
3769 	} else
3770 		return B_BAD_VALUE;
3771 
3772 	bool publishSpecialSubNode = false;
3773 
3774 	vnode->SetType(type);
3775 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3776 	publishSpecialSubNode = is_special_node_type(type)
3777 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3778 
3779 	status_t status = B_OK;
3780 
3781 	// create sub vnodes, if necessary
3782 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3783 		locker.Unlock();
3784 
3785 		fs_volume* subVolume = volume;
3786 		if (volume->sub_volume != NULL) {
3787 			while (status == B_OK && subVolume->sub_volume != NULL) {
3788 				subVolume = subVolume->sub_volume;
3789 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3790 					vnode);
3791 			}
3792 		}
3793 
3794 		if (status == B_OK && publishSpecialSubNode)
3795 			status = create_special_sub_node(vnode, flags);
3796 
3797 		if (status != B_OK) {
3798 			// error -- clean up the created sub vnodes
3799 			while (subVolume->super_volume != volume) {
3800 				subVolume = subVolume->super_volume;
3801 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3802 			}
3803 		}
3804 
3805 		if (status == B_OK) {
3806 			ReadLocker vnodesReadLocker(sVnodeLock);
3807 			AutoLocker<Vnode> nodeLocker(vnode);
3808 			vnode->SetBusy(false);
3809 			vnode->SetUnpublished(false);
3810 		} else {
3811 			locker.Lock();
3812 			sVnodeTable->Remove(vnode);
3813 			remove_vnode_from_mount_list(vnode, vnode->mount);
3814 			object_cache_free(sVnodeCache, vnode, 0);
3815 		}
3816 	} else {
3817 		// we still hold the write lock -- mark the node unbusy and published
3818 		vnode->SetBusy(false);
3819 		vnode->SetUnpublished(false);
3820 	}
3821 
3822 	TRACE(("returns: %s\n", strerror(status)));
3823 
3824 	return status;
3825 }
3826 
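
// Illustrative sketch, kept out of the build: the typical two-step sequence
// from a file system's point of view. "myVolume", "myNode" and "myOps" are
// placeholder names, and S_IFREG is merely an example node type.
#if 0
static status_t
example_publish_new_node(fs_volume* myVolume, ino_t id, void* myNode,
	fs_vnode_ops* myOps)
{
	// make the node known to the VFS; it remains busy and unpublished
	status_t status = new_vnode(myVolume, id, myNode, myOps);
	if (status != B_OK)
		return status;

	// publishing marks the node unbusy and available for lookup
	return publish_vnode(myVolume, id, myNode, myOps, S_IFREG, 0);
}
#endif
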
3827 
3828 extern "C" status_t
3829 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3830 {
3831 	struct vnode* vnode;
3832 
3833 	if (volume == NULL)
3834 		return B_BAD_VALUE;
3835 
3836 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3837 	if (status != B_OK)
3838 		return status;
3839 
3840 	// If this is a layered FS, we need to get the node cookie for the requested
3841 	// layer.
3842 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3843 		fs_vnode resolvedNode;
3844 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3845 			&resolvedNode);
3846 		if (status != B_OK) {
3847 			panic("get_vnode(): Failed to get super node for vnode %p, "
3848 				"volume: %p", vnode, volume);
3849 			put_vnode(vnode);
3850 			return status;
3851 		}
3852 
3853 		if (_privateNode != NULL)
3854 			*_privateNode = resolvedNode.private_node;
3855 	} else if (_privateNode != NULL)
3856 		*_privateNode = vnode->private_node;
3857 
3858 	return B_OK;
3859 }
3860 
3861 
3862 extern "C" status_t
3863 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3864 {
3865 	ReadLocker nodeLocker(sVnodeLock);
3866 
3867 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3868 	if (vnode == NULL)
3869 		return B_BAD_VALUE;
3870 
3871 	inc_vnode_ref_count(vnode);
3872 	return B_OK;
3873 }
3874 
3875 
3876 extern "C" status_t
3877 put_vnode(fs_volume* volume, ino_t vnodeID)
3878 {
3879 	struct vnode* vnode;
3880 
3881 	rw_lock_read_lock(&sVnodeLock);
3882 	vnode = lookup_vnode(volume->id, vnodeID);
3883 	rw_lock_read_unlock(&sVnodeLock);
3884 
3885 	if (vnode == NULL)
3886 		return B_BAD_VALUE;
3887 
3888 	dec_vnode_ref_count(vnode, false, true);
3889 	return B_OK;
3890 }
3891 
3892 
3893 extern "C" status_t
3894 remove_vnode(fs_volume* volume, ino_t vnodeID)
3895 {
3896 	ReadLocker locker(sVnodeLock);
3897 
3898 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3899 	if (vnode == NULL)
3900 		return B_ENTRY_NOT_FOUND;
3901 
3902 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3903 		// this vnode is in use
3904 		return B_BUSY;
3905 	}
3906 
3907 	vnode->Lock();
3908 
3909 	vnode->SetRemoved(true);
3910 	bool removeUnpublished = false;
3911 
3912 	if (vnode->IsUnpublished()) {
3913 		// prepare the vnode for deletion
3914 		removeUnpublished = true;
3915 		vnode->SetBusy(true);
3916 	}
3917 
3918 	vnode->Unlock();
3919 	locker.Unlock();
3920 
3921 	if (removeUnpublished) {
3922 		// If the vnode hasn't been published yet, we delete it here
3923 		atomic_add(&vnode->ref_count, -1);
3924 		free_vnode(vnode, true);
3925 	}
3926 
3927 	return B_OK;
3928 }
3929 
3930 
3931 extern "C" status_t
3932 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3933 {
3934 	struct vnode* vnode;
3935 
3936 	rw_lock_read_lock(&sVnodeLock);
3937 
3938 	vnode = lookup_vnode(volume->id, vnodeID);
3939 	if (vnode) {
3940 		AutoLocker<Vnode> nodeLocker(vnode);
3941 		vnode->SetRemoved(false);
3942 	}
3943 
3944 	rw_lock_read_unlock(&sVnodeLock);
3945 	return B_OK;
3946 }
3947 
3948 
3949 extern "C" status_t
3950 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3951 {
3952 	ReadLocker _(sVnodeLock);
3953 
3954 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3955 		if (_removed != NULL)
3956 			*_removed = vnode->IsRemoved();
3957 		return B_OK;
3958 	}
3959 
3960 	return B_BAD_VALUE;
3961 }
3962 
3963 
3964 extern "C" fs_volume*
3965 volume_for_vnode(fs_vnode* _vnode)
3966 {
3967 	if (_vnode == NULL)
3968 		return NULL;
3969 
3970 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3971 	return vnode->mount->volume;
3972 }
3973 
3974 
3975 extern "C" status_t
3976 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3977 	uid_t nodeUserID)
3978 {
3979 	// get node permissions
3980 	int userPermissions = (mode & S_IRWXU) >> 6;
3981 	int groupPermissions = (mode & S_IRWXG) >> 3;
3982 	int otherPermissions = mode & S_IRWXO;
3983 
3984 	// get the node permissions for this uid/gid
3985 	int permissions = 0;
3986 	uid_t uid = geteuid();
3987 
3988 	if (uid == 0) {
3989 		// user is root
3990 		// root has always read/write permission, but at least one of the
3991 		// X bits must be set for execute permission
3992 		permissions = userPermissions | groupPermissions | otherPermissions
3993 			| S_IROTH | S_IWOTH;
3994 		if (S_ISDIR(mode))
3995 			permissions |= S_IXOTH;
3996 	} else if (uid == nodeUserID) {
3997 		// user is node owner
3998 		permissions = userPermissions;
3999 	} else if (is_user_in_group(nodeGroupID)) {
4000 		// user is in owning group
4001 		permissions = groupPermissions;
4002 	} else {
4003 		// user is one of the others
4004 		permissions = otherPermissions;
4005 	}
4006 
4007 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4008 }
4009 
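
// Illustrative sketch, kept out of the build: the accessMode bits (R_OK etc.)
// line up with the rwx bits of the "other" permission triplet, which is what
// the comparison above relies on.
#if 0
static status_t
example_may_execute(const struct stat& st)
{
	// X_OK == S_IXOTH == 1; the user/group triplets are shifted into the
	// same position before the test
	return check_access_permissions(X_OK, st.st_mode, st.st_gid, st.st_uid);
}
#endif
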
4010 
4011 #if 0
4012 extern "C" status_t
4013 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4014 	size_t* _numBytes)
4015 {
4016 	struct file_descriptor* descriptor;
4017 	struct vnode* vnode;
4018 
4019 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4020 	if (descriptor == NULL)
4021 		return B_FILE_ERROR;
4022 
4023 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4024 		count, 0, _numBytes);
4025 
4026 	put_fd(descriptor);
4027 	return status;
4028 }
4029 
4030 
4031 extern "C" status_t
4032 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4033 	size_t* _numBytes)
4034 {
4035 	struct file_descriptor* descriptor;
4036 	struct vnode* vnode;
4037 
4038 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4039 	if (descriptor == NULL)
4040 		return B_FILE_ERROR;
4041 
4042 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4043 		count, 0, _numBytes);
4044 
4045 	put_fd(descriptor);
4046 	return status;
4047 }
4048 #endif
4049 
4050 
4051 extern "C" status_t
4052 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4053 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4054 	size_t* _bytes)
4055 {
4056 	struct vnode* vnode;
4057 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4058 	if (!descriptor.IsSet())
4059 		return B_FILE_ERROR;
4060 
4061 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4062 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4063 		false);
4064 
4065 	return status;
4066 }
4067 
4068 
4069 extern "C" status_t
4070 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4071 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4072 	size_t* _bytes)
4073 {
4074 	struct vnode* vnode;
4075 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4076 	if (!descriptor.IsSet())
4077 		return B_FILE_ERROR;
4078 
4079 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4080 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4081 		true);
4082 
4083 	return status;
4084 }
4085 
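
// Illustrative sketch, kept out of the build: scattering a file's on-disk
// runs (file_io_vecs) into memory iovecs. *_vecIndex and *_vecOffset name the
// position in the iovec array where processing starts and, on return, where
// it stopped, so a caller can resume with a subsequent call.
#if 0
static status_t
example_read_runs(int fd, const file_io_vec* runs, size_t runCount,
	const iovec* memory, size_t memoryCount, size_t* _bytes)
{
	uint32 vecIndex = 0;
	size_t vecOffset = 0;
	return read_file_io_vec_pages(fd, runs, runCount, memory, memoryCount,
		&vecIndex, &vecOffset, _bytes);
}
#endif
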
4086 
4087 extern "C" status_t
4088 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4089 {
4090 	// lookup mount -- the caller is required to make sure that the mount
4091 	// won't go away
4092 	ReadLocker locker(sMountLock);
4093 	struct fs_mount* mount = find_mount(mountID);
4094 	if (mount == NULL)
4095 		return B_BAD_VALUE;
4096 	locker.Unlock();
4097 
4098 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4099 }
4100 
4101 
4102 extern "C" status_t
4103 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4104 {
4105 	// lookup mount -- the caller is required to make sure that the mount
4106 	// won't go away
4107 	ReadLocker locker(sMountLock);
4108 	struct fs_mount* mount = find_mount(mountID);
4109 	if (mount == NULL)
4110 		return B_BAD_VALUE;
4111 	locker.Unlock();
4112 
4113 	return mount->entry_cache.Add(dirID, name, -1, true);
4114 }
4115 
4116 
4117 extern "C" status_t
4118 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4119 {
4120 	// lookup mount -- the caller is required to make sure that the mount
4121 	// won't go away
4122 	ReadLocker locker(sMountLock);
4123 	struct fs_mount* mount = find_mount(mountID);
4124 	if (mount == NULL)
4125 		return B_BAD_VALUE;
4126 	locker.Unlock();
4127 
4128 	return mount->entry_cache.Remove(dirID, name);
4129 }
4130 
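
// Illustrative sketch, kept out of the build: how a file system might feed
// the entry cache after resolving directory lookups. All IDs and names here
// are placeholders.
#if 0
static void
example_cache_lookup_results(fs_volume* volume, ino_t dirID, ino_t nodeID)
{
	// positive entry: "config" in dirID resolves to nodeID
	entry_cache_add(volume->id, dirID, "config", nodeID);

	// negative entry: "missing" is known not to exist in dirID
	entry_cache_add_missing(volume->id, dirID, "missing");

	// and invalidation, once an entry goes away again
	entry_cache_remove(volume->id, dirID, "config");
}
#endif
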
4131 
4132 //	#pragma mark - private VFS API
4133 //	Functions the VFS exports for other parts of the kernel
4134 
4135 
4136 /*! Acquires another reference to the vnode that has to be released
4137 	by calling vfs_put_vnode().
4138 */
4139 void
4140 vfs_acquire_vnode(struct vnode* vnode)
4141 {
4142 	inc_vnode_ref_count(vnode);
4143 }
4144 
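
// Illustrative sketch, kept out of the build: every vfs_acquire_vnode() must
// be balanced by a vfs_put_vnode() once the caller is done with the node.
#if 0
static void
example_use_vnode(struct vnode* vnode)
{
	vfs_acquire_vnode(vnode);
	// ... the node cannot be destroyed while the reference is held ...
	vfs_put_vnode(vnode);
}
#endif
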
4145 
4146 /*! This is currently called from file_cache_create() only.
4147 	It's probably a temporary solution as long as devfs requires that
4148 	fs_read_pages()/fs_write_pages() are called with the standard
4149 	open cookie and not with a device cookie.
4150 	If that's done differently, remove this call; it has no other
4151 	purpose.
4152 */
4153 extern "C" status_t
4154 vfs_get_cookie_from_fd(int fd, void** _cookie)
4155 {
4156 	struct file_descriptor* descriptor;
4157 
4158 	descriptor = get_fd(get_current_io_context(true), fd);
4159 	if (descriptor == NULL)
4160 		return B_FILE_ERROR;
4161 
4162 	*_cookie = descriptor->cookie;
4163 	return B_OK;
4164 }
4165 
4166 
4167 extern "C" status_t
4168 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4169 {
4170 	*vnode = get_vnode_from_fd(fd, kernel);
4171 
4172 	if (*vnode == NULL)
4173 		return B_FILE_ERROR;
4174 
4175 	return B_OK;
4176 }
4177 
4178 
4179 extern "C" status_t
4180 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4181 {
4182 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4183 		path, kernel));
4184 
4185 	KPath pathBuffer;
4186 	if (pathBuffer.InitCheck() != B_OK)
4187 		return B_NO_MEMORY;
4188 
4189 	char* buffer = pathBuffer.LockBuffer();
4190 	strlcpy(buffer, path, pathBuffer.BufferSize());
4191 
4192 	VnodePutter vnode;
4193 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4194 	if (status != B_OK)
4195 		return status;
4196 
4197 	*_vnode = vnode.Detach();
4198 	return B_OK;
4199 }
4200 
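
// Illustrative sketch, kept out of the build: the caller owns the reference
// returned in *_vnode and has to put it. The path is just an example.
#if 0
static status_t
example_lookup_path(void)
{
	struct vnode* vnode;
	status_t status = vfs_get_vnode_from_path("/boot/system", true, &vnode);
	if (status != B_OK)
		return status;
	// ... use the node ...
	vfs_put_vnode(vnode);
	return B_OK;
}
#endif
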
4201 
4202 extern "C" status_t
4203 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4204 {
4205 	struct vnode* vnode = NULL;
4206 
4207 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4208 	if (status != B_OK)
4209 		return status;
4210 
4211 	*_vnode = vnode;
4212 	return B_OK;
4213 }
4214 
4215 
4216 extern "C" status_t
4217 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4218 	const char* name, struct vnode** _vnode)
4219 {
4220 	VnodePutter vnode;
4221 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4222 	*_vnode = vnode.Detach();
4223 	return status;
4224 }
4225 
4226 
4227 extern "C" void
4228 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4229 {
4230 	*_mountID = vnode->device;
4231 	*_vnodeID = vnode->id;
4232 }
4233 
4234 
4235 /*!
4236 	Helper function abstracting the process of "converting" a given
4237 	vnode-pointer to a fs_vnode-pointer.
4238 	Currently only used in bindfs.
4239 */
4240 extern "C" fs_vnode*
4241 vfs_fsnode_for_vnode(struct vnode* vnode)
4242 {
4243 	return vnode;
4244 }
4245 
4246 
4247 /*!
4248 	Calls fs_open() on the given vnode and returns a new
4249 	file descriptor for it
4250 */
4251 int
4252 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4253 {
4254 	return open_vnode(vnode, openMode, kernel);
4255 }
4256 
4257 
4258 /*!	Looks up a vnode with the given mount and vnode ID.
4259 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4260 	to the node.
4261 	It's currently only used by file_cache_create().
4262 */
4263 extern "C" status_t
4264 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4265 {
4266 	rw_lock_read_lock(&sVnodeLock);
4267 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4268 	rw_lock_read_unlock(&sVnodeLock);
4269 
4270 	if (vnode == NULL)
4271 		return B_ERROR;
4272 
4273 	*_vnode = vnode;
4274 	return B_OK;
4275 }
4276 
4277 
4278 extern "C" status_t
4279 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4280 	bool traverseLeafLink, bool kernel, void** _node)
4281 {
4282 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4283 		volume, path, kernel));
4284 
4285 	KPath pathBuffer;
4286 	if (pathBuffer.InitCheck() != B_OK)
4287 		return B_NO_MEMORY;
4288 
4289 	fs_mount* mount;
4290 	status_t status = get_mount(volume->id, &mount);
4291 	if (status != B_OK)
4292 		return status;
4293 
4294 	char* buffer = pathBuffer.LockBuffer();
4295 	strlcpy(buffer, path, pathBuffer.BufferSize());
4296 
4297 	VnodePutter vnode;
4298 
4299 	if (buffer[0] == '/')
4300 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4301 	else {
4302 		inc_vnode_ref_count(mount->root_vnode);
4303 			// vnode_path_to_vnode() releases a reference to the starting vnode
4304 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4305 			kernel, vnode, NULL);
4306 	}
4307 
4308 	put_mount(mount);
4309 
4310 	if (status != B_OK)
4311 		return status;
4312 
4313 	if (vnode->device != volume->id) {
4314 		// wrong mount ID - must not gain access on foreign file system nodes
4315 		return B_BAD_VALUE;
4316 	}
4317 
4318 	// Use get_vnode() to resolve the cookie for the right layer.
4319 	status = get_vnode(volume, vnode->id, _node);
4320 
4321 	return status;
4322 }
4323 
4324 
4325 status_t
4326 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4327 	struct stat* stat, bool kernel)
4328 {
4329 	status_t status;
4330 
4331 	if (path != NULL) {
4332 		// path given: get the stat of the node referred to by (fd, path)
4333 		KPath pathBuffer(path);
4334 		if (pathBuffer.InitCheck() != B_OK)
4335 			return B_NO_MEMORY;
4336 
4337 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4338 			traverseLeafLink, stat, kernel);
4339 	} else {
4340 		// no path given: get the FD and use the FD operation
4341 		FileDescriptorPutter descriptor
4342 			(get_fd(get_current_io_context(kernel), fd));
4343 		if (!descriptor.IsSet())
4344 			return B_FILE_ERROR;
4345 
4346 		if (descriptor->ops->fd_read_stat)
4347 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4348 		else
4349 			status = B_UNSUPPORTED;
4350 	}
4351 
4352 	return status;
4353 }
4354 
4355 
4356 /*!	Finds the full path to the file that contains the module \a moduleName,
4357 	puts it into \a pathBuffer, and returns B_OK for success.
4358 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4359 	\c B_ENTRY_NOT_FOUND if no file could be found.
4360 	\a pathBuffer is clobbered in any case and must not be relied on if this
4361 	function returns unsuccessfully.
4362 	\a basePath and \a pathBuffer must not point to the same space.
4363 */
4364 status_t
4365 vfs_get_module_path(const char* basePath, const char* moduleName,
4366 	char* pathBuffer, size_t bufferSize)
4367 {
4368 	status_t status;
4369 	size_t length;
4370 	char* path;
4371 
4372 	if (bufferSize == 0
4373 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4374 		return B_BUFFER_OVERFLOW;
4375 
4376 	VnodePutter dir;
4377 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4378 	if (status != B_OK)
4379 		return status;
4380 
4381 	// the path buffer had been clobbered by the above call
4382 	length = strlcpy(pathBuffer, basePath, bufferSize);
4383 	if (pathBuffer[length - 1] != '/')
4384 		pathBuffer[length++] = '/';
4385 
4386 	path = pathBuffer + length;
4387 	bufferSize -= length;
4388 
4389 	VnodePutter file;
4390 	while (moduleName) {
4391 		char* nextPath = strchr(moduleName, '/');
4392 		if (nextPath == NULL)
4393 			length = strlen(moduleName);
4394 		else {
4395 			length = nextPath - moduleName;
4396 			nextPath++;
4397 		}
4398 
4399 		if (length + 1 >= bufferSize)
4400 			return B_BUFFER_OVERFLOW;
4401 
4402 		memcpy(path, moduleName, length);
4403 		path[length] = '\0';
4404 		moduleName = nextPath;
4405 
4406 		// vnode_path_to_vnode() assumes ownership of the passed dir
4407 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4408 		if (status != B_OK)
4409 			return status;
4410 
4411 		if (S_ISDIR(file->Type())) {
4412 			// go to the next directory
4413 			path[length] = '/';
4414 			path[length + 1] = '\0';
4415 			path += length + 1;
4416 			bufferSize -= length + 1;
4417 
4418 			dir.SetTo(file.Detach());
4419 		} else if (S_ISREG(file->Type())) {
4420 			// it's a file so it should be what we've searched for
4421 			return B_OK;
4422 		} else {
4423 			TRACE(("vfs_get_module_path(): something is strange here: "
4424 				"0x%08" B_PRIx32 "...\n", file->Type()));
4425 			return B_ERROR;
4426 		}
4427 	}
4428 
4429 	// if we got here, the moduleName just pointed to a directory, not to
4430 	// a real module - what should we do in this case?
4431 	return B_ENTRY_NOT_FOUND;
4432 }
4433 
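
// Illustrative sketch, kept out of the build: base path and module name are
// made-up examples; the found path ends up in the supplied buffer.
#if 0
static void
example_find_module_file(void)
{
	char path[B_PATH_NAME_LENGTH];
	if (vfs_get_module_path("/boot/system/add-ons/kernel",
			"bus_managers/example/v1", path, sizeof(path)) == B_OK)
		dprintf("module file: %s\n", path);
}
#endif
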
4434 
4435 /*!	\brief Normalizes a given path.
4436 
4437 	The path must refer to an existing or non-existing entry in an existing
4438 	directory, that is, after chopping off the leaf component, the remaining
4439 	path must refer to an existing directory.
4440 
4441 	The returned path will be canonical in that it will be absolute, will not
4442 	contain any "." or ".." components or duplicate occurrences of '/'s,
4443 	and none of the directory components will be symbolic links.
4444 
4445 	Any two paths referring to the same entry will result in the same
4446 	normalized path (well, that is pretty much the definition of `normalized',
4447 	isn't it :-).
4448 
4449 	\param path The path to be normalized.
4450 	\param buffer The buffer into which the normalized path will be written.
4451 		   May be the same one as \a path.
4452 	\param bufferSize The size of \a buffer.
4453 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4454 	\param kernel \c true, if the IO context of the kernel shall be used,
4455 		   otherwise that of the team this thread belongs to. Only relevant,
4456 		   if the path is relative (to get the CWD).
4457 	\return \c B_OK if everything went fine, another error code otherwise.
4458 */
4459 status_t
4460 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4461 	bool traverseLink, bool kernel)
4462 {
4463 	if (!path || !buffer || bufferSize < 1)
4464 		return B_BAD_VALUE;
4465 
4466 	if (path != buffer) {
4467 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4468 			return B_BUFFER_OVERFLOW;
4469 	}
4470 
4471 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4472 }
4473 
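
// Illustrative sketch, kept out of the build: since \a buffer may alias
// \a path, normalization can be done in place.
#if 0
static status_t
example_normalize_in_place(char* path, size_t size)
{
	// resolves ".", "..", duplicate slashes, and symlinked directories
	return vfs_normalize_path(path, path, size, true, true);
}
#endif
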
4474 
4475 /*!	\brief Gets the parent of the passed in node.
4476 
4477 	Gets the parent of the passed in node, and correctly resolves covered
4478 	nodes.
4479 */
4480 extern "C" status_t
4481 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4482 {
4483 	return resolve_covered_parent(parent, device, node,
4484 		get_current_io_context(true));
4485 }
4486 
4487 
4488 /*!	\brief Creates a special node in the file system.
4489 
4490 	The caller gets a reference to the newly created node (which is passed
4491 	back through \a _createdVnode) and is responsible for releasing it.
4492 
4493 	\param path The path where to create the entry for the node. Can be \c NULL,
4494 		in which case the node is created without an entry in the root FS -- it
4495 		will automatically be deleted when the last reference has been released.
4496 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4497 		the target file system will just create the node with its standard
4498 		operations. Depending on the type of the node a subnode might be created
4499 		automatically, though.
4500 	\param mode The type and permissions for the node to be created.
4501 	\param flags Flags to be passed to the creating FS.
4502 	\param kernel \c true, if called in the kernel context (relevant only if
4503 		\a path is not \c NULL and not absolute).
4504 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4505 		file system creating the node, with the private data pointer and
4506 		operations for the super node. Can be \c NULL.
4507 	\param _createdVnode Pointer to pre-allocated storage where to store the
4508 		pointer to the newly created node.
4509 	\return \c B_OK, if everything went fine, another error code otherwise.
4510 */
4511 status_t
4512 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4513 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4514 	struct vnode** _createdVnode)
4515 {
4516 	VnodePutter dirNode;
4517 	char _leaf[B_FILE_NAME_LENGTH];
4518 	char* leaf = NULL;
4519 
4520 	if (path) {
4521 		// We've got a path. Get the dir vnode and the leaf name.
4522 		KPath tmpPathBuffer;
4523 		if (tmpPathBuffer.InitCheck() != B_OK)
4524 			return B_NO_MEMORY;
4525 
4526 		char* tmpPath = tmpPathBuffer.LockBuffer();
4527 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4528 			return B_NAME_TOO_LONG;
4529 
4530 		// get the dir vnode and the leaf name
4531 		leaf = _leaf;
4532 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4533 		if (error != B_OK)
4534 			return error;
4535 	} else {
4536 		// No path. Create the node in the root FS.
4537 		dirNode.SetTo(sRoot);
4538 		inc_vnode_ref_count(dirNode.Get());
4539 	}
4540 
4541 	// check support for creating special nodes
4542 	if (!HAS_FS_CALL(dirNode, create_special_node))
4543 		return B_UNSUPPORTED;
4544 
4545 	// create the node
4546 	fs_vnode superVnode;
4547 	ino_t nodeID;
4548 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4549 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4550 	if (status != B_OK)
4551 		return status;
4552 
4553 	// lookup the node
4554 	rw_lock_read_lock(&sVnodeLock);
4555 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4556 	rw_lock_read_unlock(&sVnodeLock);
4557 
4558 	if (*_createdVnode == NULL) {
4559 		panic("vfs_create_special_node(): lookup of node failed");
4560 		return B_ERROR;
4561 	}
4562 
4563 	return B_OK;
4564 }
4565 
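
// Illustrative sketch, kept out of the build: creating an anonymous special
// node (no entry in the root FS). S_IFIFO is just an example type; the node
// is deleted with its last reference.
#if 0
static status_t
example_create_anonymous_fifo(struct vnode** _vnode)
{
	return vfs_create_special_node(NULL, NULL, S_IFIFO | 0666, 0, true, NULL,
		_vnode);
}
#endif
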
4566 
4567 extern "C" void
4568 vfs_put_vnode(struct vnode* vnode)
4569 {
4570 	put_vnode(vnode);
4571 }
4572 
4573 
4574 extern "C" status_t
4575 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4576 {
4577 	// Get current working directory from io context
4578 	struct io_context* context = get_current_io_context(false);
4579 	status_t status = B_OK;
4580 
4581 	mutex_lock(&context->io_mutex);
4582 
4583 	if (context->cwd != NULL) {
4584 		*_mountID = context->cwd->device;
4585 		*_vnodeID = context->cwd->id;
4586 	} else
4587 		status = B_ERROR;
4588 
4589 	mutex_unlock(&context->io_mutex);
4590 	return status;
4591 }
4592 
4593 
4594 status_t
4595 vfs_unmount(dev_t mountID, uint32 flags)
4596 {
4597 	return fs_unmount(NULL, mountID, flags, true);
4598 }
4599 
4600 
4601 extern "C" status_t
4602 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4603 {
4604 	struct vnode* vnode;
4605 
4606 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4607 	if (status != B_OK)
4608 		return status;
4609 
4610 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4611 	put_vnode(vnode);
4612 	return B_OK;
4613 }
4614 
4615 
4616 extern "C" void
4617 vfs_free_unused_vnodes(int32 level)
4618 {
4619 	vnode_low_resource_handler(NULL,
4620 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4621 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4622 		level);
4623 }
4624 
4625 
4626 extern "C" bool
4627 vfs_can_page(struct vnode* vnode, void* cookie)
4628 {
4629 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4630 
4631 	if (HAS_FS_CALL(vnode, can_page))
4632 		return FS_CALL(vnode, can_page, cookie);
4633 	return false;
4634 }
4635 
4636 
4637 extern "C" status_t
4638 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4639 	const generic_io_vec* vecs, size_t count, uint32 flags,
4640 	generic_size_t* _numBytes)
4641 {
4642 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4643 		vecs, pos));
4644 
4645 #if VFS_PAGES_IO_TRACING
4646 	generic_size_t bytesRequested = *_numBytes;
4647 #endif
4648 
4649 	IORequest request;
4650 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4651 	if (status == B_OK) {
4652 		status = vfs_vnode_io(vnode, cookie, &request);
4653 		if (status == B_OK)
4654 			status = request.Wait();
4655 		*_numBytes = request.TransferredBytes();
4656 	}
4657 
4658 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4659 		status, *_numBytes));
4660 
4661 	return status;
4662 }
4663 
4664 
4665 extern "C" status_t
4666 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4667 	const generic_io_vec* vecs, size_t count, uint32 flags,
4668 	generic_size_t* _numBytes)
4669 {
4670 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4671 		vecs, pos));
4672 
4673 #if VFS_PAGES_IO_TRACING
4674 	generic_size_t bytesRequested = *_numBytes;
4675 #endif
4676 
4677 	IORequest request;
4678 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4679 	if (status == B_OK) {
4680 		status = vfs_vnode_io(vnode, cookie, &request);
4681 		if (status == B_OK)
4682 			status = request.Wait();
4683 		*_numBytes = request.TransferredBytes();
4684 	}
4685 
4686 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4687 		status, *_numBytes));
4688 
4689 	return status;
4690 }
4691 
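
// Illustrative sketch, kept out of the build: reading a single page through
// the request based page I/O path. Whether generic_io_vec::base holds a
// virtual or physical address depends on \a flags.
#if 0
static status_t
example_read_one_page(struct vnode* vnode, void* cookie, void* buffer)
{
	generic_io_vec vec;
	vec.base = (generic_addr_t)(addr_t)buffer;
	vec.length = B_PAGE_SIZE;

	generic_size_t bytes = B_PAGE_SIZE;
	return vfs_read_pages(vnode, cookie, 0, &vec, 1, 0, &bytes);
}
#endif
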
4692 
4693 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, one will be
4694 	created if \a allocate is \c true.
4695 	In case it's successful, it will also grab a reference to the cache
4696 	it returns.
4697 */
4698 extern "C" status_t
4699 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4700 {
4701 	if (vnode->cache != NULL) {
4702 		vnode->cache->AcquireRef();
4703 		*_cache = vnode->cache;
4704 		return B_OK;
4705 	}
4706 
4707 	rw_lock_read_lock(&sVnodeLock);
4708 	vnode->Lock();
4709 
4710 	status_t status = B_OK;
4711 
4712 	// The cache could have been created in the meantime
4713 	if (vnode->cache == NULL) {
4714 		if (allocate) {
4715 			// TODO: actually the vnode needs to be busy already here, or
4716 			//	else this won't work...
4717 			bool wasBusy = vnode->IsBusy();
4718 			vnode->SetBusy(true);
4719 
4720 			vnode->Unlock();
4721 			rw_lock_read_unlock(&sVnodeLock);
4722 
4723 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4724 
4725 			rw_lock_read_lock(&sVnodeLock);
4726 			vnode->Lock();
4727 			vnode->SetBusy(wasBusy);
4728 		} else
4729 			status = B_BAD_VALUE;
4730 	}
4731 
4732 	vnode->Unlock();
4733 	rw_lock_read_unlock(&sVnodeLock);
4734 
4735 	if (status == B_OK) {
4736 		vnode->cache->AcquireRef();
4737 		*_cache = vnode->cache;
4738 	}
4739 
4740 	return status;
4741 }
4742 
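
// Illustrative sketch, kept out of the build: the returned cache comes
// referenced; release it with ReleaseRef() when done.
#if 0
static status_t
example_with_vnode_cache(struct vnode* vnode)
{
	VMCache* cache;
	status_t status = vfs_get_vnode_cache(vnode, &cache, true);
	if (status != B_OK)
		return status;
	// ... use the cache ...
	cache->ReleaseRef();
	return B_OK;
}
#endif
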
4743 
4744 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4745 	their own.
4746 	In case it's successful, the vnode will also have acquired a reference
4747 	to the given cache.
4748 */
4749 extern "C" status_t
4750 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4751 {
4752 	rw_lock_read_lock(&sVnodeLock);
4753 	vnode->Lock();
4754 
4755 	status_t status = B_OK;
4756 	if (vnode->cache != NULL) {
4757 		status = B_NOT_ALLOWED;
4758 	} else {
4759 		vnode->cache = _cache;
4760 		_cache->AcquireRef();
4761 	}
4762 
4763 	vnode->Unlock();
4764 	rw_lock_read_unlock(&sVnodeLock);
4765 	return status;
4766 }
4767 
4768 
4769 status_t
4770 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4771 	file_io_vec* vecs, size_t* _count)
4772 {
4773 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4774 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4775 
4776 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4777 }
4778 
4779 
4780 status_t
4781 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4782 {
4783 	status_t status = FS_CALL(vnode, read_stat, stat);
4784 
4785 	// fill in the st_dev and st_ino fields
4786 	if (status == B_OK) {
4787 		stat->st_dev = vnode->device;
4788 		stat->st_ino = vnode->id;
4789 		// the rdev field must stay unset for non-special files
4790 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4791 			stat->st_rdev = -1;
4792 	}
4793 
4794 	return status;
4795 }
4796 
4797 
4798 status_t
4799 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4800 {
4801 	struct vnode* vnode;
4802 	status_t status = get_vnode(device, inode, &vnode, true, false);
4803 	if (status != B_OK)
4804 		return status;
4805 
4806 	status = vfs_stat_vnode(vnode, stat);
4807 
4808 	put_vnode(vnode);
4809 	return status;
4810 }
4811 
4812 
4813 status_t
4814 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4815 {
4816 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4817 }
4818 
4819 
4820 status_t
4821 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4822 	bool kernel, char* path, size_t pathLength)
4823 {
4824 	VnodePutter vnode;
4825 	status_t status;
4826 
4827 	// filter invalid leaf names
4828 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4829 		return B_BAD_VALUE;
4830 
4831 	// get the vnode matching the dir's node_ref
4832 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4833 		// special cases "." and "..": we can directly get the vnode of the
4834 		// referenced directory
4835 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4836 		leaf = NULL;
4837 	} else {
4838 		struct vnode* temp = NULL;
4839 		status = get_vnode(device, inode, &temp, true, false);
4840 		vnode.SetTo(temp);
4841 	}
4842 	if (status != B_OK)
4843 		return status;
4844 
4845 	// get the directory path
4846 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4847 	vnode.Unset();
4848 		// we don't need the vnode anymore
4849 	if (status != B_OK)
4850 		return status;
4851 
4852 	// append the leaf name
4853 	if (leaf) {
4854 		// insert a directory separator if this is not the file system root
4855 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4856 				>= pathLength)
4857 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4858 			return B_NAME_TOO_LONG;
4859 		}
4860 	}
4861 
4862 	return B_OK;
4863 }
4864 
4865 
4866 /*!	If the given descriptor locked its vnode, that lock will be released. */
4867 void
4868 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4869 {
4870 	struct vnode* vnode = fd_vnode(descriptor);
4871 
4872 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4873 		vnode->mandatory_locked_by = NULL;
4874 }
4875 
4876 
4877 /*!	Releases any POSIX locks on the file descriptor. */
4878 status_t
4879 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4880 {
4881 	struct vnode* vnode = descriptor->u.vnode;
4882 	if (vnode == NULL)
4883 		return B_OK;
4884 
4885 	if (HAS_FS_CALL(vnode, release_lock))
4886 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4887 
4888 	return release_advisory_lock(vnode, context, NULL, NULL);
4889 }
4890 
4891 
4892 /*!	Closes all file descriptors of the specified I/O context that
4893 	have the O_CLOEXEC flag set.
4894 */
4895 void
4896 vfs_exec_io_context(io_context* context)
4897 {
4898 	uint32 i;
4899 
4900 	for (i = 0; i < context->table_size; i++) {
4901 		mutex_lock(&context->io_mutex);
4902 
4903 		struct file_descriptor* descriptor = context->fds[i];
4904 		bool remove = false;
4905 
4906 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4907 			context->fds[i] = NULL;
4908 			context->num_used_fds--;
4909 
4910 			remove = true;
4911 		}
4912 
4913 		mutex_unlock(&context->io_mutex);
4914 
4915 		if (remove) {
4916 			close_fd(context, descriptor);
4917 			put_fd(descriptor);
4918 		}
4919 	}
4920 }
4921 
4922 
4923 /*! Sets up a new io_context structure, and inherits the properties
4924 	of the parent io_context if it is given.
4925 */
4926 io_context*
4927 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4928 {
4929 	io_context* context = (io_context*)malloc(sizeof(io_context));
4930 	if (context == NULL)
4931 		return NULL;
4932 
4933 	TIOC(NewIOContext(context, parentContext));
4934 
4935 	memset(context, 0, sizeof(io_context));
4936 	context->ref_count = 1;
4937 
4938 	MutexLocker parentLocker;
4939 
4940 	size_t tableSize;
4941 	if (parentContext != NULL) {
4942 		parentLocker.SetTo(parentContext->io_mutex, false);
4943 		tableSize = parentContext->table_size;
4944 	} else
4945 		tableSize = DEFAULT_FD_TABLE_SIZE;
4946 
4947 	// allocate space for FDs and their close-on-exec flag
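	// All three tables live in one allocation, laid out back to back:
	//   file_descriptor* fds[tableSize]
	//   select_info* select_infos[tableSize]
	//   uint8 fds_close_on_exec[(tableSize + 7) / 8]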
4948 	context->fds = (file_descriptor**)malloc(
4949 		sizeof(struct file_descriptor*) * tableSize
4950 		+ sizeof(struct select_info**) * tableSize
4951 		+ (tableSize + 7) / 8);
4952 	if (context->fds == NULL) {
4953 		free(context);
4954 		return NULL;
4955 	}
4956 
4957 	context->select_infos = (select_info**)(context->fds + tableSize);
4958 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4959 
4960 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4961 		+ sizeof(struct select_info**) * tableSize
4962 		+ (tableSize + 7) / 8);
4963 
4964 	mutex_init(&context->io_mutex, "I/O context");
4965 
4966 	// Copy all parent file descriptors
4967 
4968 	if (parentContext != NULL) {
4969 		size_t i;
4970 
4971 		mutex_lock(&sIOContextRootLock);
4972 		context->root = parentContext->root;
4973 		if (context->root)
4974 			inc_vnode_ref_count(context->root);
4975 		mutex_unlock(&sIOContextRootLock);
4976 
4977 		context->cwd = parentContext->cwd;
4978 		if (context->cwd)
4979 			inc_vnode_ref_count(context->cwd);
4980 
4981 		if (parentContext->inherit_fds) {
4982 			for (i = 0; i < tableSize; i++) {
4983 				struct file_descriptor* descriptor = parentContext->fds[i];
4984 
4985 				if (descriptor != NULL
4986 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4987 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4988 					if (closeOnExec && purgeCloseOnExec)
4989 						continue;
4990 
4991 					TFD(InheritFD(context, i, descriptor, parentContext));
4992 
4993 					context->fds[i] = descriptor;
4994 					context->num_used_fds++;
4995 					atomic_add(&descriptor->ref_count, 1);
4996 					atomic_add(&descriptor->open_count, 1);
4997 
4998 					if (closeOnExec)
4999 						fd_set_close_on_exec(context, i, true);
5000 				}
5001 			}
5002 		}
5003 
5004 		parentLocker.Unlock();
5005 	} else {
5006 		context->root = sRoot;
5007 		context->cwd = sRoot;
5008 
5009 		if (context->root)
5010 			inc_vnode_ref_count(context->root);
5011 
5012 		if (context->cwd)
5013 			inc_vnode_ref_count(context->cwd);
5014 	}
5015 
5016 	context->table_size = tableSize;
5017 	context->inherit_fds = parentContext != NULL;
5018 
5019 	list_init(&context->node_monitors);
5020 	context->max_monitors = DEFAULT_NODE_MONITORS;
5021 
5022 	return context;
5023 }
5024 
5025 
5026 void
5027 vfs_get_io_context(io_context* context)
5028 {
5029 	atomic_add(&context->ref_count, 1);
5030 }
5031 
5032 
5033 void
5034 vfs_put_io_context(io_context* context)
5035 {
5036 	if (atomic_add(&context->ref_count, -1) == 1)
5037 		free_io_context(context);
5038 }
5039 
5040 
5041 status_t
5042 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5043 {
5044 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5045 		return B_BAD_VALUE;
5046 
5047 	TIOC(ResizeIOContext(context, newSize));
5048 
5049 	MutexLocker _(context->io_mutex);
5050 
5051 	uint32 oldSize = context->table_size;
5052 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5053 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5054 
5055 	// If the tables shrink, make sure none of the fds being dropped are in use.
5056 	if (newSize < oldSize) {
5057 		for (uint32 i = oldSize; i-- > newSize;) {
5058 			if (context->fds[i])
5059 				return B_BUSY;
5060 		}
5061 	}
5062 
5063 	// store pointers to the old tables
5064 	file_descriptor** oldFDs = context->fds;
5065 	select_info** oldSelectInfos = context->select_infos;
5066 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5067 
5068 	// allocate new tables
5069 	file_descriptor** newFDs = (file_descriptor**)malloc(
5070 		sizeof(struct file_descriptor*) * newSize
5071 		+ sizeof(struct select_info**) * newSize
5072 		+ newCloseOnExecBitmapSize);
5073 	if (newFDs == NULL)
5074 		return B_NO_MEMORY;
5075 
5076 	context->fds = newFDs;
5077 	context->select_infos = (select_info**)(context->fds + newSize);
5078 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5079 	context->table_size = newSize;
5080 
5081 	// copy entries from old tables
5082 	uint32 toCopy = min_c(oldSize, newSize);
5083 
5084 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5085 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5086 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5087 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5088 
5089 	// clear additional entries, if the tables grow
5090 	if (newSize > oldSize) {
5091 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5092 		memset(context->select_infos + oldSize, 0,
5093 			sizeof(void*) * (newSize - oldSize));
5094 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5095 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5096 	}
5097 
5098 	free(oldFDs);
5099 
5100 	return B_OK;
5101 }
5102 
5103 
5104 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5105 
5106 	Given an arbitrary vnode (identified by mount and node ID), the function
5107 	checks, whether the vnode is covered by another vnode. If it is, the
5108 	function returns the mount and node ID of the covering vnode. Otherwise
5109 	it simply returns the supplied mount and node ID.
5110 
5111 	In case of error (e.g. the supplied node could not be found) the variables
5112 	for storing the resolved mount and node ID remain untouched and an error
5113 	code is returned.
5114 
5115 	\param mountID The mount ID of the vnode in question.
5116 	\param nodeID The node ID of the vnode in question.
5117 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5118 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5119 	\return
5120 	- \c B_OK, if everything went fine,
5121 	- another error code, if something went wrong.
5122 */
5123 status_t
5124 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5125 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5126 {
5127 	// get the node
5128 	struct vnode* node;
5129 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5130 	if (error != B_OK)
5131 		return error;
5132 
5133 	// resolve the node
5134 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5135 		put_vnode(node);
5136 		node = coveringNode;
5137 	}
5138 
5139 	// set the return values
5140 	*resolvedMountID = node->device;
5141 	*resolvedNodeID = node->id;
5142 
5143 	put_vnode(node);
5144 
5145 	return B_OK;
5146 }
5147 
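
// Illustrative sketch, kept out of the build: mapping a node_ref to what is
// actually visible at a mount point.
#if 0
static void
example_resolve_covering(dev_t device, ino_t node)
{
	dev_t resolvedDevice;
	ino_t resolvedNode;
	if (vfs_resolve_vnode_to_covering_vnode(device, node, &resolvedDevice,
			&resolvedNode) == B_OK) {
		// if (device, node) was covered, the resolved pair now identifies
		// the root of the volume mounted there; otherwise it is unchanged
	}
}
#endif
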
5148 
5149 status_t
5150 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5151 	ino_t* _mountPointNodeID)
5152 {
5153 	ReadLocker nodeLocker(sVnodeLock);
5154 	ReadLocker mountLocker(sMountLock);
5155 
5156 	struct fs_mount* mount = find_mount(mountID);
5157 	if (mount == NULL)
5158 		return B_BAD_VALUE;
5159 
5160 	Vnode* mountPoint = mount->covers_vnode;
5161 
5162 	*_mountPointMountID = mountPoint->device;
5163 	*_mountPointNodeID = mountPoint->id;
5164 
5165 	return B_OK;
5166 }
5167 
5168 
5169 status_t
5170 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5171 	ino_t coveredNodeID)
5172 {
5173 	// get the vnodes
5174 	Vnode* vnode;
5175 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5176 	if (error != B_OK)
5177 		return B_BAD_VALUE;
5178 	VnodePutter vnodePutter(vnode);
5179 
5180 	Vnode* coveredVnode;
5181 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5182 		false);
5183 	if (error != B_OK)
5184 		return B_BAD_VALUE;
5185 	VnodePutter coveredVnodePutter(coveredVnode);
5186 
5187 	// establish the covered/covering links
5188 	WriteLocker locker(sVnodeLock);
5189 
5190 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5191 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5192 		return B_BUSY;
5193 	}
5194 
5195 	vnode->covers = coveredVnode;
5196 	vnode->SetCovering(true);
5197 
5198 	coveredVnode->covered_by = vnode;
5199 	coveredVnode->SetCovered(true);
5200 
5201 	// the vnodes do now reference each other
5202 	inc_vnode_ref_count(vnode);
5203 	inc_vnode_ref_count(coveredVnode);
5204 
5205 	return B_OK;
5206 }
5207 
5208 
5209 int
5210 vfs_getrlimit(int resource, struct rlimit* rlp)
5211 {
5212 	if (!rlp)
5213 		return B_BAD_ADDRESS;
5214 
5215 	switch (resource) {
5216 		case RLIMIT_NOFILE:
5217 		{
5218 			struct io_context* context = get_current_io_context(false);
5219 			MutexLocker _(context->io_mutex);
5220 
5221 			rlp->rlim_cur = context->table_size;
5222 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5223 			return 0;
5224 		}
5225 
5226 		case RLIMIT_NOVMON:
5227 		{
5228 			struct io_context* context = get_current_io_context(false);
5229 			MutexLocker _(context->io_mutex);
5230 
5231 			rlp->rlim_cur = context->max_monitors;
5232 			rlp->rlim_max = MAX_NODE_MONITORS;
5233 			return 0;
5234 		}
5235 
5236 		default:
5237 			return B_BAD_VALUE;
5238 	}
5239 }
5240 
5241 
5242 int
5243 vfs_setrlimit(int resource, const struct rlimit* rlp)
5244 {
5245 	if (!rlp)
5246 		return B_BAD_ADDRESS;
5247 
5248 	switch (resource) {
5249 		case RLIMIT_NOFILE:
5250 			/* TODO: check getuid() */
5251 			if (rlp->rlim_max != RLIM_SAVED_MAX
5252 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5253 				return B_NOT_ALLOWED;
5254 
5255 			return vfs_resize_fd_table(get_current_io_context(false),
5256 				rlp->rlim_cur);
5257 
5258 		case RLIMIT_NOVMON:
5259 			/* TODO: check getuid() */
5260 			if (rlp->rlim_max != RLIM_SAVED_MAX
5261 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5262 				return B_NOT_ALLOWED;
5263 
5264 			return resize_monitor_table(get_current_io_context(false),
5265 				rlp->rlim_cur);
5266 
5267 		default:
5268 			return B_BAD_VALUE;
5269 	}
5270 }
5271 
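
// Illustrative sketch, kept out of the build: growing the caller's FD table
// through the rlimit interface; rlim_max must be left at the allowed maximum.
#if 0
static int
example_raise_fd_limit(rlim_t newLimit)
{
	struct rlimit rl;
	rl.rlim_cur = newLimit;
	rl.rlim_max = MAX_FD_TABLE_SIZE;
	return vfs_setrlimit(RLIMIT_NOFILE, &rl);
}
#endif
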
5272 
5273 status_t
5274 vfs_init(kernel_args* args)
5275 {
5276 	vnode::StaticInit();
5277 
5278 	sVnodeTable = new(std::nothrow) VnodeTable();
5279 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5280 		panic("vfs_init: error creating vnode hash table\n");
5281 
5282 	struct vnode dummy_vnode;
5283 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5284 
5286 	sMountsTable = new(std::nothrow) MountTable();
5287 	if (sMountsTable == NULL
5288 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5289 		panic("vfs_init: error creating mounts hash table\n");
5290 
5291 	sPathNameCache = create_object_cache("vfs path names",
5292 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5293 	if (sPathNameCache == NULL)
5294 		panic("vfs_init: error creating path name object_cache\n");
5295 
5296 	sVnodeCache = create_object_cache("vfs vnodes",
5297 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5298 	if (sVnodeCache == NULL)
5299 		panic("vfs_init: error creating vnode object_cache\n");
5300 
5301 	sFileDescriptorCache = create_object_cache("vfs fds",
5302 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5303 	if (sFileDescriptorCache == NULL)
5304 		panic("vfs_init: error creating file descriptor object_cache\n");
5305 
5306 	node_monitor_init();
5307 
5308 	sRoot = NULL;
5309 
5310 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5311 
5312 	if (block_cache_init() != B_OK)
5313 		return B_ERROR;
5314 
5315 #ifdef ADD_DEBUGGER_COMMANDS
5316 	// add some debugger commands
5317 	add_debugger_command_etc("vnode", &dump_vnode,
5318 		"Print info about the specified vnode",
5319 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5320 		"Prints information about the vnode specified by address <vnode> or\n"
5321 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5322 		"constructed and printed. It might not be possible to construct a\n"
5323 		"complete path, though.\n",
5324 		0);
5325 	add_debugger_command("vnodes", &dump_vnodes,
5326 		"list all vnodes (from the specified device)");
5327 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5328 		"list all vnode caches");
5329 	add_debugger_command("mount", &dump_mount,
5330 		"info about the specified fs_mount");
5331 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5332 	add_debugger_command("io_context", &dump_io_context,
5333 		"info about the I/O context");
5334 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5335 		"info about vnode usage");
5336 #endif
5337 
5338 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5339 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5340 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5341 		0);
5342 
5343 	fifo_init();
5344 	file_map_init();
5345 
5346 	return file_cache_init();
5347 }
5348 
5349 
5350 //	#pragma mark - fd_ops implementations
5351 
5352 
5353 /*!
5354 	Calls fs_open() on the given vnode and returns a new
5355 	file descriptor for it
5356 */
5357 static int
5358 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5359 {
5360 	void* cookie;
5361 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5362 	if (status != B_OK)
5363 		return status;
5364 
5365 	int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
5366 	if (fd < 0) {
5367 		FS_CALL(vnode, close, cookie);
5368 		FS_CALL(vnode, free_cookie, cookie);
5369 	}
5370 	return fd;
5371 }
5372 
5373 
5374 /*!
5375 	Opens or creates the entry \a name in the given directory and returns
5376 	a new file descriptor for it
5377 */
5378 static int
5379 create_vnode(struct vnode* directory, const char* name, int openMode,
5380 	int perms, bool kernel)
5381 {
5382 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5383 	status_t status = B_ERROR;
5384 	VnodePutter vnode, dirPutter;
5385 	void* cookie;
5386 	ino_t newID;
5387 	char clonedName[B_FILE_NAME_LENGTH + 1];
5388 
5389 	// This is somewhat tricky: If the entry already exists, the FS responsible
5390 	// for the directory might not necessarily also be the one responsible for
5391 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5392 	// we can actually never call the create() hook without O_EXCL. Instead we
5393 	// try to look the entry up first. If it already exists, we just open the
5394 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5395 	// introduces a race condition, since someone else might have created the
5396 	// entry in the meantime. In that case we hope the respective FS returns
5397 	// the correct error code, and we retry (up to 3 times).
5398 
5399 	for (int i = 0; i < 3 && status != B_OK; i++) {
5400 		bool create = false;
5401 
5402 		// look the node up
5403 		{
5404 			struct vnode* entry = NULL;
5405 			status = lookup_dir_entry(directory, name, &entry);
5406 			vnode.SetTo(entry);
5407 		}
5408 		if (status == B_OK) {
5409 			if ((openMode & O_EXCL) != 0)
5410 				return B_FILE_EXISTS;
5411 
5412 			// If the node is a symlink, we have to follow it, unless
5413 			// O_NOTRAVERSE is set.
5414 			if (S_ISLNK(vnode->Type()) && traverse) {
5415 				vnode.Unset();
5416 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5417 						>= B_FILE_NAME_LENGTH) {
5418 					return B_NAME_TOO_LONG;
5419 				}
5420 
5421 				inc_vnode_ref_count(directory);
5422 				dirPutter.Unset();
5423 				status = vnode_path_to_vnode(directory, clonedName, true,
5424 					kernel, vnode, NULL, clonedName);
5425 				if (status != B_OK) {
5426 					// The vnode was not found, but maybe it has a parent and we can
5427 					// create it from there. In that case, vnode_path_to_vnode() has set
5428 					// vnode to the last directory found in the path.
5429 					if (status == B_ENTRY_NOT_FOUND) {
5430 						directory = vnode.Detach();
5431 						dirPutter.SetTo(directory);
5432 						name = clonedName;
5433 						create = true;
5434 					} else
5435 						return status;
5436 				}
5437 			}
5438 
5439 			if (!create) {
5440 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5441 					return B_LINK_LIMIT;
5442 
5443 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5444 				// on success keep the vnode reference for the FD
5445 				if (fd >= 0)
5446 					vnode.Detach();
5447 
5448 				return fd;
5449 			}
5450 		}
5451 
5452 		// it doesn't exist yet -- try to create it
5453 
5454 		if (!HAS_FS_CALL(directory, create))
5455 			return B_READ_ONLY_DEVICE;
5456 
5457 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5458 			&cookie, &newID);
5459 		if (status != B_OK
5460 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5461 			return status;
5462 		}
5463 	}
5464 
5465 	if (status != B_OK)
5466 		return status;
5467 
5468 	// the node has been created successfully
5469 
5470 	rw_lock_read_lock(&sVnodeLock);
5471 	vnode.SetTo(lookup_vnode(directory->device, newID));
5472 	rw_lock_read_unlock(&sVnodeLock);
5473 
5474 	if (!vnode.IsSet()) {
5475 		panic("vfs: fs_create() returned success but there is no vnode, "
5476 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5477 		return B_BAD_VALUE;
5478 	}
5479 
5480 	int fd = get_new_fd(&sFileOps, NULL, vnode.Get(), cookie, openMode, kernel);
5481 	if (fd >= 0) {
5482 		vnode.Detach();
5483 		return fd;
5484 	}
5485 
5486 	status = fd;
5487 
5488 	// something went wrong, clean up
5489 
5490 	FS_CALL(vnode.Get(), close, cookie);
5491 	FS_CALL(vnode.Get(), free_cookie, cookie);
5492 
5493 	FS_CALL(directory, unlink, name);
5494 
5495 	return status;
5496 }
5497 
5498 
5499 /*! Calls fs open_dir() on the given vnode and returns a new
5500 	file descriptor for it
5501 */
5502 static int
5503 open_dir_vnode(struct vnode* vnode, bool kernel)
5504 {
5505 	if (!HAS_FS_CALL(vnode, open_dir))
5506 		return B_UNSUPPORTED;
5507 
5508 	void* cookie;
5509 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5510 	if (status != B_OK)
5511 		return status;
5512 
5513 	// directory is opened, create a fd
5514 	status = get_new_fd(&sDirectoryOps, NULL, vnode, cookie, O_CLOEXEC, kernel);
5515 	if (status >= 0)
5516 		return status;
5517 
5518 	FS_CALL(vnode, close_dir, cookie);
5519 	FS_CALL(vnode, free_dir_cookie, cookie);
5520 
5521 	return status;
5522 }
5523 
5524 
5525 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5526 	file descriptor for it.
5527 	Used by attr_dir_open() and attr_dir_open_fd().
5528 */
5529 static int
5530 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5531 {
5532 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5533 		return B_UNSUPPORTED;
5534 
5535 	void* cookie;
5536 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5537 	if (status != B_OK)
5538 		return status;
5539 
5540 	// directory is opened, create a fd
5541 	status = get_new_fd(&sAttributeDirectoryOps, NULL, vnode, cookie, O_CLOEXEC,
5542 		kernel);
5543 	if (status >= 0)
5544 		return status;
5545 
5546 	FS_CALL(vnode, close_attr_dir, cookie);
5547 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5548 
5549 	return status;
5550 }
5551 
5552 
5553 static int
5554 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5555 	int openMode, int perms, bool kernel)
5556 {
5557 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5558 		"kernel %d\n", name, openMode, perms, kernel));
5559 
5560 	// get directory to put the new file in
5561 	struct vnode* directory;
5562 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5563 	if (status != B_OK)
5564 		return status;
5565 
5566 	status = create_vnode(directory, name, openMode, perms, kernel);
5567 	put_vnode(directory);
5568 
5569 	return status;
5570 }
5571 
5572 
5573 static int
5574 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5575 {
5576 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5577 		openMode, perms, kernel));
5578 
5579 	// get directory to put the new file in
5580 	char name[B_FILE_NAME_LENGTH];
5581 	VnodePutter directory;
5582 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5583 		kernel);
5584 	if (status < 0)
5585 		return status;
5586 
5587 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5588 }
5589 
5590 
5591 static int
5592 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5593 	int openMode, bool kernel)
5594 {
5595 	if (name == NULL || *name == '\0')
5596 		return B_BAD_VALUE;
5597 
5598 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5599 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5600 
5601 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5602 
5603 	// get the vnode matching the entry_ref
5604 	VnodePutter vnode;
5605 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5606 		kernel, vnode);
5607 	if (status != B_OK)
5608 		return status;
5609 
5610 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5611 		return B_LINK_LIMIT;
5612 
5613 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5614 	if (newFD >= 0) {
5615 		cache_node_opened(vnode.Get(), vnode->cache, mountID,
5616 			directoryID, vnode->id, name);
5617 
5618 		// The vnode reference has been transferred to the FD
5619 		vnode.Detach();
5620 	}
5621 
5622 	return newFD;
5623 }
5624 
5625 
5626 static int
5627 file_open(int fd, char* path, int openMode, bool kernel)
5628 {
5629 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5630 
5631 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5632 		fd, path, openMode, kernel));
5633 
5634 	// get the vnode matching the vnode + path combination
5635 	VnodePutter vnode;
5636 	ino_t parentID;
5637 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5638 		&parentID, kernel);
5639 	if (status != B_OK)
5640 		return status;
5641 
5642 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5643 		return B_LINK_LIMIT;
5644 
5645 	// open the vnode
5646 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5647 	if (newFD >= 0) {
5648 		cache_node_opened(vnode.Get(), vnode->cache,
5649 			vnode->device, parentID, vnode->id, NULL);
5650 
5651 		// The vnode reference has been transferred to the FD
5652 		vnode.Detach();
5653 	}
5654 
5655 	return newFD;
5656 }
5657 
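/*	Illustrative sketch: symlink handling in the two open functions above.
	Given a symlink "link" pointing to "file":

		open("link", O_RDWR);					// opens "file" (traversed)
		open("link", O_RDWR | O_NOFOLLOW);		// fails with B_LINK_LIMIT
		open("link", O_RDWR | O_NOTRAVERSE);	// opens the link node itself
*/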
5658 
5659 static status_t
5660 file_close(struct file_descriptor* descriptor)
5661 {
5662 	struct vnode* vnode = descriptor->u.vnode;
5663 	status_t status = B_OK;
5664 
5665 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5666 
5667 	cache_node_closed(vnode, vnode->cache, vnode->device,
5668 		vnode->id);
5669 	if (HAS_FS_CALL(vnode, close)) {
5670 		status = FS_CALL(vnode, close, descriptor->cookie);
5671 	}
5672 
5673 	if (status == B_OK) {
5674 		// remove all outstanding locks for this team
5675 		if (HAS_FS_CALL(vnode, release_lock))
5676 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5677 		else
5678 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5679 	}
5680 	return status;
5681 }
5682 
5683 
5684 static void
5685 file_free_fd(struct file_descriptor* descriptor)
5686 {
5687 	struct vnode* vnode = descriptor->u.vnode;
5688 
5689 	if (vnode != NULL) {
5690 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5691 		put_vnode(vnode);
5692 	}
5693 }
5694 
5695 
5696 static status_t
5697 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5698 	size_t* length)
5699 {
5700 	struct vnode* vnode = descriptor->u.vnode;
5701 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5702 		pos, length, *length));
5703 
5704 	if (S_ISDIR(vnode->Type()))
5705 		return B_IS_A_DIRECTORY;
5706 	if (pos != -1 && descriptor->pos == -1)
5707 		return ESPIPE;
5708 
5709 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5710 }
5711 
5712 
5713 static status_t
5714 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5715 	size_t* length)
5716 {
5717 	struct vnode* vnode = descriptor->u.vnode;
5718 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5719 		length));
5720 
5721 	if (S_ISDIR(vnode->Type()))
5722 		return B_IS_A_DIRECTORY;
5723 	if (pos != -1 && descriptor->pos == -1)
5724 		return ESPIPE;
5725 
5726 	if (!HAS_FS_CALL(vnode, write))
5727 		return B_READ_ONLY_DEVICE;
5728 
5729 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5730 }
5731 
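/*	Note on the pos convention checked in file_read()/file_write() above:
	a descriptor whose pos field is -1 marks a non-seekable stream (e.g. a
	FIFO). Illustrative sketch:

		read(fifoFD, buffer, size);		// pos == -1 internally, allowed
		pread(fifoFD, buffer, size, 0);	// positional request -> ESPIPE
*/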
5732 
5733 static ssize_t
5734 file_vector_io(struct file_descriptor* descriptor, off_t pos,
5735 	const struct iovec *vecs, int count, bool write)
5736 {
5737 	struct vnode* vnode = descriptor->u.vnode;
5738 	if (pos != -1 && descriptor->pos == -1)
5739 		return ESPIPE;
5740 	if (S_ISDIR(vnode->Type()))
5741 		return B_IS_A_DIRECTORY;
5742 
5743 	if (pos == -1)
5744 		return B_UNSUPPORTED;
5745 	if (!HAS_FS_CALL(vnode, io))
5746 		return B_UNSUPPORTED;
5747 
5748 	// We can only perform real vectored I/O for vnodes that have no cache,
5749 	// because the I/O hook bypasses the cache entirely.
5750 	if (vnode->cache != NULL)
5751 		return B_UNSUPPORTED;
5752 
5753 	BStackOrHeapArray<generic_io_vec, 8> iovecs(count);
5754 	if (!iovecs.IsValid())
5755 		return B_NO_MEMORY;
5756 
5757 	generic_size_t length = 0;
5758 	for (int i = 0; i < count; i++) {
5759 		iovecs[i].base = (generic_addr_t)vecs[i].iov_base;
5760 		iovecs[i].length = vecs[i].iov_len;
5761 		length += vecs[i].iov_len;
5762 	}
5763 
5764 	status_t status = (write ? vfs_write_pages : vfs_read_pages)(vnode,
5765 		descriptor->cookie, pos, iovecs, count, 0, &length);
5766 	if (length > 0)
5767 		return length;
5768 	return status;
5769 }
5770 
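/*	Illustrative sketch (not compiled): what file_vector_io() serves from
	userland, assuming Haiku's readv_pos(). The iovecs are translated into
	generic_io_vecs and handed to the FS in a single vfs_read_pages() call --
	but only for positional requests on uncached vnodes with an io() hook:

		#include <sys/uio.h>

		char header[16], body[496];
		struct iovec vecs[] = {
			{ header, sizeof(header) },
			{ body, sizeof(body) }
		};
		ssize_t bytesRead = readv_pos(fd, 0, vecs, 2);
*/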
5771 
5772 static ssize_t
5773 file_readv(struct file_descriptor* descriptor, off_t pos,
5774 	const struct iovec *vecs, int count)
5775 {
5776 	FUNCTION(("file_readv: pos %" B_PRIdOFF "\n", pos));
5777 	return file_vector_io(descriptor, pos, vecs, count, false);
5778 }
5779 
5780 
5781 static ssize_t
5782 file_writev(struct file_descriptor* descriptor, off_t pos,
5783 	const struct iovec *vecs, int count)
5784 {
5785 	FUNCTION(("file_writev: pos %" B_PRIdOFF "\n", pos));
5786 	return file_vector_io(descriptor, pos, vecs, count, true);
5787 }
5788 
5789 
5790 static off_t
5791 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5792 {
5793 	struct vnode* vnode = descriptor->u.vnode;
5794 	off_t offset;
5795 	bool isDevice = false;
5796 
5797 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5798 		seekType));
5799 
5800 	if (descriptor->pos == -1)
5801 		return ESPIPE;
5802 
5803 	switch (vnode->Type() & S_IFMT) {
5804 		// drivers publish block devices as character devices, so check both
5805 		case S_IFBLK:
5806 		case S_IFCHR:
5807 			isDevice = true;
5808 			break;
5809 	}
5810 
5811 	switch (seekType) {
5812 		case SEEK_SET:
5813 			offset = 0;
5814 			break;
5815 		case SEEK_CUR:
5816 			offset = descriptor->pos;
5817 			break;
5818 		case SEEK_END:
5819 		{
5820 			// stat() the node
5821 			if (!HAS_FS_CALL(vnode, read_stat))
5822 				return B_UNSUPPORTED;
5823 
5824 			struct stat stat;
5825 			status_t status = FS_CALL(vnode, read_stat, &stat);
5826 			if (status != B_OK)
5827 				return status;
5828 
5829 			offset = stat.st_size;
5830 
5831 			if (offset == 0 && isDevice) {
5832 				// stat() on regular drivers doesn't report size
5833 				device_geometry geometry;
5834 
5835 				if (HAS_FS_CALL(vnode, ioctl)) {
5836 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5837 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5838 					if (status == B_OK)
5839 						offset = (off_t)geometry.bytes_per_sector
5840 							* geometry.sectors_per_track
5841 							* geometry.cylinder_count
5842 							* geometry.head_count;
5843 				}
5844 			}
5845 
5846 			break;
5847 		}
5848 		case SEEK_DATA:
5849 		case SEEK_HOLE:
5850 		{
5851 			status_t status = B_BAD_VALUE;
5852 			if (HAS_FS_CALL(vnode, ioctl)) {
5853 				offset = pos;
5854 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5855 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5856 					&offset, sizeof(offset));
5857 				if (status == B_OK) {
5858 					if (offset > pos)
5859 						offset -= pos;
5860 					break;
5861 				}
5862 			}
5863 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5864 				return status;
5865 
5866 			// basic implementation using stat() on the node
5867 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5868 				return B_BAD_VALUE;
5869 
5870 			struct stat stat;
5871 			status = FS_CALL(vnode, read_stat, &stat);
5872 			if (status != B_OK)
5873 				return status;
5874 
5875 			off_t end = stat.st_size;
5876 			if (pos >= end)
5877 				return ENXIO;
5878 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5879 			break;
5880 		}
5881 		default:
5882 			return B_BAD_VALUE;
5883 	}
5884 
5885 	// assumes off_t is 64 bits wide
5886 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5887 		return B_BUFFER_OVERFLOW;
5888 
5889 	pos += offset;
5890 	if (pos < 0)
5891 		return B_BAD_VALUE;
5892 
5893 	return descriptor->pos = pos;
5894 }
5895 
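/*	Illustrative sketch: the SEEK_DATA/SEEK_HOLE fallback above treats a
	file whose FS lacks FIOSEEKDATA/FIOSEEKHOLE support as one contiguous
	data region. For a plain 4096-byte file one would expect:

		lseek(fd, 0, SEEK_DATA);	// 0: data starts immediately
		lseek(fd, 0, SEEK_HOLE);	// 4096: only the implicit hole at EOF
		lseek(fd, 4096, SEEK_DATA);	// ENXIO: pos is at/past the file size
*/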
5896 
5897 static status_t
5898 file_select(struct file_descriptor* descriptor, uint8 event,
5899 	struct selectsync* sync)
5900 {
5901 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5902 
5903 	struct vnode* vnode = descriptor->u.vnode;
5904 
5905 	// If the FS has no select() hook, notify select() now.
5906 	if (!HAS_FS_CALL(vnode, select)) {
5907 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5908 			notify_select_event(sync, event);
5909 		return B_UNSUPPORTED;
5910 	}
5911 
5912 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5913 }
5914 
5915 
5916 static status_t
5917 file_deselect(struct file_descriptor* descriptor, uint8 event,
5918 	struct selectsync* sync)
5919 {
5920 	struct vnode* vnode = descriptor->u.vnode;
5921 
5922 	if (!HAS_FS_CALL(vnode, deselect))
5923 		return B_OK;
5924 
5925 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5926 }
5927 
5928 
5929 static status_t
5930 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5931 	bool kernel)
5932 {
5933 	struct vnode* vnode;
5934 	status_t status;
5935 
5936 	if (name == NULL || *name == '\0')
5937 		return B_BAD_VALUE;
5938 
5939 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5940 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5941 
5942 	status = get_vnode(mountID, parentID, &vnode, true, false);
5943 	if (status != B_OK)
5944 		return status;
5945 
5946 	if (HAS_FS_CALL(vnode, create_dir))
5947 		status = FS_CALL(vnode, create_dir, name, perms);
5948 	else
5949 		status = B_READ_ONLY_DEVICE;
5950 
5951 	put_vnode(vnode);
5952 	return status;
5953 }
5954 
5955 
5956 static status_t
5957 dir_create(int fd, char* path, int perms, bool kernel)
5958 {
5959 	char filename[B_FILE_NAME_LENGTH];
5960 	status_t status;
5961 
5962 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5963 		kernel));
5964 
5965 	VnodePutter vnode;
5966 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5967 	if (status < 0)
5968 		return status;
5969 
5970 	if (HAS_FS_CALL(vnode, create_dir)) {
5971 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5972 	} else
5973 		status = B_READ_ONLY_DEVICE;
5974 
5975 	return status;
5976 }
5977 
5978 
5979 static int
5980 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5981 {
5982 	FUNCTION(("dir_open_entry_ref()\n"));
5983 
5984 	if (name && name[0] == '\0')
5985 		return B_BAD_VALUE;
5986 
5987 	// get the vnode matching the entry_ref/node_ref
5988 	VnodePutter vnode;
5989 	status_t status;
5990 	if (name) {
5991 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5992 			vnode);
5993 	} else {
5994 		struct vnode* temp = NULL;
5995 		status = get_vnode(mountID, parentID, &temp, true, false);
5996 		vnode.SetTo(temp);
5997 	}
5998 	if (status != B_OK)
5999 		return status;
6000 
6001 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6002 	if (newFD >= 0) {
6003 		cache_node_opened(vnode.Get(), vnode->cache, mountID, parentID,
6004 			vnode->id, name);
6005 
6006 		// The vnode reference has been transferred to the FD
6007 		vnode.Detach();
6008 	}
6009 
6010 	return newFD;
6011 }
6012 
6013 
6014 static int
6015 dir_open(int fd, char* path, bool kernel)
6016 {
6017 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6018 		kernel));
6019 
6020 	// get the vnode matching the vnode + path combination
6021 	VnodePutter vnode;
6022 	ino_t parentID;
6023 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
6024 		kernel);
6025 	if (status != B_OK)
6026 		return status;
6027 
6028 	// open the dir
6029 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6030 	if (newFD >= 0) {
6031 		cache_node_opened(vnode.Get(), vnode->cache, vnode->device,
6032 			parentID, vnode->id, NULL);
6033 
6034 		// The vnode reference has been transferred to the FD
6035 		vnode.Detach();
6036 	}
6037 
6038 	return newFD;
6039 }
6040 
6041 
6042 static status_t
6043 dir_close(struct file_descriptor* descriptor)
6044 {
6045 	struct vnode* vnode = descriptor->u.vnode;
6046 
6047 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6048 
6049 	cache_node_closed(vnode, vnode->cache, vnode->device,
6050 		vnode->id);
6051 	if (HAS_FS_CALL(vnode, close_dir))
6052 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6053 
6054 	return B_OK;
6055 }
6056 
6057 
6058 static void
6059 dir_free_fd(struct file_descriptor* descriptor)
6060 {
6061 	struct vnode* vnode = descriptor->u.vnode;
6062 
6063 	if (vnode != NULL) {
6064 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6065 		put_vnode(vnode);
6066 	}
6067 }
6068 
6069 
6070 static status_t
6071 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6072 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6073 {
6074 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6075 		bufferSize, _count);
6076 }
6077 
6078 
6079 static status_t
6080 fix_dirent(struct vnode* parent, struct dirent* entry,
6081 	struct io_context* ioContext)
6082 {
6083 	// set d_pdev and d_pino
6084 	entry->d_pdev = parent->device;
6085 	entry->d_pino = parent->id;
6086 
6087 	// If this is the ".." entry and the directory is covering another vnode,
6088 	// we need to replace d_dev and d_ino with the actual values.
6089 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6090 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6091 			ioContext);
6092 	}
6093 
6094 	// resolve covered vnodes
6095 	ReadLocker _(&sVnodeLock);
6096 
6097 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6098 	if (vnode != NULL && vnode->covered_by != NULL) {
6099 		do {
6100 			vnode = vnode->covered_by;
6101 		} while (vnode->covered_by != NULL);
6102 
6103 		entry->d_dev = vnode->device;
6104 		entry->d_ino = vnode->id;
6105 	}
6106 
6107 	return B_OK;
6108 }
6109 
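/*	Example of why fix_dirent() rewrites entries: an entry that is a mount
	point must report the device/inode of the mounted volume's root vnode
	(the end of the covered_by chain), so that the entry agrees with what a
	stat() on the same path returns. Conversely, the ".." entry of a mounted
	volume's root directory (which covers a vnode on the underlying volume)
	is resolved to the covered vnode's parent via resolve_covered_parent().
*/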
6110 
6111 static status_t
6112 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6113 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6114 {
6115 	if (!HAS_FS_CALL(vnode, read_dir))
6116 		return B_UNSUPPORTED;
6117 
6118 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6119 		_count);
6120 	if (error != B_OK)
6121 		return error;
6122 
6123 	// we need to adjust the read dirents
6124 	uint32 count = *_count;
6125 	for (uint32 i = 0; i < count; i++) {
6126 		error = fix_dirent(vnode, buffer, ioContext);
6127 		if (error != B_OK)
6128 			return error;
6129 
6130 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6131 	}
6132 
6133 	return error;
6134 }
6135 
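/*	Illustrative sketch (not compiled): consumers walk the buffer filled by
	dir_read() the same way the fix-up loop above does -- the entries are
	packed back to back, and d_reclen gives each entry's total size:

		char buffer[4096];
		uint32 count = 32;		// in: max entries, out: entries read
		if (dir_read(ioContext, vnode, cookie, (struct dirent*)buffer,
				sizeof(buffer), &count) == B_OK) {
			struct dirent* entry = (struct dirent*)buffer;
			for (uint32 i = 0; i < count; i++) {
				dprintf("entry: %s\n", entry->d_name);
				entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
			}
		}
*/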
6136 
6137 static status_t
6138 dir_rewind(struct file_descriptor* descriptor)
6139 {
6140 	struct vnode* vnode = descriptor->u.vnode;
6141 
6142 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6143 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6144 	}
6145 
6146 	return B_UNSUPPORTED;
6147 }
6148 
6149 
6150 static status_t
6151 dir_remove(int fd, char* path, bool kernel)
6152 {
6153 	char name[B_FILE_NAME_LENGTH];
6154 	status_t status;
6155 
6156 	if (path != NULL) {
6157 		// we need to make sure our path name doesn't end with "/", ".",
6158 		// or ".."
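		// e.g. "a/b/" and "a/b/." both reduce to "a/b", while "a/b/.."
		// is rejected with B_NOT_ALLOWED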
6159 		char* lastSlash;
6160 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6161 			char* leaf = lastSlash + 1;
6162 			if (!strcmp(leaf, ".."))
6163 				return B_NOT_ALLOWED;
6164 
6165 			// omit multiple slashes
6166 			while (lastSlash > path && lastSlash[-1] == '/')
6167 				lastSlash--;
6168 
6169 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6170 				break;
6173 			// "name/" -> "name", or "name/." -> "name"
6174 			lastSlash[0] = '\0';
6175 		}
6176 
6177 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6178 			return B_NOT_ALLOWED;
6179 	}
6180 
6181 	VnodePutter directory;
6182 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6183 	if (status != B_OK)
6184 		return status;
6185 
6186 	if (HAS_FS_CALL(directory, remove_dir))
6187 		status = FS_CALL(directory.Get(), remove_dir, name);
6188 	else
6189 		status = B_READ_ONLY_DEVICE;
6190 
6191 	return status;
6192 }
6193 
6194 
6195 static status_t
6196 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6197 	size_t length)
6198 {
6199 	struct vnode* vnode = descriptor->u.vnode;
6200 
6201 	if (HAS_FS_CALL(vnode, ioctl))
6202 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6203 
6204 	return B_DEV_INVALID_IOCTL;
6205 }
6206 
6207 
6208 static status_t
6209 common_fcntl(int fd, int op, size_t argument, bool kernel)
6210 {
6211 	struct flock flock;
6212 
6213 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6214 		fd, op, argument, kernel ? "kernel" : "user"));
6215 
6216 	struct io_context* context = get_current_io_context(kernel);
6217 
6218 	FileDescriptorPutter descriptor(get_fd(context, fd));
6219 	if (!descriptor.IsSet())
6220 		return B_FILE_ERROR;
6221 
6222 	struct vnode* vnode = fd_vnode(descriptor.Get());
6223 
6224 	status_t status = B_OK;
6225 
6226 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6227 		if (descriptor->ops != &sFileOps)
6228 			status = B_BAD_VALUE;
6229 		else if (kernel)
6230 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6231 		else if (user_memcpy(&flock, (struct flock*)argument,
6232 				sizeof(struct flock)) != B_OK)
6233 			status = B_BAD_ADDRESS;
6234 		if (status != B_OK)
6235 			return status;
6236 	}
6237 
6238 	switch (op) {
6239 		case F_SETFD:
6240 		{
6241 			// Set file descriptor flags
6242 
6243 			// FD_CLOEXEC is the only flag available at this time
6244 			mutex_lock(&context->io_mutex);
6245 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6246 			mutex_unlock(&context->io_mutex);
6247 
6248 			status = B_OK;
6249 			break;
6250 		}
6251 
6252 		case F_GETFD:
6253 		{
6254 			// Get file descriptor flags
6255 			mutex_lock(&context->io_mutex);
6256 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6257 			mutex_unlock(&context->io_mutex);
6258 			break;
6259 		}
6260 
6261 		case F_SETFL:
6262 		{
6263 			// Set file descriptor open mode
6264 
6265 			// we only accept changes to certain flags
6266 			const int32 modifiableFlags = O_APPEND | O_NONBLOCK;
6267 			argument &= modifiableFlags;
6268 
6269 			if (descriptor->ops->fd_set_flags != NULL) {
6270 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6271 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6272 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6273 					(int)argument);
6274 			} else
6275 				status = B_UNSUPPORTED;
6276 
6277 			if (status == B_OK) {
6278 				// update this descriptor's open_mode field
6279 				descriptor->open_mode = (descriptor->open_mode
6280 					& ~modifiableFlags) | argument;
6281 			}
6282 
6283 			break;
6284 		}
6285 
6286 		case F_GETFL:
6287 			// Get file descriptor open mode
6288 			status = descriptor->open_mode;
6289 			break;
6290 
6291 		case F_DUPFD:
6292 		case F_DUPFD_CLOEXEC:
6293 		{
6294 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6295 			if (status >= 0) {
6296 				mutex_lock(&context->io_mutex);
6297 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6298 				mutex_unlock(&context->io_mutex);
6299 
6300 				atomic_add(&descriptor->ref_count, 1);
6301 			}
6302 			break;
6303 		}
6304 
6305 		case F_GETLK:
6306 			if (vnode != NULL) {
6307 				struct flock normalizedLock;
6308 
6309 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6310 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6311 				if (status != B_OK)
6312 					break;
6313 
6314 				if (HAS_FS_CALL(vnode, test_lock)) {
6315 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6316 						&normalizedLock);
6317 				} else
6318 					status = test_advisory_lock(vnode, &normalizedLock);
6319 				if (status == B_OK) {
6320 					if (normalizedLock.l_type == F_UNLCK) {
6321 						// no conflicting lock found, copy back the same struct
6322 						// we were given except change type to F_UNLCK
6323 						flock.l_type = F_UNLCK;
6324 						if (kernel) {
6325 							memcpy((struct flock*)argument, &flock,
6326 								sizeof(struct flock));
6327 						} else {
6328 							status = user_memcpy((struct flock*)argument,
6329 								&flock, sizeof(struct flock));
6330 						}
6331 					} else {
6332 						// a conflicting lock was found, copy back its range and
6333 						// type
6334 						if (normalizedLock.l_len == OFF_MAX)
6335 							normalizedLock.l_len = 0;
6336 
6337 						if (kernel) {
6338 							memcpy((struct flock*)argument,
6339 								&normalizedLock, sizeof(struct flock));
6340 						} else {
6341 							status = user_memcpy((struct flock*)argument,
6342 								&normalizedLock, sizeof(struct flock));
6343 						}
6344 					}
6345 				}
6346 			} else
6347 				status = B_BAD_VALUE;
6348 			break;
6349 
6350 		case F_SETLK:
6351 		case F_SETLKW:
6352 			status = normalize_flock(descriptor.Get(), &flock);
6353 			if (status != B_OK)
6354 				break;
6355 
6356 			if (vnode == NULL) {
6357 				status = B_BAD_VALUE;
6358 			} else if (flock.l_type == F_UNLCK) {
6359 				if (HAS_FS_CALL(vnode, release_lock)) {
6360 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6361 						&flock);
6362 				} else {
6363 					status = release_advisory_lock(vnode, context, NULL,
6364 						&flock);
6365 				}
6366 			} else {
6367 				// the open mode must match the lock type
6368 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6369 						&& flock.l_type == F_WRLCK)
6370 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6371 						&& flock.l_type == F_RDLCK))
6372 					status = B_FILE_ERROR;
6373 				else {
6374 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6375 						status = FS_CALL(vnode, acquire_lock,
6376 							descriptor->cookie, &flock, op == F_SETLKW);
6377 					} else {
6378 						status = acquire_advisory_lock(vnode, context, NULL,
6379 							&flock, op == F_SETLKW);
6380 					}
6381 				}
6382 			}
6383 			break;
6384 
6385 		// ToDo: add support for more ops?
6386 
6387 		default:
6388 			status = B_BAD_VALUE;
6389 	}
6390 
6391 	return status;
6392 }
6393 
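/*	Illustrative sketch: the F_SETLK/F_GETLK paths above as seen from
	userland. Note the open-mode check: a write lock requires an FD that is
	open for writing:

		struct flock lock = {};
		lock.l_type = F_WRLCK;
		lock.l_whence = SEEK_SET;
		lock.l_start = 0;
		lock.l_len = 0;			// 0 means "to the end of the file"
		if (fcntl(fd, F_SETLK, &lock) < 0)
			;	// conflicting lock held elsewhere, or FD not open for writing
*/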
6394 
6395 static status_t
6396 common_sync(int fd, bool kernel)
6397 {
6398 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6399 
6400 	struct vnode* vnode;
6401 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6402 	if (!descriptor.IsSet())
6403 		return B_FILE_ERROR;
6404 
6405 	status_t status;
6406 	if (HAS_FS_CALL(vnode, fsync))
6407 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6408 	else
6409 		status = B_UNSUPPORTED;
6410 
6411 	return status;
6412 }
6413 
6414 
6415 static status_t
6416 common_lock_node(int fd, bool kernel)
6417 {
6418 	struct vnode* vnode;
6419 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6420 	if (!descriptor.IsSet())
6421 		return B_FILE_ERROR;
6422 
6423 	status_t status = B_OK;
6424 
6425 	// We need to set the lock atomically - someone else
6426 	// might set one at the same time
6427 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6428 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6429 		status = B_BUSY;
6430 
6431 	return status;
6432 }
6433 
6434 
6435 static status_t
6436 common_unlock_node(int fd, bool kernel)
6437 {
6438 	struct vnode* vnode;
6439 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6440 	if (!descriptor.IsSet())
6441 		return B_FILE_ERROR;
6442 
6443 	status_t status = B_OK;
6444 
6445 	// We need to clear the lock atomically - someone else
6446 	// might set one at the same time
6447 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6448 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6449 		status = B_BAD_VALUE;
6450 
6451 	return status;
6452 }
6453 
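/*	The two functions above implement a simple ownership protocol on
	vnode::mandatory_locked_by: common_lock_node() succeeds only if it
	atomically swaps NULL -> descriptor, and common_unlock_node() only if it
	swaps descriptor -> NULL, i.e. only the very descriptor that acquired
	the lock can release it again.
*/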
6454 
6455 static status_t
6456 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6457 {
6458 	if (offset < 0 || length == 0)
6459 		return B_BAD_VALUE;
6460 	if (offset > OFF_MAX - length)
6461 		return B_FILE_TOO_LARGE;
6462 
6463 	struct vnode* vnode;
6464 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6465 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6466 		return B_FILE_ERROR;
6467 
6468 	switch (vnode->Type() & S_IFMT) {
6469 		case S_IFIFO:
6470 		case S_IFSOCK:
6471 			return ESPIPE;
6472 
6473 		case S_IFBLK:
6474 		case S_IFCHR:
6475 		case S_IFDIR:
6476 		case S_IFLNK:
6477 			return B_DEVICE_NOT_FOUND;
6478 
6479 		case S_IFREG:
6480 			break;
6481 	}
6482 
6483 	status_t status = B_OK;
6484 	if (HAS_FS_CALL(vnode, preallocate)) {
6485 		status = FS_CALL(vnode, preallocate, offset, length);
6486 	} else {
6487 		status = HAS_FS_CALL(vnode, write)
6488 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6489 	}
6490 
6491 	return status;
6492 }
6493 
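/*	Summary of the checks in common_preallocate() above:

		FIFO or socket                    -> ESPIPE
		device, directory, or symlink     -> B_DEVICE_NOT_FOUND
		FD not open for writing           -> B_FILE_ERROR
		regular file, no preallocate()
		hook, but FS supports write()     -> B_UNSUPPORTED (the caller may
		                                     fall back to writing zeros)
*/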
6494 
6495 static status_t
6496 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6497 	bool kernel)
6498 {
6499 	VnodePutter vnode;
6500 	status_t status;
6501 
6502 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6503 	if (status != B_OK)
6504 		return status;
6505 
6506 	if (HAS_FS_CALL(vnode, read_symlink)) {
6507 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6508 	} else
6509 		status = B_BAD_VALUE;
6510 
6511 	return status;
6512 }
6513 
6514 
6515 static status_t
6516 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6517 	bool kernel)
6518 {
6519 	// path validity checks have to be in the calling function!
6520 	char name[B_FILE_NAME_LENGTH];
6521 	status_t status;
6522 
6523 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6524 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6525 
6526 	VnodePutter vnode;
6527 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6528 	if (status != B_OK)
6529 		return status;
6530 
6531 	if (HAS_FS_CALL(vnode, create_symlink))
6532 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6533 	else {
6534 		status = HAS_FS_CALL(vnode, write)
6535 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6536 	}
6537 
6538 	return status;
6539 }
6540 
6541 
6542 static status_t
6543 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6544 	bool traverseLeafLink, bool kernel)
6545 {
6546 	// path validity checks have to be in the calling function!
6547 
6548 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6549 		toPath, kernel));
6550 
6551 	char name[B_FILE_NAME_LENGTH];
6552 	VnodePutter directory;
6553 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6554 		kernel);
6555 	if (status != B_OK)
6556 		return status;
6557 
6558 	VnodePutter vnode;
6559 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6560 		kernel);
6561 	if (status != B_OK)
6562 		return status;
6563 
6564 	if (directory->mount != vnode->mount)
6565 		return B_CROSS_DEVICE_LINK;
6566 
6567 	if (HAS_FS_CALL(directory, link))
6568 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6569 	else
6570 		status = B_READ_ONLY_DEVICE;
6571 
6572 	return status;
6573 }
6574 
6575 
6576 static status_t
6577 common_unlink(int fd, char* path, bool kernel)
6578 {
6579 	char filename[B_FILE_NAME_LENGTH];
6580 	status_t status;
6581 
6582 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6583 		kernel));
6584 
6585 	VnodePutter vnode;
6586 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6587 	if (status < 0)
6588 		return status;
6589 
6590 	if (HAS_FS_CALL(vnode, unlink))
6591 		status = FS_CALL(vnode.Get(), unlink, filename);
6592 	else
6593 		status = B_READ_ONLY_DEVICE;
6594 
6595 	return status;
6596 }
6597 
6598 
6599 static status_t
6600 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6601 {
6602 	status_t status;
6603 
6604 	// TODO: honor effectiveUserGroup argument
6605 
6606 	VnodePutter vnode;
6607 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6608 	if (status != B_OK)
6609 		return status;
6610 
6611 	if (HAS_FS_CALL(vnode, access))
6612 		status = FS_CALL(vnode.Get(), access, mode);
6613 	else
6614 		status = B_OK;
6615 
6616 	return status;
6617 }
6618 
6619 
6620 static status_t
6621 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6622 {
6623 	status_t status;
6624 
6625 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6626 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6627 
6628 	VnodePutter fromVnode;
6629 	char fromName[B_FILE_NAME_LENGTH];
6630 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6631 	if (status != B_OK)
6632 		return status;
6633 
6634 	VnodePutter toVnode;
6635 	char toName[B_FILE_NAME_LENGTH];
6636 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6637 	if (status != B_OK)
6638 		return status;
6639 
6640 	if (fromVnode->device != toVnode->device)
6641 		return B_CROSS_DEVICE_LINK;
6642 
6643 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6644 		return B_OK;
6645 
6646 	if (fromName[0] == '\0' || toName[0] == '\0'
6647 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6648 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6649 		return B_BAD_VALUE;
6650 	}
6651 
6652 	if (HAS_FS_CALL(fromVnode, rename))
6653 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6654 	else
6655 		status = B_READ_ONLY_DEVICE;
6656 
6657 	return status;
6658 }
6659 
6660 
6661 static status_t
6662 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6663 {
6664 	struct vnode* vnode = descriptor->u.vnode;
6665 
6666 	FUNCTION(("common_read_stat: stat %p\n", stat));
6667 
6668 	// TODO: remove this once all file systems properly set them!
6669 	stat->st_crtim.tv_nsec = 0;
6670 	stat->st_ctim.tv_nsec = 0;
6671 	stat->st_mtim.tv_nsec = 0;
6672 	stat->st_atim.tv_nsec = 0;
6673 
6674 	return vfs_stat_vnode(vnode, stat);
6675 }
6676 
6677 
6678 static status_t
6679 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6680 	int statMask)
6681 {
6682 	struct vnode* vnode = descriptor->u.vnode;
6683 
6684 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6685 		vnode, stat, statMask));
6686 
6687 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6688 		&& (statMask & B_STAT_SIZE) != 0) {
6689 		return B_BAD_VALUE;
6690 	}
6691 
6692 	if (!HAS_FS_CALL(vnode, write_stat))
6693 		return B_READ_ONLY_DEVICE;
6694 
6695 	return FS_CALL(vnode, write_stat, stat, statMask);
6696 }
6697 
6698 
6699 static status_t
6700 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6701 	struct stat* stat, bool kernel)
6702 {
6703 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6704 		stat));
6705 
6706 	VnodePutter vnode;
6707 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6708 		NULL, kernel);
6709 	if (status != B_OK)
6710 		return status;
6711 
6712 	status = vfs_stat_vnode(vnode.Get(), stat);
6713 
6714 	return status;
6715 }
6716 
6717 
6718 static status_t
6719 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6720 	const struct stat* stat, int statMask, bool kernel)
6721 {
6722 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, "
6723 		"stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6724 
6725 	VnodePutter vnode;
6726 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6727 		NULL, kernel);
6728 	if (status != B_OK)
6729 		return status;
6730 
6731 	if (HAS_FS_CALL(vnode, write_stat))
6732 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6733 	else
6734 		status = B_READ_ONLY_DEVICE;
6735 
6736 	return status;
6737 }
6738 
6739 
6740 static int
6741 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6742 {
6743 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6744 		kernel));
6745 
6746 	VnodePutter vnode;
6747 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6748 		NULL, kernel);
6749 	if (status != B_OK)
6750 		return status;
6751 
6752 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6753 	if (status >= 0)
6754 		vnode.Detach();
6755 
6756 	return status;
6757 }
6758 
6759 
6760 static status_t
6761 attr_dir_close(struct file_descriptor* descriptor)
6762 {
6763 	struct vnode* vnode = descriptor->u.vnode;
6764 
6765 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6766 
6767 	if (HAS_FS_CALL(vnode, close_attr_dir))
6768 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6769 
6770 	return B_OK;
6771 }
6772 
6773 
6774 static void
6775 attr_dir_free_fd(struct file_descriptor* descriptor)
6776 {
6777 	struct vnode* vnode = descriptor->u.vnode;
6778 
6779 	if (vnode != NULL) {
6780 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6781 		put_vnode(vnode);
6782 	}
6783 }
6784 
6785 
6786 static status_t
6787 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6788 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6789 {
6790 	struct vnode* vnode = descriptor->u.vnode;
6791 
6792 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6793 
6794 	if (HAS_FS_CALL(vnode, read_attr_dir))
6795 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6796 			bufferSize, _count);
6797 
6798 	return B_UNSUPPORTED;
6799 }
6800 
6801 
6802 static status_t
6803 attr_dir_rewind(struct file_descriptor* descriptor)
6804 {
6805 	struct vnode* vnode = descriptor->u.vnode;
6806 
6807 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6808 
6809 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6810 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6811 
6812 	return B_UNSUPPORTED;
6813 }
6814 
6815 
6816 static int
6817 attr_create(int fd, char* path, const char* name, uint32 type,
6818 	int openMode, bool kernel)
6819 {
6820 	if (name == NULL || *name == '\0')
6821 		return B_BAD_VALUE;
6822 
6823 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6824 	VnodePutter vnode;
6825 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6826 		kernel);
6827 	if (status != B_OK)
6828 		return status;
6829 
6830 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6831 		return B_LINK_LIMIT;
6832 
6833 	if (!HAS_FS_CALL(vnode, create_attr))
6834 		return B_READ_ONLY_DEVICE;
6835 
6836 	void* cookie;
6837 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6838 	if (status != B_OK)
6839 		return status;
6840 
6841 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6842 	if (fd >= 0) {
6843 		vnode.Detach();
6844 		return fd;
6845 	}
6846 
6847 	status = fd;
6848 
6849 	FS_CALL(vnode.Get(), close_attr, cookie);
6850 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6851 
6852 	FS_CALL(vnode.Get(), remove_attr, name);
6853 
6854 	return status;
6855 }
6856 
6857 
6858 static int
6859 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6860 {
6861 	if (name == NULL || *name == '\0')
6862 		return B_BAD_VALUE;
6863 
6864 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6865 	VnodePutter vnode;
6866 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6867 		kernel);
6868 	if (status != B_OK)
6869 		return status;
6870 
6871 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6872 		return B_LINK_LIMIT;
6873 
6874 	if (!HAS_FS_CALL(vnode, open_attr))
6875 		return B_UNSUPPORTED;
6876 
6877 	void* cookie;
6878 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6879 	if (status != B_OK)
6880 		return status;
6881 
6882 	// now we only need a file descriptor for this attribute and we're done
6883 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6884 	if (fd >= 0) {
6885 		vnode.Detach();
6886 		return fd;
6887 	}
6888 
6889 	status = fd;
6890 
6891 	FS_CALL(vnode.Get(), close_attr, cookie);
6892 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6893 
6894 	return status;
6895 }
6896 
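/*	Illustrative sketch (not compiled, hypothetical file/attribute names):
	attr_create() and attr_open() back Haiku's userland attribute API from
	<fs_attr.h>, e.g.:

		int fd = open("/boot/home/mail/letter", O_RDONLY);
		char subject[B_ATTR_NAME_LENGTH];
		ssize_t bytesRead = fs_read_attr(fd, "MAIL:subject", B_STRING_TYPE,
			0, subject, sizeof(subject));

	A failing create_attr()/open_attr() FS hook surfaces to the caller as
	the negative error code returned here.
*/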
6897 
6898 static status_t
6899 attr_close(struct file_descriptor* descriptor)
6900 {
6901 	struct vnode* vnode = descriptor->u.vnode;
6902 
6903 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6904 
6905 	if (HAS_FS_CALL(vnode, close_attr))
6906 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6907 
6908 	return B_OK;
6909 }
6910 
6911 
6912 static void
6913 attr_free_fd(struct file_descriptor* descriptor)
6914 {
6915 	struct vnode* vnode = descriptor->u.vnode;
6916 
6917 	if (vnode != NULL) {
6918 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6919 		put_vnode(vnode);
6920 	}
6921 }
6922 
6923 
6924 static status_t
6925 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6926 	size_t* length)
6927 {
6928 	struct vnode* vnode = descriptor->u.vnode;
6929 
6930 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6931 		pos, length, *length));
6932 
6933 	if (!HAS_FS_CALL(vnode, read_attr))
6934 		return B_UNSUPPORTED;
6935 
6936 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6937 }
6938 
6939 
6940 static status_t
6941 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6942 	size_t* length)
6943 {
6944 	struct vnode* vnode = descriptor->u.vnode;
6945 
6946 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6947 		length));
6948 
6949 	if (!HAS_FS_CALL(vnode, write_attr))
6950 		return B_UNSUPPORTED;
6951 
6952 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6953 }
6954 
6955 
6956 static off_t
6957 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6958 {
6959 	off_t offset;
6960 
6961 	switch (seekType) {
6962 		case SEEK_SET:
6963 			offset = 0;
6964 			break;
6965 		case SEEK_CUR:
6966 			offset = descriptor->pos;
6967 			break;
6968 		case SEEK_END:
6969 		{
6970 			struct vnode* vnode = descriptor->u.vnode;
6971 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6972 				return B_UNSUPPORTED;
6973 
6974 			struct stat stat;
6975 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6976 				&stat);
6977 			if (status != B_OK)
6978 				return status;
6979 
6980 			offset = stat.st_size;
6981 			break;
6982 		}
6983 		default:
6984 			return B_BAD_VALUE;
6985 	}
6986 
6987 	// assumes off_t is 64 bits wide
6988 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6989 		return B_BUFFER_OVERFLOW;
6990 
6991 	pos += offset;
6992 	if (pos < 0)
6993 		return B_BAD_VALUE;
6994 
6995 	return descriptor->pos = pos;
6996 }
6997 
6998 
6999 static status_t
7000 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7001 {
7002 	struct vnode* vnode = descriptor->u.vnode;
7003 
7004 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7005 
7006 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7007 		return B_UNSUPPORTED;
7008 
7009 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7010 }
7011 
7012 
7013 static status_t
7014 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7015 	int statMask)
7016 {
7017 	struct vnode* vnode = descriptor->u.vnode;
7018 
7019 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7020 
7021 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7022 		return B_READ_ONLY_DEVICE;
7023 
7024 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7025 }
7026 
7027 
7028 static status_t
7029 attr_remove(int fd, const char* name, bool kernel)
7030 {
7031 	if (name == NULL || *name == '\0')
7032 		return B_BAD_VALUE;
7033 
7034 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7035 		kernel));
7036 
7037 	struct vnode* vnode;
7038 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
7039 	if (!descriptor.IsSet())
7040 		return B_FILE_ERROR;
7041 
7042 	status_t status;
7043 	if (HAS_FS_CALL(vnode, remove_attr))
7044 		status = FS_CALL(vnode, remove_attr, name);
7045 	else
7046 		status = B_READ_ONLY_DEVICE;
7047 
7048 	return status;
7049 }
7050 
7051 
7052 static status_t
7053 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7054 	bool kernel)
7055 {
7056 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7057 		|| *toName == '\0')
7058 		return B_BAD_VALUE;
7059 
7060 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7061 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7062 
7063 	struct vnode* fromVnode;
7064 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7065 	if (!fromDescriptor.IsSet())
7066 		return B_FILE_ERROR;
7067 
7068 	struct vnode* toVnode;
7069 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7070 	if (!toDescriptor.IsSet())
7071 		return B_FILE_ERROR;
7072 
7073 	// are the files on the same volume?
7074 	if (fromVnode->device != toVnode->device)
7075 		return B_CROSS_DEVICE_LINK;
7076 
7077 	status_t status;
7078 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7079 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7080 	} else
7081 		status = B_READ_ONLY_DEVICE;
7082 
7083 	return status;
7084 }
7085 
7086 
7087 static int
7088 index_dir_open(dev_t mountID, bool kernel)
7089 {
7090 	struct fs_mount* mount;
7091 	void* cookie;
7092 
7093 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7094 		kernel));
7095 
7096 	status_t status = get_mount(mountID, &mount);
7097 	if (status != B_OK)
7098 		return status;
7099 
7100 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7101 		status = B_UNSUPPORTED;
7102 		goto error;
7103 	}
7104 
7105 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7106 	if (status != B_OK)
7107 		goto error;
7108 
7109 	// get fd for the index directory
7110 	int fd;
7111 	fd = get_new_fd(&sIndexDirectoryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7112 	if (fd >= 0)
7113 		return fd;
7114 
7115 	// something went wrong
7116 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7117 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7118 
7119 	status = fd;
7120 
7121 error:
7122 	put_mount(mount);
7123 	return status;
7124 }
7125 
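/*	Illustrative sketch (not compiled): the index directory ops back
	<fs_index.h>, e.g. listing all attribute indices of a volume:

		DIR* indexDir = fs_open_index_dir(volumeDevice);
			// volumeDevice is the dev_t of the mounted volume
		while (struct dirent* entry = fs_read_index_dir(indexDir))
			printf("index: %s\n", entry->d_name);
		fs_close_index_dir(indexDir);
*/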
7126 
7127 static status_t
7128 index_dir_close(struct file_descriptor* descriptor)
7129 {
7130 	struct fs_mount* mount = descriptor->u.mount;
7131 
7132 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7133 
7134 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7135 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7136 
7137 	return B_OK;
7138 }
7139 
7140 
7141 static void
7142 index_dir_free_fd(struct file_descriptor* descriptor)
7143 {
7144 	struct fs_mount* mount = descriptor->u.mount;
7145 
7146 	if (mount != NULL) {
7147 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7148 		put_mount(mount);
7149 	}
7150 }
7151 
7152 
7153 static status_t
7154 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7155 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7156 {
7157 	struct fs_mount* mount = descriptor->u.mount;
7158 
7159 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7160 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7161 			bufferSize, _count);
7162 	}
7163 
7164 	return B_UNSUPPORTED;
7165 }
7166 
7167 
7168 static status_t
7169 index_dir_rewind(struct file_descriptor* descriptor)
7170 {
7171 	struct fs_mount* mount = descriptor->u.mount;
7172 
7173 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7174 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7175 
7176 	return B_UNSUPPORTED;
7177 }
7178 
7179 
7180 static status_t
7181 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7182 	bool kernel)
7183 {
7184 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7185 		mountID, name, kernel));
7186 
7187 	struct fs_mount* mount;
7188 	status_t status = get_mount(mountID, &mount);
7189 	if (status != B_OK)
7190 		return status;
7191 
7192 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7193 		status = B_READ_ONLY_DEVICE;
7194 		goto out;
7195 	}
7196 
7197 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7198 
7199 out:
7200 	put_mount(mount);
7201 	return status;
7202 }
7203 
7204 
7205 #if 0
7206 static status_t
7207 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7208 {
7209 	struct vnode* vnode = descriptor->u.vnode;
7210 
7211 	// ToDo: currently unused!
7212 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7213 	if (!HAS_FS_CALL(vnode, read_index_stat))
7214 		return B_UNSUPPORTED;
7215 
7216 	return B_UNSUPPORTED;
7217 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7218 }
7219 
7220 
7221 static void
7222 index_free_fd(struct file_descriptor* descriptor)
7223 {
7224 	struct vnode* vnode = descriptor->u.vnode;
7225 
7226 	if (vnode != NULL) {
7227 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7228 		put_vnode(vnode);
7229 	}
7230 }
7231 #endif
7232 
7233 
7234 static status_t
7235 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7236 	bool kernel)
7237 {
7238 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7239 		"kernel = %d)\n", mountID, name, kernel));
7240 
7241 	struct fs_mount* mount;
7242 	status_t status = get_mount(mountID, &mount);
7243 	if (status != B_OK)
7244 		return status;
7245 
7246 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7247 		status = B_UNSUPPORTED;
7248 		goto out;
7249 	}
7250 
7251 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7252 
7253 out:
7254 	put_mount(mount);
7255 	return status;
7256 }
7257 
7258 
7259 static status_t
7260 index_remove(dev_t mountID, const char* name, bool kernel)
7261 {
7262 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7263 		mountID, name, kernel));
7264 
7265 	struct fs_mount* mount;
7266 	status_t status = get_mount(mountID, &mount);
7267 	if (status != B_OK)
7268 		return status;
7269 
7270 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7271 		status = B_READ_ONLY_DEVICE;
7272 		goto out;
7273 	}
7274 
7275 	status = FS_MOUNT_CALL(mount, remove_index, name);
7276 
7277 out:
7278 	put_mount(mount);
7279 	return status;
7280 }
7281 
7282 
7283 /*!	TODO: the query FS API is still pretty much the same as in R5.
7284 		It would be nice if the FS would find some more kernel support
7285 		for them.
7286 		For example, query parsing should be moved into the kernel.
7287 */
7288 static int
7289 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7290 	int32 token, bool kernel)
7291 {
7292 	struct fs_mount* mount;
7293 	void* cookie;
7294 
7295 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7296 		device, query, kernel));
7297 
7298 	status_t status = get_mount(device, &mount);
7299 	if (status != B_OK)
7300 		return status;
7301 
7302 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7303 		status = B_UNSUPPORTED;
7304 		goto error;
7305 	}
7306 
7307 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7308 		&cookie);
7309 	if (status != B_OK)
7310 		goto error;
7311 
7312 	// get fd for the query
7313 	int fd;
7314 	fd = get_new_fd(&sQueryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7315 	if (fd >= 0)
7316 		return fd;
7317 
7318 	status = fd;
7319 
7320 	// something went wrong
7321 	FS_MOUNT_CALL(mount, close_query, cookie);
7322 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7323 
7324 error:
7325 	put_mount(mount);
7326 	return status;
7327 }
7328 
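/*	Illustrative sketch (not compiled): query_open() backs <fs_query.h>.
	The port/token pair is only used by live queries, where the FS sends
	update notifications to that port; a plain query just iterates the
	matching entries:

		DIR* query = fs_open_query(volumeDevice,
			"MAIL:subject == \"*Haiku*\"", 0);
		while (struct dirent* entry = fs_read_query(query))
			printf("match: %s\n", entry->d_name);
		fs_close_query(query);
*/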
7329 
7330 static status_t
7331 query_close(struct file_descriptor* descriptor)
7332 {
7333 	struct fs_mount* mount = descriptor->u.mount;
7334 
7335 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7336 
7337 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7338 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7339 
7340 	return B_OK;
7341 }
7342 
7343 
7344 static void
7345 query_free_fd(struct file_descriptor* descriptor)
7346 {
7347 	struct fs_mount* mount = descriptor->u.mount;
7348 
7349 	if (mount != NULL) {
7350 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7351 		put_mount(mount);
7352 	}
7353 }
7354 
7355 
7356 static status_t
7357 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7358 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7359 {
7360 	struct fs_mount* mount = descriptor->u.mount;
7361 
7362 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7363 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7364 			bufferSize, _count);
7365 	}
7366 
7367 	return B_UNSUPPORTED;
7368 }
7369 
7370 
7371 static status_t
7372 query_rewind(struct file_descriptor* descriptor)
7373 {
7374 	struct fs_mount* mount = descriptor->u.mount;
7375 
7376 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7377 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7378 
7379 	return B_UNSUPPORTED;
7380 }
7381 
7382 
7383 //	#pragma mark - General File System functions
7384 
7385 
7386 static dev_t
7387 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7388 	const char* args, bool kernel)
7389 {
7390 	struct ::fs_mount* mount;
7391 	status_t status = B_OK;
7392 	fs_volume* volume = NULL;
7393 	int32 layer = 0;
7394 	Vnode* coveredNode = NULL;
7395 
7396 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7397 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7398 
7399 	// The path is always safe, we just have to make sure that fsName is at
7400 	// least minimally valid - we can't make any assumptions about args, though.
7401 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7402 	// We'll get it from the DDM later.
7403 	if (fsName == NULL) {
7404 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7405 			return B_BAD_VALUE;
7406 	} else if (fsName[0] == '\0')
7407 		return B_BAD_VALUE;
7408 
7409 	RecursiveLocker mountOpLocker(sMountOpLock);
7410 
7411 	// Helper to delete a newly created file device on failure.
7412 	// Not exactly beautiful, but helps to keep the code below cleaner.
7413 	struct FileDeviceDeleter {
7414 		FileDeviceDeleter() : id(-1) {}
7415 		~FileDeviceDeleter()
7416 		{
7417 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7418 		}
7419 
7420 		partition_id id;
7421 	} fileDeviceDeleter;
7422 
7423 	// If the file system is not a "virtual" one, the device argument should
7424 	// point to a real file/device (if given at all).
7425 	// get the partition
7426 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7427 	KPartition* partition = NULL;
7428 	KPath normalizedDevice;
7429 	bool newlyCreatedFileDevice = false;
7430 
7431 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7432 		// normalize the device path
7433 		status = normalizedDevice.SetTo(device, true);
7434 		if (status != B_OK)
7435 			return status;
7436 
7437 		// get a corresponding partition from the DDM
7438 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7439 		if (partition == NULL) {
7440 			// Partition not found: This either means the user supplied
7441 			// an invalid path, or the path refers to an image file. We try
7442 			// to let the DDM create a file device for the path.
7443 			partition_id deviceID = ddm->CreateFileDevice(
7444 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7445 			if (deviceID >= 0) {
7446 				partition = ddm->RegisterPartition(deviceID);
7447 				if (newlyCreatedFileDevice)
7448 					fileDeviceDeleter.id = deviceID;
7449 			}
7450 		}
7451 
7452 		if (!partition) {
7453 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7454 				normalizedDevice.Path()));
7455 			return B_ENTRY_NOT_FOUND;
7456 		}
7457 
7458 		device = normalizedDevice.Path();
7459 			// correct path to file device
7460 	}
7461 	PartitionRegistrar partitionRegistrar(partition, true);
7462 
7463 	// Write lock the partition's device. For the time being, we keep the lock
7464 	// until we're done mounting -- not nice, but it ensures that no-one is
7465 	// interfering.
7466 	// TODO: Just mark the partition busy while mounting!
7467 	KDiskDevice* diskDevice = NULL;
7468 	if (partition) {
7469 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7470 		if (!diskDevice) {
7471 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7472 			return B_ERROR;
7473 		}
7474 	}
7475 
7476 	DeviceWriteLocker writeLocker(diskDevice, true);
7477 		// this takes over the write lock acquired before
7478 
7479 	if (partition != NULL) {
7480 		// make sure that the partition is not busy
7481 		if (partition->IsBusy()) {
7482 			TRACE(("fs_mount(): Partition is busy.\n"));
7483 			return B_BUSY;
7484 		}
7485 
7486 		// if no FS name had been supplied, we get it from the partition
7487 		if (fsName == NULL) {
7488 			KDiskSystem* diskSystem = partition->DiskSystem();
7489 			if (!diskSystem) {
7490 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7491 					"recognize it.\n"));
7492 				return B_BAD_VALUE;
7493 			}
7494 
7495 			if (!diskSystem->IsFileSystem()) {
7496 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7497 					"partitioning system.\n"));
7498 				return B_BAD_VALUE;
7499 			}
7500 
7501 			// The disk system name will not change, and the KDiskSystem
7502 			// object will not go away while the disk device is locked (and
7503 			// the partition has a reference to it), so this is safe.
7504 			fsName = diskSystem->Name();
7505 		}
7506 	}
7507 
7508 	mount = new(std::nothrow) (struct ::fs_mount);
7509 	if (mount == NULL)
7510 		return B_NO_MEMORY;
7511 
7512 	mount->device_name = strdup(device);
7513 		// "device" can be NULL
7514 
7515 	status = mount->entry_cache.Init();
7516 	if (status != B_OK)
7517 		goto err1;
7518 
7519 	// initialize structure
7520 	mount->id = sNextMountID++;
7521 	mount->partition = NULL;
7522 	mount->root_vnode = NULL;
7523 	mount->covers_vnode = NULL;
7524 	mount->unmounting = false;
7525 	mount->owns_file_device = false;
7526 	mount->volume = NULL;
7527 
7528 	// build up the volume(s)
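	// A mount may consist of several stacked layers (e.g. an overlay file
	// system on top of the actual one). Each iteration creates the fs_volume
	// for one layer and links it into the chain via the super_volume/
	// sub_volume pointers; mount->volume always points to the volume created
	// last.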
7529 	while (true) {
7530 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7531 		if (layerFSName == NULL) {
7532 			if (layer == 0) {
7533 				status = B_NO_MEMORY;
7534 				goto err1;
7535 			}
7536 
7537 			break;
7538 		}
7539 		MemoryDeleter layerFSNameDeleter(layerFSName);
7540 
7541 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7542 		if (volume == NULL) {
7543 			status = B_NO_MEMORY;
7544 			goto err1;
7545 		}
7546 
7547 		volume->id = mount->id;
7548 		volume->partition = partition != NULL ? partition->ID() : -1;
7549 		volume->layer = layer++;
7550 		volume->private_volume = NULL;
7551 		volume->ops = NULL;
7552 		volume->sub_volume = NULL;
7553 		volume->super_volume = NULL;
7554 		volume->file_system = NULL;
7555 		volume->file_system_name = NULL;
7556 
7557 		volume->file_system_name = get_file_system_name(layerFSName);
7558 		if (volume->file_system_name == NULL) {
7559 			status = B_NO_MEMORY;
7560 			free(volume);
7561 			goto err1;
7562 		}
7563 
7564 		volume->file_system = get_file_system(layerFSName);
7565 		if (volume->file_system == NULL) {
7566 			status = B_DEVICE_NOT_FOUND;
7567 			free(volume->file_system_name);
7568 			free(volume);
7569 			goto err1;
7570 		}
7571 
7572 		if (mount->volume == NULL)
7573 			mount->volume = volume;
7574 		else {
7575 			volume->super_volume = mount->volume;
7576 			mount->volume->sub_volume = volume;
7577 			mount->volume = volume;
7578 		}
7579 	}
7580 
7581 	// insert mount struct into list before we call FS's mount() function
7582 	// so that vnodes can be created for this mount
7583 	rw_lock_write_lock(&sMountLock);
7584 	sMountsTable->Insert(mount);
7585 	rw_lock_write_unlock(&sMountLock);
7586 
7587 	ino_t rootID;
7588 
7589 	if (!sRoot) {
7590 		// we haven't mounted anything yet
7591 		if (strcmp(path, "/") != 0) {
7592 			status = B_ERROR;
7593 			goto err2;
7594 		}
7595 
7596 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7597 			args, &rootID);
7598 		if (status != B_OK || mount->volume->ops == NULL)
7599 			goto err2;
7600 	} else {
7601 		{
7602 			VnodePutter temp;
7603 			status = path_to_vnode(path, true, temp, NULL, kernel);
7604 			coveredNode = temp.Detach();
7605 		}
7606 		if (status != B_OK)
7607 			goto err2;
7608 
7609 		mount->covers_vnode = coveredNode;
7610 
7611 		// make sure coveredNode is a directory
7612 		if (!S_ISDIR(coveredNode->Type())) {
7613 			status = B_NOT_A_DIRECTORY;
7614 			goto err3;
7615 		}
7616 
7617 		if (coveredNode->IsCovered()) {
7618 			// this is already a covered vnode
7619 			status = B_BUSY;
7620 			goto err3;
7621 		}
7622 
7623 		// mount it/them
7624 		fs_volume* volume = mount->volume;
7625 		while (volume) {
7626 			status = volume->file_system->mount(volume, device, flags, args,
7627 				&rootID);
7628 			if (status != B_OK || volume->ops == NULL) {
7629 				if (status == B_OK && volume->ops == NULL)
7630 					panic("fs_mount: mount() succeeded but ops is NULL!");
7631 				if (volume->sub_volume)
7632 					goto err4;
7633 				goto err3;
7634 			}
7635 
7636 			volume = volume->super_volume;
7637 		}
7638 
7639 		volume = mount->volume;
7640 		while (volume) {
7641 			if (volume->ops->all_layers_mounted != NULL)
7642 				volume->ops->all_layers_mounted(volume);
7643 			volume = volume->super_volume;
7644 		}
7645 	}
7646 
7647 	// the root node is supposed to be owned by the file system - it must
7648 	// exist at this point
7649 	rw_lock_write_lock(&sVnodeLock);
7650 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7651 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7652 		panic("fs_mount: file system does not own its root node!\n");
7653 		status = B_ERROR;
7654 		rw_lock_write_unlock(&sVnodeLock);
7655 		goto err4;
7656 	}
7657 
7658 	// set up the links between the root vnode and the vnode it covers
7659 	if (coveredNode != NULL) {
7660 		if (coveredNode->IsCovered()) {
7661 			// someone covered the vnode in the meantime
7662 			status = B_BUSY;
7663 			rw_lock_write_unlock(&sVnodeLock);
7664 			goto err4;
7665 		}
7666 
7667 		mount->root_vnode->covers = coveredNode;
7668 		mount->root_vnode->SetCovering(true);
7669 
7670 		coveredNode->covered_by = mount->root_vnode;
7671 		coveredNode->SetCovered(true);
7672 	}
7673 	rw_lock_write_unlock(&sVnodeLock);
7674 
7675 	if (!sRoot) {
7676 		sRoot = mount->root_vnode;
7677 		mutex_lock(&sIOContextRootLock);
7678 		get_current_io_context(true)->root = sRoot;
7679 		mutex_unlock(&sIOContextRootLock);
7680 		inc_vnode_ref_count(sRoot);
7681 	}
7682 
7683 	// supply the partition (if any) with the mount cookie and mark it mounted
7684 	if (partition) {
7685 		partition->SetMountCookie(mount->volume->private_volume);
7686 		partition->SetVolumeID(mount->id);
7687 
7688 		// keep a partition reference as long as the partition is mounted
7689 		partitionRegistrar.Detach();
7690 		mount->partition = partition;
7691 		mount->owns_file_device = newlyCreatedFileDevice;
7692 		fileDeviceDeleter.id = -1;
7693 	}
7694 
7695 	notify_mount(mount->id,
7696 		coveredNode != NULL ? coveredNode->device : -1,
7697 		coveredNode ? coveredNode->id : -1);
7698 
7699 	return mount->id;
7700 
7701 err4:
7702 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7703 err3:
7704 	if (coveredNode != NULL)
7705 		put_vnode(coveredNode);
7706 err2:
7707 	rw_lock_write_lock(&sMountLock);
7708 	sMountsTable->Remove(mount);
7709 	rw_lock_write_unlock(&sMountLock);
7710 err1:
7711 	delete mount;
7712 
7713 	return status;
7714 }
7715 
7716 
7717 static status_t
7718 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7719 {
7720 	struct fs_mount* mount;
7721 	status_t err;
7722 
7723 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7724 		mountID, kernel));
7725 
7726 	VnodePutter pathVnode;
7727 	if (path != NULL) {
7728 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7729 		if (err != B_OK)
7730 			return B_ENTRY_NOT_FOUND;
7731 	}
7732 
7733 	RecursiveLocker mountOpLocker(sMountOpLock);
7734 	ReadLocker mountLocker(sMountLock);
7735 
7736 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7737 	if (mount == NULL) {
7738 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7739 			pathVnode.Get());
7740 	}
7741 
7742 	mountLocker.Unlock();
7743 
7744 	if (path != NULL) {
7745 		if (mount->root_vnode != pathVnode.Get()) {
7746 			// not a mount point
7747 			return B_BAD_VALUE;
7748 		}
7749 
7750 		pathVnode.Unset();
7751 	}
7752 
7753 	// if the volume is associated with a partition, lock the device of the
7754 	// partition as long as we are unmounting
7755 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7756 	KPartition* partition = mount->partition;
7757 	KDiskDevice* diskDevice = NULL;
7758 	if (partition != NULL) {
7759 		if (partition->Device() == NULL) {
7760 			dprintf("fs_unmount(): There is no device!\n");
7761 			return B_ERROR;
7762 		}
7763 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7764 		if (!diskDevice) {
7765 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7766 			return B_ERROR;
7767 		}
7768 	}
7769 	DeviceWriteLocker writeLocker(diskDevice, true);
7770 
7771 	// make sure that the partition is not busy
7772 	if (partition != NULL) {
7773 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7774 			dprintf("fs_unmount(): Partition is busy.\n");
7775 			return B_BUSY;
7776 		}
7777 	}
7778 
7779 	// grab the vnode master mutex to keep someone from creating
7780 	// a vnode while we're figuring out if we can continue
7781 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7782 
7783 	bool disconnectedDescriptors = false;
7784 
7785 	while (true) {
7786 		bool busy = false;
7787 
7788 		// cycle through the list of vnodes associated with this mount and
7789 		// make sure none of them is busy or still referenced
7790 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7791 		while (struct vnode* vnode = iterator.Next()) {
7792 			if (vnode->IsBusy()) {
7793 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7794 				busy = true;
7795 				break;
7796 			}
7797 
7798 			// check the vnode's ref count -- subtract additional references for
7799 			// covering
7800 			int32 refCount = vnode->ref_count;
7801 			if (vnode->covers != NULL)
7802 				refCount--;
7803 			if (vnode->covered_by != NULL)
7804 				refCount--;
7805 
7806 			if (refCount != 0) {
7807 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7808 				// there are still vnodes in use on this mount, so we cannot
7809 				// unmount yet
7810 				busy = true;
7811 				break;
7812 			}
7813 		}
7814 
7815 		if (!busy)
7816 			break;
7817 
7818 		if ((flags & B_FORCE_UNMOUNT) == 0)
7819 			return B_BUSY;
7820 
7821 		if (disconnectedDescriptors) {
7822 			// wait a bit until the last access is finished, and then try again
7823 			vnodesWriteLocker.Unlock();
7824 			snooze(100000);
7825 			// TODO: if there is some kind of bug that prevents the ref counts
7826 			// from getting back to zero, this will fall into an endless loop...
7827 			vnodesWriteLocker.Lock();
7828 			continue;
7829 		}
7830 
7831 		// the file system is still busy - but we're forced to unmount it,
7832 		// so let's disconnect all open file descriptors
7833 
7834 		mount->unmounting = true;
7835 			// prevent new vnodes from being created
7836 
7837 		vnodesWriteLocker.Unlock();
7838 
7839 		disconnect_mount_or_vnode_fds(mount, NULL);
7840 		disconnectedDescriptors = true;
7841 
7842 		vnodesWriteLocker.Lock();
7843 	}
7844 
7845 	// We can safely continue. Mark all of the vnodes busy and put this mount
7846 	// structure into unmounting state. Also undo the vnode covers/covered_by
7847 	// links.
7848 	mount->unmounting = true;
7849 
7850 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7851 	while (struct vnode* vnode = iterator.Next()) {
7852 		// Remove all covers/covered_by links from other mounts' nodes to this
7853 		// vnode and adjust the node ref count accordingly. We will release the
7854 		// references to the external vnodes below.
7855 		if (Vnode* coveredNode = vnode->covers) {
7856 			if (Vnode* coveringNode = vnode->covered_by) {
7857 				// We have both covered and covering vnodes, so just remove us
7858 				// from the chain.
7859 				coveredNode->covered_by = coveringNode;
7860 				coveringNode->covers = coveredNode;
7861 				vnode->ref_count -= 2;
7862 
7863 				vnode->covered_by = NULL;
7864 				vnode->covers = NULL;
7865 				vnode->SetCovering(false);
7866 				vnode->SetCovered(false);
7867 			} else {
7868 				// We only have a covered vnode. Remove its link to us.
7869 				coveredNode->covered_by = NULL;
7870 				coveredNode->SetCovered(false);
7871 				vnode->ref_count--;
7872 
7873 				// If the other node is an external vnode, we keep the link
7874 				// around so we can put the reference later on. Otherwise we
7875 				// get rid of it right now.
7876 				if (coveredNode->mount == mount) {
7877 					vnode->covers = NULL;
7878 					coveredNode->ref_count--;
7879 				}
7880 			}
7881 		} else if (Vnode* coveringNode = vnode->covered_by) {
7882 			// We only have a covering vnode. Remove its link to us.
7883 			coveringNode->covers = NULL;
7884 			coveringNode->SetCovering(false);
7885 			vnode->ref_count--;
7886 
7887 			// If the other node is an external vnode, we keep the link
7888 			// around so we can put the reference later on. Otherwise we get
7889 			// rid of it right now.
7890 			if (coveringNode->mount == mount) {
7891 				vnode->covered_by = NULL;
7892 				coveringNode->ref_count--;
7893 			}
7894 		}
7895 
7896 		vnode->SetBusy(true);
7897 		vnode_to_be_freed(vnode);
7898 	}
7899 
7900 	vnodesWriteLocker.Unlock();
7901 
7902 	// Free all vnodes associated with this mount.
7903 	// They will be removed from the mount list by free_vnode(), so
7904 	// we don't have to do that ourselves.
7905 	while (struct vnode* vnode = mount->vnodes.Head()) {
7906 		// Put the references to external covered/covering vnodes we kept above.
7907 		if (Vnode* coveredNode = vnode->covers)
7908 			put_vnode(coveredNode);
7909 		if (Vnode* coveringNode = vnode->covered_by)
7910 			put_vnode(coveringNode);
7911 
7912 		free_vnode(vnode, false);
7913 	}
7914 
7915 	// remove the mount structure from the hash table
7916 	rw_lock_write_lock(&sMountLock);
7917 	sMountsTable->Remove(mount);
7918 	rw_lock_write_unlock(&sMountLock);
7919 
7920 	mountOpLocker.Unlock();
7921 
7922 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7923 	notify_unmount(mount->id);
7924 
7925 	// dereference the partition and mark it unmounted
7926 	if (partition) {
7927 		partition->SetVolumeID(-1);
7928 		partition->SetMountCookie(NULL);
7929 
7930 		if (mount->owns_file_device)
7931 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7932 		partition->Unregister();
7933 	}
7934 
7935 	delete mount;
7936 	return B_OK;
7937 }
7938 
7939 
7940 static status_t
7941 fs_sync(dev_t device)
7942 {
7943 	struct fs_mount* mount;
7944 	status_t status = get_mount(device, &mount);
7945 	if (status != B_OK)
7946 		return status;
7947 
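	// We use a marker vnode to walk the mount's vnode list: since the locks
	// have to be dropped while a node's cache is being synchronized, the
	// marker is re-inserted after the node processed last, so that the
	// iteration can safely be resumed afterwards.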
7948 	struct vnode marker;
7949 	memset(&marker, 0, sizeof(marker));
7950 	marker.SetBusy(true);
7951 	marker.SetRemoved(true);
7952 
7953 	// First, synchronize all file caches
7954 
7955 	while (true) {
7956 		WriteLocker locker(sVnodeLock);
7957 			// Note: That's the easy way, which is probably OK for sync(),
7958 			// since it's a relatively rare call and doesn't need to allow for
7959 			// a lot of concurrency. Using a read lock would be possible, but
7960 			// also more involved, since we would have to lock the individual
7961 			// nodes and take care of the locking order, which we might not
7962 			// want to do while holding fs_mount::lock.
7963 
7964 		// synchronize access to vnode list
7965 		mutex_lock(&mount->lock);
7966 
7967 		struct vnode* vnode;
7968 		if (!marker.IsRemoved()) {
7969 			vnode = mount->vnodes.GetNext(&marker);
7970 			mount->vnodes.Remove(&marker);
7971 			marker.SetRemoved(true);
7972 		} else
7973 			vnode = mount->vnodes.First();
7974 
7975 		while (vnode != NULL && (vnode->cache == NULL
7976 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7977 			// TODO: we could track writes (and writable mapped vnodes)
7978 			//	and have a simple flag that we could test for here
7979 			vnode = mount->vnodes.GetNext(vnode);
7980 		}
7981 
7982 		if (vnode != NULL) {
7983 			// insert marker vnode again
7984 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7985 			marker.SetRemoved(false);
7986 		}
7987 
7988 		mutex_unlock(&mount->lock);
7989 
7990 		if (vnode == NULL)
7991 			break;
7992 
7993 		vnode = lookup_vnode(mount->id, vnode->id);
7994 		if (vnode == NULL || vnode->IsBusy())
7995 			continue;
7996 
7997 		if (vnode->ref_count == 0) {
7998 			// this vnode has been unused before
7999 			vnode_used(vnode);
8000 		}
8001 		inc_vnode_ref_count(vnode);
8002 
8003 		locker.Unlock();
8004 
8005 		if (vnode->cache != NULL && !vnode->IsRemoved())
8006 			vnode->cache->WriteModified();
8007 
8008 		put_vnode(vnode);
8009 	}
8010 
8011 	// Let the file systems do their synchronizing work
8012 	if (HAS_FS_MOUNT_CALL(mount, sync))
8013 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8014 
8015 	// Finally, flush the underlying device's write cache (if possible).
8016 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8017 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8018 
8019 	put_mount(mount);
8020 	return status;
8021 }
8022 
8023 
8024 static status_t
8025 fs_read_info(dev_t device, struct fs_info* info)
8026 {
8027 	struct fs_mount* mount;
8028 	status_t status = get_mount(device, &mount);
8029 	if (status != B_OK)
8030 		return status;
8031 
8032 	memset(info, 0, sizeof(struct fs_info));
8033 
8034 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8035 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8036 
8037 	// fill in info the file system doesn't (have to) know about
8038 	if (status == B_OK) {
8039 		info->dev = mount->id;
8040 		info->root = mount->root_vnode->id;
8041 
8042 		fs_volume* volume = mount->volume;
8043 		while (volume->super_volume != NULL)
8044 			volume = volume->super_volume;
8045 
8046 		strlcpy(info->fsh_name, volume->file_system_name,
8047 			sizeof(info->fsh_name));
8048 		if (mount->device_name != NULL) {
8049 			strlcpy(info->device_name, mount->device_name,
8050 				sizeof(info->device_name));
8051 		}
8052 	}
8053 
8054 	// even if the call is not supported by the file system, the caller
8055 	// still gets the parts that we filled out ourselves
8056 
8057 	put_mount(mount);
8058 	return status;
8059 }
8060 
8061 
8062 static status_t
8063 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8064 {
8065 	struct fs_mount* mount;
8066 	status_t status = get_mount(device, &mount);
8067 	if (status != B_OK)
8068 		return status;
8069 
8070 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8071 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8072 	else
8073 		status = B_READ_ONLY_DEVICE;
8074 
8075 	put_mount(mount);
8076 	return status;
8077 }
8078 
8079 
8080 static dev_t
8081 fs_next_device(int32* _cookie)
8082 {
8083 	struct fs_mount* mount = NULL;
8084 	dev_t device = *_cookie;
8085 
8086 	rw_lock_read_lock(&sMountLock);
8087 
8088 	// Since device IDs are assigned sequentially, this algorithm works
8089 	// well enough. It ensures that the device list returned is sorted,
8090 	// and that no device is skipped when an already visited device has
8091 	// been unmounted.
8092 
8093 	while (device < sNextMountID) {
8094 		mount = find_mount(device++);
8095 		if (mount != NULL && mount->volume->private_volume != NULL)
8096 			break;
8097 	}
8098 
8099 	*_cookie = device;
8100 
8101 	if (mount != NULL)
8102 		device = mount->id;
8103 	else
8104 		device = B_BAD_VALUE;
8105 
8106 	rw_lock_read_unlock(&sMountLock);
8107 
8108 	return device;
8109 }
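

#if 0
// A minimal sketch (not part of this file) of how a caller could enumerate
// all mounted volumes using the cookie protocol implemented above; compare
// the next_dev() loop in _kern_sync() below.
static void
example_enumerate_devices()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("mounted volume: %" B_PRIdDEV "\n", device);
}
#endif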
8110 
8111 
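/*!	Convenience function: reads up to \a readBytes bytes from the attribute
	\a attribute of the node referred to by \a fd, starting at offset \a pos.
	The \a type parameter is not evaluated here. Returns the number of bytes
	read or an error code.
*/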
8112 ssize_t
8113 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8114 	void *buffer, size_t readBytes)
8115 {
8116 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8117 	if (attrFD < 0)
8118 		return attrFD;
8119 
8120 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8121 
8122 	_kern_close(attrFD);
8123 
8124 	return bytesRead;
8125 }
8126 
8127 
8128 static status_t
8129 get_cwd(char* buffer, size_t size, bool kernel)
8130 {
8131 	// Get current working directory from io context
8132 	struct io_context* context = get_current_io_context(kernel);
8133 	status_t status;
8134 
8135 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8136 
8137 	mutex_lock(&context->io_mutex);
8138 
8139 	struct vnode* vnode = context->cwd;
8140 	if (vnode)
8141 		inc_vnode_ref_count(vnode);
8142 
8143 	mutex_unlock(&context->io_mutex);
8144 
8145 	if (vnode) {
8146 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8147 		put_vnode(vnode);
8148 	} else
8149 		status = B_ERROR;
8150 
8151 	return status;
8152 }
8153 
8154 
8155 static status_t
8156 set_cwd(int fd, char* path, bool kernel)
8157 {
8158 	struct io_context* context;
8159 	struct vnode* oldDirectory;
8160 
8161 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8162 
8163 	// Get vnode for passed path, and bail if it failed
8164 	VnodePutter vnode;
8165 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8166 	if (status < 0)
8167 		return status;
8168 
8169 	if (!S_ISDIR(vnode->Type())) {
8170 		// nope, can't cwd to here
8171 		return B_NOT_A_DIRECTORY;
8172 	}
8173 
8174 	// We need to have the permission to enter the directory, too
8175 	if (HAS_FS_CALL(vnode, access)) {
8176 		status = FS_CALL(vnode.Get(), access, X_OK);
8177 		if (status != B_OK)
8178 			return status;
8179 	}
8180 
8181 	// Get current io context and lock
8182 	context = get_current_io_context(kernel);
8183 	mutex_lock(&context->io_mutex);
8184 
8185 	// save the old current working directory first
8186 	oldDirectory = context->cwd;
8187 	context->cwd = vnode.Detach();
8188 
8189 	mutex_unlock(&context->io_mutex);
8190 
8191 	if (oldDirectory)
8192 		put_vnode(oldDirectory);
8193 
8194 	return B_NO_ERROR;
8195 }
8196 
8197 
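/*!	Copies a string from userland into \a to, which must be able to hold
	\a length bytes. Returns \c B_OK on success, \c B_NAME_TOO_LONG if the
	source string (including its terminating null) did not fit, or the error
	reported by user_strlcpy() (e.g. \c B_BAD_ADDRESS).
*/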
8198 static status_t
8199 user_copy_name(char* to, const char* from, size_t length)
8200 {
8201 	ssize_t len = user_strlcpy(to, from, length);
8202 	if (len < 0)
8203 		return len;
8204 	if (len >= (ssize_t)length)
8205 		return B_NAME_TOO_LONG;
8206 	return B_OK;
8207 }
8208 
8209 
8210 //	#pragma mark - kernel mirrored syscalls
8211 
8212 
8213 dev_t
8214 _kern_mount(const char* path, const char* device, const char* fsName,
8215 	uint32 flags, const char* args, size_t argsLength)
8216 {
8217 	KPath pathBuffer(path);
8218 	if (pathBuffer.InitCheck() != B_OK)
8219 		return B_NO_MEMORY;
8220 
8221 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8222 }
8223 
8224 
8225 status_t
8226 _kern_unmount(const char* path, uint32 flags)
8227 {
8228 	KPath pathBuffer(path);
8229 	if (pathBuffer.InitCheck() != B_OK)
8230 		return B_NO_MEMORY;
8231 
8232 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8233 }
8234 
8235 
8236 status_t
8237 _kern_read_fs_info(dev_t device, struct fs_info* info)
8238 {
8239 	if (info == NULL)
8240 		return B_BAD_VALUE;
8241 
8242 	return fs_read_info(device, info);
8243 }
8244 
8245 
8246 status_t
8247 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8248 {
8249 	if (info == NULL)
8250 		return B_BAD_VALUE;
8251 
8252 	return fs_write_info(device, info, mask);
8253 }
8254 
8255 
8256 status_t
8257 _kern_sync(void)
8258 {
8259 	// Note: _kern_sync() is also called from _user_sync()
8260 	int32 cookie = 0;
8261 	dev_t device;
8262 	while ((device = next_dev(&cookie)) >= 0) {
8263 		status_t status = fs_sync(device);
8264 		if (status != B_OK && status != B_BAD_VALUE) {
8265 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8266 				strerror(status));
8267 		}
8268 	}
8269 
8270 	return B_OK;
8271 }
8272 
8273 
8274 dev_t
8275 _kern_next_device(int32* _cookie)
8276 {
8277 	return fs_next_device(_cookie);
8278 }
8279 
8280 
8281 status_t
8282 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8283 	size_t infoSize)
8284 {
8285 	if (infoSize != sizeof(fd_info))
8286 		return B_BAD_VALUE;
8287 
8288 	// get the team
8289 	Team* team = Team::Get(teamID);
8290 	if (team == NULL)
8291 		return B_BAD_TEAM_ID;
8292 	BReference<Team> teamReference(team, true);
8293 
8294 	// now that we have a team reference, its I/O context won't go away
8295 	io_context* context = team->io_context;
8296 	MutexLocker contextLocker(context->io_mutex);
8297 
8298 	uint32 slot = *_cookie;
8299 
8300 	struct file_descriptor* descriptor;
8301 	while (slot < context->table_size
8302 		&& (descriptor = context->fds[slot]) == NULL) {
8303 		slot++;
8304 	}
8305 
8306 	if (slot >= context->table_size)
8307 		return B_ENTRY_NOT_FOUND;
8308 
8309 	info->number = slot;
8310 	info->open_mode = descriptor->open_mode;
8311 
8312 	struct vnode* vnode = fd_vnode(descriptor);
8313 	if (vnode != NULL) {
8314 		info->device = vnode->device;
8315 		info->node = vnode->id;
8316 	} else if (descriptor->u.mount != NULL) {
8317 		info->device = descriptor->u.mount->id;
8318 		info->node = -1;
8319 	}
8320 
8321 	*_cookie = slot + 1;
8322 	return B_OK;
8323 }
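

#if 0
// A minimal sketch (not part of this file) of iterating over a team's open
// file descriptors with _kern_get_next_fd_info(); the cookie is simply the
// next FD slot to inspect.
static void
example_dump_team_fds(team_id team)
{
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
}
#endif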
8324 
8325 
8326 int
8327 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8328 	int perms)
8329 {
8330 	if ((openMode & O_CREAT) != 0) {
8331 		return file_create_entry_ref(device, inode, name, openMode, perms,
8332 			true);
8333 	}
8334 
8335 	return file_open_entry_ref(device, inode, name, openMode, true);
8336 }
8337 
8338 
8339 /*!	\brief Opens a node specified by a FD + path pair.
8340 
8341 	At least one of \a fd and \a path must be specified.
8342 	If only \a fd is given, the function opens the node identified by this
8343 	FD. If only a path is given, this path is opened. If both are given and
8344 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8345 	of the directory (!) identified by \a fd.
8346 
8347 	\param fd The FD. May be < 0.
8348 	\param path The absolute or relative path. May be \c NULL.
8349 	\param openMode The open mode.
8350 	\return A FD referring to the newly opened node, or an error code,
8351 			if an error occurs.
8352 */
8353 int
8354 _kern_open(int fd, const char* path, int openMode, int perms)
8355 {
8356 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8357 	if (pathBuffer.InitCheck() != B_OK)
8358 		return B_NO_MEMORY;
8359 
8360 	if ((openMode & O_CREAT) != 0)
8361 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8362 
8363 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8364 }
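

#if 0
// A minimal sketch (not part of this file) illustrating the FD + path
// semantics described above; "etc/fstab" is a hypothetical relative path,
// resolved against the directory referred to by dirFD.
static status_t
example_open_relative(int dirFD)
{
	int fd = _kern_open(dirFD, "etc/fstab", O_RDONLY, 0);
	if (fd < 0)
		return fd;

	// an absolute path would have made _kern_open() ignore dirFD entirely
	return _kern_close(fd);
}
#endif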
8365 
8366 
8367 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8368 
8369 	The supplied name may be \c NULL, in which case directory identified
8370 	by \a device and \a inode will be opened. Otherwise \a device and
8371 	\a inode identify the parent directory of the directory to be opened
8372 	and \a name its entry name.
8373 
8374 	\param device If \a name is specified the ID of the device the parent
8375 		   directory of the directory to be opened resides on, otherwise
8376 		   the device of the directory itself.
8377 	\param inode If \a name is specified the node ID of the parent
8378 		   directory of the directory to be opened, otherwise node ID of the
8379 		   directory itself.
8380 	\param name The entry name of the directory to be opened. If \c NULL,
8381 		   the \a device + \a inode pair identify the node to be opened.
8382 	\return The FD of the newly opened directory or an error code, if
8383 			something went wrong.
8384 */
8385 int
8386 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8387 {
8388 	return dir_open_entry_ref(device, inode, name, true);
8389 }
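

#if 0
// A minimal sketch (not part of this file): opening a directory either via
// its parent's entry_ref plus entry name, or directly via its own
// device/node pair (name == NULL), as described above. "home" is a
// hypothetical entry name.
static void
example_open_by_entry_ref(dev_t device, ino_t parent, ino_t node)
{
	int byName = _kern_open_dir_entry_ref(device, parent, "home");
	if (byName >= 0)
		_kern_close(byName);

	int byNode = _kern_open_dir_entry_ref(device, node, NULL);
	if (byNode >= 0)
		_kern_close(byNode);
}
#endif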
8390 
8391 
8392 /*!	\brief Opens a directory specified by a FD + path pair.
8393 
8394 	At least one of \a fd and \a path must be specified.
8395 	If only \a fd is given, the function opens the directory identified by this
8396 	FD. If only a path is given, this path is opened. If both are given and
8397 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8398 	of the directory (!) identified by \a fd.
8399 
8400 	\param fd The FD. May be < 0.
8401 	\param path The absolute or relative path. May be \c NULL.
8402 	\return A FD referring to the newly opened directory, or an error code,
8403 			if an error occurs.
8404 */
8405 int
8406 _kern_open_dir(int fd, const char* path)
8407 {
8408 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8409 	if (pathBuffer.InitCheck() != B_OK)
8410 		return B_NO_MEMORY;
8411 
8412 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8413 }
8414 
8415 
8416 status_t
8417 _kern_fcntl(int fd, int op, size_t argument)
8418 {
8419 	return common_fcntl(fd, op, argument, true);
8420 }
8421 
8422 
8423 status_t
8424 _kern_fsync(int fd)
8425 {
8426 	return common_sync(fd, true);
8427 }
8428 
8429 
8430 status_t
8431 _kern_lock_node(int fd)
8432 {
8433 	return common_lock_node(fd, true);
8434 }
8435 
8436 
8437 status_t
8438 _kern_unlock_node(int fd)
8439 {
8440 	return common_unlock_node(fd, true);
8441 }
8442 
8443 
8444 status_t
8445 _kern_preallocate(int fd, off_t offset, off_t length)
8446 {
8447 	return common_preallocate(fd, offset, length, true);
8448 }
8449 
8450 
8451 status_t
8452 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8453 	int perms)
8454 {
8455 	return dir_create_entry_ref(device, inode, name, perms, true);
8456 }
8457 
8458 
8459 /*!	\brief Creates a directory specified by a FD + path pair.
8460 
8461 	\a path must always be specified (it contains the name of the new directory
8462 	at least). If only a path is given, this path identifies the location at
8463 	which the directory shall be created. If both \a fd and \a path are given
8464 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8465 	of the directory (!) identified by \a fd.
8466 
8467 	\param fd The FD. May be < 0.
8468 	\param path The absolute or relative path. Must not be \c NULL.
8469 	\param perms The access permissions the new directory shall have.
8470 	\return \c B_OK, if the directory has been created successfully, another
8471 			error code otherwise.
8472 */
8473 status_t
8474 _kern_create_dir(int fd, const char* path, int perms)
8475 {
8476 	KPath pathBuffer(path, KPath::DEFAULT);
8477 	if (pathBuffer.InitCheck() != B_OK)
8478 		return B_NO_MEMORY;
8479 
8480 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8481 }
8482 
8483 
8484 status_t
8485 _kern_remove_dir(int fd, const char* path)
8486 {
8487 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8488 	if (pathBuffer.InitCheck() != B_OK)
8489 		return B_NO_MEMORY;
8490 
8491 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8492 }
8493 
8494 
8495 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8496 
8497 	At least one of \a fd and \a path must be specified.
8498 	If only \a fd is given, the symlink to be read is the node
8499 	identified by this FD. If only a path is given, this path identifies the
8500 	symlink to be read. If both are given and the path is absolute, \a fd is
8501 	ignored; a relative path is reckoned off of the directory (!) identified
8502 	by \a fd.
8503 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8504 	will still be updated to reflect the required buffer size.
8505 
8506 	\param fd The FD. May be < 0.
8507 	\param path The absolute or relative path. May be \c NULL.
8508 	\param buffer The buffer into which the contents of the symlink shall be
8509 		   written.
8510 	\param _bufferSize A pointer to the size of the supplied buffer.
8511 	\return The length of the link on success or an appropriate error code
8512 */
8513 status_t
8514 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8515 {
8516 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8517 	if (pathBuffer.InitCheck() != B_OK)
8518 		return B_NO_MEMORY;
8519 
8520 	return common_read_link(fd, pathBuffer.LockBuffer(),
8521 		buffer, _bufferSize, true);
8522 }
8523 
8524 
8525 /*!	\brief Creates a symlink specified by a FD + path pair.
8526 
8527 	\a path must always be specified (it contains the name of the new symlink
8528 	at least). If only a path is given, this path identifies the location at
8529 	which the symlink shall be created. If both \a fd and \a path are given and
8530 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8531 	of the directory (!) identified by \a fd.
8532 
8533 	\param fd The FD. May be < 0.
8534 	\param path The path of the new symlink. Must not be \c NULL.
	\param toPath The path the symlink shall point to. Must not be \c NULL.
8535 	\param mode The access permissions the new symlink shall have.
8536 	\return \c B_OK, if the symlink has been created successfully, another
8537 			error code otherwise.
8538 */
8539 status_t
8540 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8541 {
8542 	KPath pathBuffer(path);
8543 	if (pathBuffer.InitCheck() != B_OK)
8544 		return B_NO_MEMORY;
8545 
8546 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8547 		toPath, mode, true);
8548 }
8549 
8550 
8551 status_t
8552 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8553 	bool traverseLeafLink)
8554 {
8555 	KPath pathBuffer(path);
8556 	KPath toPathBuffer(toPath);
8557 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8558 		return B_NO_MEMORY;
8559 
8560 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8561 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8562 }
8563 
8564 
8565 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8566 
8567 	\a path must always be specified (it contains at least the name of the entry
8568 	to be deleted). If only a path is given, this path identifies the entry
8569 	directly. If both \a fd and \a path are given and the path is absolute,
8570 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8571 	identified by \a fd.
8572 
8573 	\param fd The FD. May be < 0.
8574 	\param path The absolute or relative path. Must not be \c NULL.
8575 	\return \c B_OK, if the entry has been removed successfully, another
8576 			error code otherwise.
8577 */
8578 status_t
8579 _kern_unlink(int fd, const char* path)
8580 {
8581 	KPath pathBuffer(path);
8582 	if (pathBuffer.InitCheck() != B_OK)
8583 		return B_NO_MEMORY;
8584 
8585 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8586 }
8587 
8588 
8589 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8590 		   by another FD + path pair.
8591 
8592 	\a oldPath and \a newPath must always be specified (they contain at least
8593 	the name of the entry). If only a path is given, this path identifies the
8594 	entry directly. If both a FD and a path are given and the path is absolute,
8595 	the FD is ignored; a relative path is reckoned off of the directory (!)
8596 	identified by the respective FD.
8597 
8598 	\param oldFD The FD of the old location. May be < 0.
8599 	\param oldPath The absolute or relative path of the old location. Must not
8600 		   be \c NULL.
8601 	\param newFD The FD of the new location. May be < 0.
8602 	\param newPath The absolute or relative path of the new location. Must not
8603 		   be \c NULL.
8604 	\return \c B_OK, if the entry has been moved successfully, another
8605 			error code otherwise.
8606 */
8607 status_t
8608 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8609 {
8610 	KPath oldPathBuffer(oldPath);
8611 	KPath newPathBuffer(newPath);
8612 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8613 		return B_NO_MEMORY;
8614 
8615 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8616 		newFD, newPathBuffer.LockBuffer(), true);
8617 }
8618 
8619 
8620 status_t
8621 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8622 {
8623 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8624 	if (pathBuffer.InitCheck() != B_OK)
8625 		return B_NO_MEMORY;
8626 
8627 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8628 		true);
8629 }
8630 
8631 
8632 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8633 
8634 	If only \a fd is given, the stat operation associated with the type
8635 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8636 	given, this path identifies the entry for whose node to retrieve the
8637 	stat data. If both \a fd and \a path are given and the path is absolute,
8638 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8639 	identified by \a fd and specifies the entry whose stat data shall be
8640 	retrieved.
8641 
8642 	\param fd The FD. May be < 0.
8643 	\param path The absolute or relative path. May be \c NULL.
8644 	\param traverseLeafLink If \a path is given, \c true specifies that the
8645 		   function shall not stick to symlinks, but traverse them.
8646 	\param stat The buffer the stat data shall be written into.
8647 	\param statSize The size of the supplied stat buffer.
8648 	\return \c B_OK, if the stat data have been read successfully, another
8649 			error code otherwise.
8650 */
8651 status_t
8652 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8653 	struct stat* stat, size_t statSize)
8654 {
8655 	struct stat completeStat;
8656 	struct stat* originalStat = NULL;
8657 	status_t status;
8658 
8659 	if (statSize > sizeof(struct stat))
8660 		return B_BAD_VALUE;
8661 
8662 	// this supports different stat extensions
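	// (a caller compiled against an older, smaller version of struct stat
	// passes its structure's size; we let the FS fill in a complete stat
	// and copy back only the first statSize bytes)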
8663 	if (statSize < sizeof(struct stat)) {
8664 		originalStat = stat;
8665 		stat = &completeStat;
8666 	}
8667 
8668 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8669 
8670 	if (status == B_OK && originalStat != NULL)
8671 		memcpy(originalStat, stat, statSize);
8672 
8673 	return status;
8674 }
8675 
8676 
8677 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8678 
8679 	If only \a fd is given, the stat operation associated with the type
8680 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8681 	given, this path identifies the entry for whose node to write the
8682 	stat data. If both \a fd and \a path are given and the path is absolute,
8683 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8684 	identified by \a fd and specifies the entry whose stat data shall be
8685 	written.
8686 
8687 	\param fd The FD. May be < 0.
8688 	\param path The absolute or relative path. May be \c NULL.
8689 	\param traverseLeafLink If \a path is given, \c true specifies that the
8690 		   function shall not stick to symlinks, but traverse them.
8691 	\param stat The buffer containing the stat data to be written.
8692 	\param statSize The size of the supplied stat buffer.
8693 	\param statMask A mask specifying which parts of the stat data shall be
8694 		   written.
8695 	\return \c B_OK, if the stat data have been written successfully,
8696 			another error code otherwise.
8697 */
8698 status_t
8699 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8700 	const struct stat* stat, size_t statSize, int statMask)
8701 {
8702 	struct stat completeStat;
8703 
8704 	if (statSize > sizeof(struct stat))
8705 		return B_BAD_VALUE;
8706 
8707 	// this supports different stat extensions
8708 	if (statSize < sizeof(struct stat)) {
8709 		memset((uint8*)&completeStat + statSize, 0,
8710 			sizeof(struct stat) - statSize);
8711 		memcpy(&completeStat, stat, statSize);
8712 		stat = &completeStat;
8713 	}
8714 
8715 	status_t status;
8716 
8717 	if (path != NULL) {
8718 		// path given: write the stat of the node referred to by (fd, path)
8719 		KPath pathBuffer(path);
8720 		if (pathBuffer.InitCheck() != B_OK)
8721 			return B_NO_MEMORY;
8722 
8723 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8724 			traverseLeafLink, stat, statMask, true);
8725 	} else {
8726 		// no path given: get the FD and use the FD operation
8727 		FileDescriptorPutter descriptor
8728 			(get_fd(get_current_io_context(true), fd));
8729 		if (!descriptor.IsSet())
8730 			return B_FILE_ERROR;
8731 
8732 		if (descriptor->ops->fd_write_stat)
8733 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8734 		else
8735 			status = B_UNSUPPORTED;
8736 	}
8737 
8738 	return status;
8739 }
8740 
8741 
8742 int
8743 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8744 {
8745 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8746 	if (pathBuffer.InitCheck() != B_OK)
8747 		return B_NO_MEMORY;
8748 
8749 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8750 }
8751 
8752 
8753 int
8754 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8755 	int openMode)
8756 {
8757 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8758 	if (pathBuffer.InitCheck() != B_OK)
8759 		return B_NO_MEMORY;
8760 
8761 	if ((openMode & O_CREAT) != 0) {
8762 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8763 			true);
8764 	}
8765 
8766 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8767 }
8768 
8769 
8770 status_t
8771 _kern_remove_attr(int fd, const char* name)
8772 {
8773 	return attr_remove(fd, name, true);
8774 }
8775 
8776 
8777 status_t
8778 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8779 	const char* toName)
8780 {
8781 	return attr_rename(fromFile, fromName, toFile, toName, true);
8782 }
8783 
8784 
8785 int
8786 _kern_open_index_dir(dev_t device)
8787 {
8788 	return index_dir_open(device, true);
8789 }
8790 
8791 
8792 status_t
8793 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8794 {
8795 	return index_create(device, name, type, flags, true);
8796 }
8797 
8798 
8799 status_t
8800 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8801 {
8802 	return index_name_read_stat(device, name, stat, true);
8803 }
8804 
8805 
8806 status_t
8807 _kern_remove_index(dev_t device, const char* name)
8808 {
8809 	return index_remove(device, name, true);
8810 }
8811 
8812 
8813 status_t
8814 _kern_getcwd(char* buffer, size_t size)
8815 {
8816 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8817 
8818 	// Call vfs to get current working directory
8819 	return get_cwd(buffer, size, true);
8820 }
8821 
8822 
8823 status_t
8824 _kern_setcwd(int fd, const char* path)
8825 {
8826 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8827 	if (pathBuffer.InitCheck() != B_OK)
8828 		return B_NO_MEMORY;
8829 
8830 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8831 }
8832 
8833 
8834 //	#pragma mark - userland syscalls
8835 
8836 
8837 dev_t
8838 _user_mount(const char* userPath, const char* userDevice,
8839 	const char* userFileSystem, uint32 flags, const char* userArgs,
8840 	size_t argsLength)
8841 {
8842 	char fileSystem[B_FILE_NAME_LENGTH];
8843 	KPath path, device;
8844 	char* args = NULL;
8845 	status_t status;
8846 
8847 	if (!IS_USER_ADDRESS(userPath))
8848 		return B_BAD_ADDRESS;
8849 
8850 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8851 		return B_NO_MEMORY;
8852 
8853 	status = user_copy_name(path.LockBuffer(), userPath,
8854 		B_PATH_NAME_LENGTH);
8855 	if (status != B_OK)
8856 		return status;
8857 	path.UnlockBuffer();
8858 
8859 	if (userFileSystem != NULL) {
8860 		if (!IS_USER_ADDRESS(userFileSystem))
8861 			return B_BAD_ADDRESS;
8862 
8863 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8864 		if (status != B_OK)
8865 			return status;
8866 	}
8867 
8868 	if (userDevice != NULL) {
8869 		if (!IS_USER_ADDRESS(userDevice))
8870 			return B_BAD_ADDRESS;
8871 
8872 		status = user_copy_name(device.LockBuffer(), userDevice,
8873 			B_PATH_NAME_LENGTH);
8874 		if (status != B_OK)
8875 			return status;
8876 		device.UnlockBuffer();
8877 	}
8878 
8879 	if (userArgs != NULL && argsLength > 0) {
8880 		if (!IS_USER_ADDRESS(userArgs))
8881 			return B_BAD_ADDRESS;
8882 
8883 		// this is a safety restriction
8884 		if (argsLength >= 65536)
8885 			return B_NAME_TOO_LONG;
8886 
8887 		args = (char*)malloc(argsLength + 1);
8888 		if (args == NULL)
8889 			return B_NO_MEMORY;
8890 
8891 		status = user_copy_name(args, userArgs, argsLength + 1);
8892 		if (status != B_OK) {
8893 			free(args);
8894 			return status;
8895 		}
8896 	}
8897 
8898 	status = fs_mount(path.LockBuffer(),
8899 		userDevice != NULL ? device.Path() : NULL,
8900 		userFileSystem ? fileSystem : NULL, flags, args, false);
8901 
8902 	free(args);
8903 	return status;
8904 }
8905 
8906 
8907 status_t
8908 _user_unmount(const char* userPath, uint32 flags)
8909 {
8910 	if (!IS_USER_ADDRESS(userPath))
8911 		return B_BAD_ADDRESS;
8912 
8913 	KPath pathBuffer;
8914 	if (pathBuffer.InitCheck() != B_OK)
8915 		return B_NO_MEMORY;
8916 
8917 	char* path = pathBuffer.LockBuffer();
8918 
8919 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8920 	if (status != B_OK)
8921 		return status;
8922 
8923 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8924 }
8925 
8926 
8927 status_t
8928 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8929 {
8930 	struct fs_info info;
8931 	status_t status;
8932 
8933 	if (userInfo == NULL)
8934 		return B_BAD_VALUE;
8935 
8936 	if (!IS_USER_ADDRESS(userInfo))
8937 		return B_BAD_ADDRESS;
8938 
8939 	status = fs_read_info(device, &info);
8940 	if (status != B_OK)
8941 		return status;
8942 
8943 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8944 		return B_BAD_ADDRESS;
8945 
8946 	return B_OK;
8947 }
8948 
8949 
8950 status_t
8951 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8952 {
8953 	struct fs_info info;
8954 
8955 	if (userInfo == NULL)
8956 		return B_BAD_VALUE;
8957 
8958 	if (!IS_USER_ADDRESS(userInfo)
8959 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8960 		return B_BAD_ADDRESS;
8961 
8962 	return fs_write_info(device, &info, mask);
8963 }
8964 
8965 
8966 dev_t
8967 _user_next_device(int32* _userCookie)
8968 {
8969 	int32 cookie;
8970 	dev_t device;
8971 
8972 	if (!IS_USER_ADDRESS(_userCookie)
8973 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8974 		return B_BAD_ADDRESS;
8975 
8976 	device = fs_next_device(&cookie);
8977 
8978 	if (device >= B_OK) {
8979 		// update user cookie
8980 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8981 			return B_BAD_ADDRESS;
8982 	}
8983 
8984 	return device;
8985 }
8986 
8987 
8988 status_t
8989 _user_sync(void)
8990 {
8991 	return _kern_sync();
8992 }
8993 
8994 
8995 status_t
8996 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8997 	size_t infoSize)
8998 {
8999 	struct fd_info info;
9000 	uint32 cookie;
9001 
9002 	// only root can do this
9003 	if (geteuid() != 0)
9004 		return B_NOT_ALLOWED;
9005 
9006 	if (infoSize != sizeof(fd_info))
9007 		return B_BAD_VALUE;
9008 
9009 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9010 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9011 		return B_BAD_ADDRESS;
9012 
9013 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9014 	if (status != B_OK)
9015 		return status;
9016 
9017 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9018 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9019 		return B_BAD_ADDRESS;
9020 
9021 	return status;
9022 }
9023 
9024 
9025 status_t
9026 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9027 	char* userPath, size_t pathLength)
9028 {
9029 	if (!IS_USER_ADDRESS(userPath))
9030 		return B_BAD_ADDRESS;
9031 
9032 	KPath path;
9033 	if (path.InitCheck() != B_OK)
9034 		return B_NO_MEMORY;
9035 
9036 	// copy the leaf name onto the stack
9037 	char stackLeaf[B_FILE_NAME_LENGTH];
9038 	if (leaf != NULL) {
9039 		if (!IS_USER_ADDRESS(leaf))
9040 			return B_BAD_ADDRESS;
9041 
9042 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9043 		if (status != B_OK)
9044 			return status;
9045 
9046 		leaf = stackLeaf;
9047 	}
9048 
9049 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9050 		false, path.LockBuffer(), path.BufferSize());
9051 	if (status != B_OK)
9052 		return status;
9053 
9054 	path.UnlockBuffer();
9055 
9056 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9057 	if (length < 0)
9058 		return length;
9059 	if (length >= (int)pathLength)
9060 		return B_BUFFER_OVERFLOW;
9061 
9062 	return B_OK;
9063 }
9064 
9065 
9066 status_t
9067 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9068 {
9069 	if (userPath == NULL || buffer == NULL)
9070 		return B_BAD_VALUE;
9071 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9072 		return B_BAD_ADDRESS;
9073 
9074 	// copy path from userland
9075 	KPath pathBuffer;
9076 	if (pathBuffer.InitCheck() != B_OK)
9077 		return B_NO_MEMORY;
9078 	char* path = pathBuffer.LockBuffer();
9079 
9080 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9081 	if (status != B_OK)
9082 		return status;
9083 
9084 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9085 		false);
9086 	if (error != B_OK)
9087 		return error;
9088 
9089 	// copy back to userland
9090 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9091 	if (len < 0)
9092 		return len;
9093 	if (len >= B_PATH_NAME_LENGTH)
9094 		return B_BUFFER_OVERFLOW;
9095 
9096 	return B_OK;
9097 }
9098 
9099 
9100 int
9101 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9102 	int openMode, int perms)
9103 {
9104 	char name[B_FILE_NAME_LENGTH];
9105 
9106 	if (userName == NULL || device < 0 || inode < 0)
9107 		return B_BAD_VALUE;
9108 	if (!IS_USER_ADDRESS(userName))
9109 		return B_BAD_ADDRESS;
9110 	status_t status = user_copy_name(name, userName, sizeof(name));
9111 	if (status != B_OK)
9112 		return status;
9113 
9114 	if ((openMode & O_CREAT) != 0) {
9115 		return file_create_entry_ref(device, inode, name, openMode, perms,
9116 			false);
9117 	}
9118 
9119 	return file_open_entry_ref(device, inode, name, openMode, false);
9120 }
9121 
9122 
9123 int
9124 _user_open(int fd, const char* userPath, int openMode, int perms)
9125 {
9126 	KPath path;
9127 	if (path.InitCheck() != B_OK)
9128 		return B_NO_MEMORY;
9129 
9130 	char* buffer = path.LockBuffer();
9131 
9132 	if (!IS_USER_ADDRESS(userPath))
9133 		return B_BAD_ADDRESS;
9134 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9135 	if (status != B_OK)
9136 		return status;
9137 
9138 	if ((openMode & O_CREAT) != 0)
9139 		return file_create(fd, buffer, openMode, perms, false);
9140 
9141 	return file_open(fd, buffer, openMode, false);
9142 }
9143 
9144 
9145 int
9146 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9147 {
9148 	if (userName != NULL) {
9149 		char name[B_FILE_NAME_LENGTH];
9150 
9151 		if (!IS_USER_ADDRESS(userName))
9152 			return B_BAD_ADDRESS;
9153 		status_t status = user_copy_name(name, userName, sizeof(name));
9154 		if (status != B_OK)
9155 			return status;
9156 
9157 		return dir_open_entry_ref(device, inode, name, false);
9158 	}
9159 	return dir_open_entry_ref(device, inode, NULL, false);
9160 }
9161 
9162 
9163 int
9164 _user_open_dir(int fd, const char* userPath)
9165 {
9166 	if (userPath == NULL)
9167 		return dir_open(fd, NULL, false);
9168 
9169 	KPath path;
9170 	if (path.InitCheck() != B_OK)
9171 		return B_NO_MEMORY;
9172 
9173 	char* buffer = path.LockBuffer();
9174 
9175 	if (!IS_USER_ADDRESS(userPath))
9176 		return B_BAD_ADDRESS;
9177 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9178 	if (status != B_OK)
9179 		return status;
9180 
9181 	return dir_open(fd, buffer, false);
9182 }
9183 
9184 
9185 /*!	\brief Opens a directory's parent directory and returns the entry name
9186 		   of the former.
9187 
9188 	Aside from returning the directory's entry name, this method is
9189 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9190 	equivalent if \a userName is \c NULL.
9191 
9192 	If a name buffer is supplied and the name does not fit the buffer, the
9193 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9194 
9195 	\param fd A FD referring to a directory.
9196 	\param userName Buffer the directory's entry name shall be written into.
9197 		   May be \c NULL.
9198 	\param nameLength Size of the name buffer.
9199 	\return The file descriptor of the opened parent directory, if everything
9200 			went fine, an error code otherwise.
9201 */
9202 int
9203 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9204 {
9205 	bool kernel = false;
9206 
9207 	if (userName && !IS_USER_ADDRESS(userName))
9208 		return B_BAD_ADDRESS;
9209 
9210 	// open the parent dir
9211 	int parentFD = dir_open(fd, (char*)"..", kernel);
9212 	if (parentFD < 0)
9213 		return parentFD;
9214 	FDCloser fdCloser(parentFD, kernel);
9215 
9216 	if (userName) {
9217 		// get the vnodes
9218 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9219 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9220 		VnodePutter parentVNodePutter(parentVNode);
9221 		VnodePutter dirVNodePutter(dirVNode);
9222 		if (!parentVNode || !dirVNode)
9223 			return B_FILE_ERROR;
9224 
9225 		// get the vnode name
9226 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9227 		struct dirent* buffer = (struct dirent*)_buffer;
9228 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9229 			sizeof(_buffer), get_current_io_context(false));
9230 		if (status != B_OK)
9231 			return status;
9232 
9233 		// copy the name to the userland buffer
9234 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9235 		if (len < 0)
9236 			return len;
9237 		if (len >= (int)nameLength)
9238 			return B_BUFFER_OVERFLOW;
9239 	}
9240 
9241 	return fdCloser.Detach();
9242 }
9243 
9244 
9245 status_t
9246 _user_fcntl(int fd, int op, size_t argument)
9247 {
9248 	status_t status = common_fcntl(fd, op, argument, false);
9249 	if (op == F_SETLKW)
9250 		syscall_restart_handle_post(status);
9251 
9252 	return status;
9253 }
9254 
9255 
9256 status_t
9257 _user_fsync(int fd)
9258 {
9259 	return common_sync(fd, false);
9260 }
9261 
9262 
9263 status_t
9264 _user_flock(int fd, int operation)
9265 {
9266 	FUNCTION(("_user_flock(fd = %d, operation = %d)\n", fd, operation));
9267 
9268 	// Check if the operation is valid
9269 	switch (operation & ~LOCK_NB) {
9270 		case LOCK_UN:
9271 		case LOCK_SH:
9272 		case LOCK_EX:
9273 			break;
9274 
9275 		default:
9276 			return B_BAD_VALUE;
9277 	}
9278 
9279 	struct vnode* vnode;
9280 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9281 	if (!descriptor.IsSet())
9282 		return B_FILE_ERROR;
9283 
9284 	if (descriptor->ops != &sFileOps)
9285 		return B_BAD_VALUE;
9286 
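	// flock() locks always cover the whole file, so translate the operation
	// into an advisory lock ranging from offset 0 to OFF_MAX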
9287 	struct flock flock;
9288 	flock.l_start = 0;
9289 	flock.l_len = OFF_MAX;
9290 	flock.l_whence = 0;
9291 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9292 
9293 	status_t status;
9294 	if ((operation & LOCK_UN) != 0) {
9295 		if (HAS_FS_CALL(vnode, release_lock))
9296 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9297 		else
9298 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9299 	} else {
9300 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9301 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9302 				(operation & LOCK_NB) == 0);
9303 		} else {
9304 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9305 				(operation & LOCK_NB) == 0);
9306 		}
9307 	}
9308 
9309 	syscall_restart_handle_post(status);
9310 
9311 	return status;
9312 }
9313 
9314 
9315 status_t
9316 _user_lock_node(int fd)
9317 {
9318 	return common_lock_node(fd, false);
9319 }
9320 
9321 
9322 status_t
9323 _user_unlock_node(int fd)
9324 {
9325 	return common_unlock_node(fd, false);
9326 }
9327 
9328 
9329 status_t
9330 _user_preallocate(int fd, off_t offset, off_t length)
9331 {
9332 	return common_preallocate(fd, offset, length, false);
9333 }
9334 
9335 
9336 status_t
9337 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9338 	int perms)
9339 {
9340 	char name[B_FILE_NAME_LENGTH];
9341 	status_t status;
9342 
9343 	if (!IS_USER_ADDRESS(userName))
9344 		return B_BAD_ADDRESS;
9345 
9346 	status = user_copy_name(name, userName, sizeof(name));
9347 	if (status != B_OK)
9348 		return status;
9349 
9350 	return dir_create_entry_ref(device, inode, name, perms, false);
9351 }
9352 
9353 
9354 status_t
9355 _user_create_dir(int fd, const char* userPath, int perms)
9356 {
9357 	KPath pathBuffer;
9358 	if (pathBuffer.InitCheck() != B_OK)
9359 		return B_NO_MEMORY;
9360 
9361 	char* path = pathBuffer.LockBuffer();
9362 
9363 	if (!IS_USER_ADDRESS(userPath))
9364 		return B_BAD_ADDRESS;
9365 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9366 	if (status != B_OK)
9367 		return status;
9368 
9369 	return dir_create(fd, path, perms, false);
9370 }
9371 
9372 
9373 status_t
9374 _user_remove_dir(int fd, const char* userPath)
9375 {
9376 	KPath pathBuffer;
9377 	if (pathBuffer.InitCheck() != B_OK)
9378 		return B_NO_MEMORY;
9379 
9380 	char* path = pathBuffer.LockBuffer();
9381 
9382 	if (userPath != NULL) {
9383 		if (!IS_USER_ADDRESS(userPath))
9384 			return B_BAD_ADDRESS;
9385 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9386 		if (status != B_OK)
9387 			return status;
9388 	}
9389 
9390 	return dir_remove(fd, userPath ? path : NULL, false);
9391 }
9392 
9393 
9394 status_t
9395 _user_read_link(int fd, const char* userPath, char* userBuffer,
9396 	size_t* userBufferSize)
9397 {
9398 	KPath pathBuffer, linkBuffer;
9399 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9400 		return B_NO_MEMORY;
9401 
9402 	size_t bufferSize;
9403 
9404 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9405 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9406 		return B_BAD_ADDRESS;
9407 
9408 	char* path = pathBuffer.LockBuffer();
9409 	char* buffer = linkBuffer.LockBuffer();
9410 
9411 	if (userPath) {
9412 		if (!IS_USER_ADDRESS(userPath))
9413 			return B_BAD_ADDRESS;
9414 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9415 		if (status != B_OK)
9416 			return status;
9417 	}
9418 
9419 	if (bufferSize > B_PATH_NAME_LENGTH)
9420 		bufferSize = B_PATH_NAME_LENGTH;
9421 
9422 	size_t newBufferSize = bufferSize;
9423 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9424 		&newBufferSize, false);
9425 
9426 	// We update bufferSize even in case of error, since the real length
9427 	// is reported back on B_BUFFER_OVERFLOW.
9428 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9429 		return B_BAD_ADDRESS;
9430 
9431 	if (status != B_OK)
9432 		return status;
9433 
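	// Copy back no more than the caller's buffer can hold.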
9434 	bufferSize = min_c(newBufferSize, bufferSize);
9435 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9436 		return B_BAD_ADDRESS;
9437 
9438 	return B_OK;
9439 }
9440 
9441 
9442 status_t
9443 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9444 	int mode)
9445 {
9446 	KPath pathBuffer;
9447 	KPath toPathBuffer;
9448 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9449 		return B_NO_MEMORY;
9450 
9451 	char* path = pathBuffer.LockBuffer();
9452 	char* toPath = toPathBuffer.LockBuffer();
9453 
9454 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9455 		return B_BAD_ADDRESS;
9456 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9457 	if (status != B_OK)
9458 		return status;
9459 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9460 	if (status != B_OK)
9461 		return status;
9462 
9463 	return common_create_symlink(fd, path, toPath, mode, false);
9464 }
9465 
9466 
9467 status_t
9468 _user_create_link(int pathFD, const char* userPath, int toFD,
9469 	const char* userToPath, bool traverseLeafLink)
9470 {
9471 	KPath pathBuffer;
9472 	KPath toPathBuffer;
9473 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9474 		return B_NO_MEMORY;
9475 
9476 	char* path = pathBuffer.LockBuffer();
9477 	char* toPath = toPathBuffer.LockBuffer();
9478 
9479 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9480 		return B_BAD_ADDRESS;
9481 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9482 	if (status != B_OK)
9483 		return status;
9484 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9485 	if (status != B_OK)
9486 		return status;
9487 
9488 	status = check_path(toPath);
9489 	if (status != B_OK)
9490 		return status;
9491 
9492 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9493 		false);
9494 }
9495 
9496 
9497 status_t
9498 _user_unlink(int fd, const char* userPath)
9499 {
9500 	KPath pathBuffer;
9501 	if (pathBuffer.InitCheck() != B_OK)
9502 		return B_NO_MEMORY;
9503 
9504 	char* path = pathBuffer.LockBuffer();
9505 
9506 	if (!IS_USER_ADDRESS(userPath))
9507 		return B_BAD_ADDRESS;
9508 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9509 	if (status != B_OK)
9510 		return status;
9511 
9512 	return common_unlink(fd, path, false);
9513 }
9514 
9515 
9516 status_t
9517 _user_rename(int oldFD, const char* userOldPath, int newFD,
9518 	const char* userNewPath)
9519 {
9520 	KPath oldPathBuffer;
9521 	KPath newPathBuffer;
9522 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9523 		return B_NO_MEMORY;
9524 
9525 	char* oldPath = oldPathBuffer.LockBuffer();
9526 	char* newPath = newPathBuffer.LockBuffer();
9527 
9528 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9529 		return B_BAD_ADDRESS;
9530 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9531 	if (status != B_OK)
9532 		return status;
9533 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9534 	if (status != B_OK)
9535 		return status;
9536 
9537 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9538 }
9539 
9540 
9541 status_t
9542 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9543 {
9544 	KPath pathBuffer;
9545 	if (pathBuffer.InitCheck() != B_OK)
9546 		return B_NO_MEMORY;
9547 
9548 	char* path = pathBuffer.LockBuffer();
9549 
9550 	if (!IS_USER_ADDRESS(userPath))
9551 		return B_BAD_ADDRESS;
9552 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9553 	if (status != B_OK)
9554 		return status;
9555 
9556 	// split into directory vnode and filename path
9557 	char filename[B_FILE_NAME_LENGTH];
9558 	VnodePutter dir;
9559 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9560 	if (status != B_OK)
9561 		return status;
9562 
9563 	// the underlying FS needs to support creating FIFOs
9564 	if (!HAS_FS_CALL(dir, create_special_node))
9565 		return B_UNSUPPORTED;
9566 
9567 	// create the entry	-- the FIFO sub node is set up automatically
9568 	fs_vnode superVnode;
9569 	ino_t nodeID;
9570 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9571 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9572 
9573 	// create_special_node() acquired a reference for us that we don't need.
9574 	if (status == B_OK)
9575 		put_vnode(dir->mount->volume, nodeID);
9576 
9577 	return status;
9578 }
9579 
9580 
9581 status_t
9582 _user_create_pipe(int* userFDs)
9583 {
9584 	// rootfs should support creating FIFOs, but let's be sure
9585 	if (!HAS_FS_CALL(sRoot, create_special_node))
9586 		return B_UNSUPPORTED;
9587 
9588 	// create the node	-- the FIFO sub node is set up automatically
9589 	fs_vnode superVnode;
9590 	ino_t nodeID;
9591 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9592 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9593 	if (status != B_OK)
9594 		return status;
9595 
9596 	// We've got one reference to the node and need another one.
9597 	struct vnode* vnode;
9598 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9599 	if (status != B_OK) {
9600 		// that should not happen
9601 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9602 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9603 		return status;
9604 	}
9605 
9606 	// Everything looks good so far. Open one FD for reading and one for
9607 	// writing.
9608 	int fds[2];
9609 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9610 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9611 
9612 	FDCloser closer0(fds[0], false);
9613 	FDCloser closer1(fds[1], false);
9614 
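	// open_vnode() returns a negative error code on failure; keep the
	// first failure as the status.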
9615 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9616 
9617 	// copy FDs to userland
9618 	if (status == B_OK) {
9619 		if (!IS_USER_ADDRESS(userFDs)
9620 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9621 			status = B_BAD_ADDRESS;
9622 		}
9623 	}
9624 
9625 	// keep FDs, if everything went fine
9626 	if (status == B_OK) {
9627 		closer0.Detach();
9628 		closer1.Detach();
9629 	}
9630 
9631 	return status;
9632 }
9633 
9634 
9635 status_t
9636 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9637 {
9638 	KPath pathBuffer;
9639 	if (pathBuffer.InitCheck() != B_OK)
9640 		return B_NO_MEMORY;
9641 
9642 	char* path = pathBuffer.LockBuffer();
9643 
9644 	if (!IS_USER_ADDRESS(userPath))
9645 		return B_BAD_ADDRESS;
9646 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9647 	if (status != B_OK)
9648 		return status;
9649 
9650 	return common_access(fd, path, mode, effectiveUserGroup, false);
9651 }
9652 
9653 
9654 status_t
9655 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9656 	struct stat* userStat, size_t statSize)
9657 {
9658 	struct stat stat = {0};
9659 	status_t status;
9660 
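	// The caller passes the size of its userland struct stat, apparently to
	// stay compatible with binaries built against a smaller stat layout;
	// only that many bytes are copied back at the end.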
9661 	if (statSize > sizeof(struct stat))
9662 		return B_BAD_VALUE;
9663 
9664 	if (!IS_USER_ADDRESS(userStat))
9665 		return B_BAD_ADDRESS;
9666 
9667 	if (userPath != NULL) {
9668 		// path given: get the stat of the node referred to by (fd, path)
9669 		if (!IS_USER_ADDRESS(userPath))
9670 			return B_BAD_ADDRESS;
9671 
9672 		KPath pathBuffer;
9673 		if (pathBuffer.InitCheck() != B_OK)
9674 			return B_NO_MEMORY;
9675 
9676 		char* path = pathBuffer.LockBuffer();
9677 
9678 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9679 		if (status != B_OK)
9680 			return status;
9681 
9682 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9683 	} else {
9684 		// no path given: get the FD and use the FD operation
9685 		FileDescriptorPutter descriptor
9686 			(get_fd(get_current_io_context(false), fd));
9687 		if (!descriptor.IsSet())
9688 			return B_FILE_ERROR;
9689 
9690 		if (descriptor->ops->fd_read_stat)
9691 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9692 		else
9693 			status = B_UNSUPPORTED;
9694 	}
9695 
9696 	if (status != B_OK)
9697 		return status;
9698 
9699 	return user_memcpy(userStat, &stat, statSize);
9700 }
9701 
9702 
9703 status_t
9704 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9705 	const struct stat* userStat, size_t statSize, int statMask)
9706 {
9707 	if (statSize > sizeof(struct stat))
9708 		return B_BAD_VALUE;
9709 
9710 	struct stat stat;
9711 
9712 	if (!IS_USER_ADDRESS(userStat)
9713 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9714 		return B_BAD_ADDRESS;
9715 
9716 	// clear any fields the (possibly smaller) userland struct didn't provide
9717 	if (statSize < sizeof(struct stat))
9718 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9719 
9720 	status_t status;
9721 
9722 	if (userPath != NULL) {
9723 		// path given: write the stat of the node referred to by (fd, path)
9724 		if (!IS_USER_ADDRESS(userPath))
9725 			return B_BAD_ADDRESS;
9726 
9727 		KPath pathBuffer;
9728 		if (pathBuffer.InitCheck() != B_OK)
9729 			return B_NO_MEMORY;
9730 
9731 		char* path = pathBuffer.LockBuffer();
9732 
9733 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9734 		if (status != B_OK)
9735 			return status;
9736 
9737 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9738 			statMask, false);
9739 	} else {
9740 		// no path given: get the FD and use the FD operation
9741 		FileDescriptorPutter descriptor
9742 			(get_fd(get_current_io_context(false), fd));
9743 		if (!descriptor.IsSet())
9744 			return B_FILE_ERROR;
9745 
9746 		if (descriptor->ops->fd_write_stat) {
9747 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9748 				statMask);
9749 		} else
9750 			status = B_UNSUPPORTED;
9751 	}
9752 
9753 	return status;
9754 }
9755 
9756 
9757 int
9758 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9759 {
9760 	KPath pathBuffer;
9761 	if (pathBuffer.InitCheck() != B_OK)
9762 		return B_NO_MEMORY;
9763 
9764 	char* path = pathBuffer.LockBuffer();
9765 
9766 	if (userPath != NULL) {
9767 		if (!IS_USER_ADDRESS(userPath))
9768 			return B_BAD_ADDRESS;
9769 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9770 		if (status != B_OK)
9771 			return status;
9772 	}
9773 
9774 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9775 }
9776 
9777 
9778 ssize_t
9779 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9780 	size_t readBytes)
9781 {
9782 	char attribute[B_FILE_NAME_LENGTH];
9783 
9784 	if (userAttribute == NULL)
9785 		return B_BAD_VALUE;
9786 	if (!IS_USER_ADDRESS(userAttribute))
9787 		return B_BAD_ADDRESS;
9788 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9789 	if (status != B_OK)
9790 		return status;
9791 
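	// Attribute reading is layered on the generic FD calls: open the
	// attribute, then reuse _user_read() on the resulting descriptor.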
9792 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9793 	if (attr < 0)
9794 		return attr;
9795 
9796 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9797 	_user_close(attr);
9798 
9799 	return bytes;
9800 }
9801 
9802 
9803 ssize_t
9804 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9805 	const void* buffer, size_t writeBytes)
9806 {
9807 	char attribute[B_FILE_NAME_LENGTH];
9808 
9809 	if (userAttribute == NULL)
9810 		return B_BAD_VALUE;
9811 	if (!IS_USER_ADDRESS(userAttribute))
9812 		return B_BAD_ADDRESS;
9813 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9814 	if (status != B_OK)
9815 		return status;
9816 
9817 	// Support the BeOS-typical truncation semantics: truncate only when
9818 	// writing from the start, so a non-zero position preserves existing data.
9819 	int attr = attr_create(fd, NULL, attribute, type,
9820 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9821 	if (attr < 0)
9822 		return attr;
9823 
9824 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9825 	_user_close(attr);
9826 
9827 	return bytes;
9828 }
9829 
9830 
9831 status_t
9832 _user_stat_attr(int fd, const char* userAttribute,
9833 	struct attr_info* userAttrInfo)
9834 {
9835 	char attribute[B_FILE_NAME_LENGTH];
9836 
9837 	if (userAttribute == NULL || userAttrInfo == NULL)
9838 		return B_BAD_VALUE;
9839 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9840 		return B_BAD_ADDRESS;
9841 	status_t status = user_copy_name(attribute, userAttribute,
9842 		sizeof(attribute));
9843 	if (status != B_OK)
9844 		return status;
9845 
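	// Stat the attribute by opening it read-only and querying the
	// resulting descriptor's fd_read_stat() hook.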
9846 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9847 	if (attr < 0)
9848 		return attr;
9849 
9850 	struct file_descriptor* descriptor
9851 		= get_fd(get_current_io_context(false), attr);
9852 	if (descriptor == NULL) {
9853 		_user_close(attr);
9854 		return B_FILE_ERROR;
9855 	}
9856 
9857 	struct stat stat;
9858 	if (descriptor->ops->fd_read_stat)
9859 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9860 	else
9861 		status = B_UNSUPPORTED;
9862 
9863 	put_fd(descriptor);
9864 	_user_close(attr);
9865 
9866 	if (status == B_OK) {
9867 		attr_info info;
9868 		info.type = stat.st_type;
9869 		info.size = stat.st_size;
9870 
9871 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9872 			return B_BAD_ADDRESS;
9873 	}
9874 
9875 	return status;
9876 }
9877 
9878 
9879 int
9880 _user_open_attr(int fd, const char* userPath, const char* userName,
9881 	uint32 type, int openMode)
9882 {
9883 	char name[B_FILE_NAME_LENGTH];
9884 
9885 	if (!IS_USER_ADDRESS(userName))
9886 		return B_BAD_ADDRESS;
9887 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9888 	if (status != B_OK)
9889 		return status;
9890 
9891 	KPath pathBuffer;
9892 	if (pathBuffer.InitCheck() != B_OK)
9893 		return B_NO_MEMORY;
9894 
9895 	char* path = pathBuffer.LockBuffer();
9896 
9897 	if (userPath != NULL) {
9898 		if (!IS_USER_ADDRESS(userPath))
9899 			return B_BAD_ADDRESS;
9900 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9901 		if (status != B_OK)
9902 			return status;
9903 	}
9904 
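	// O_CREAT is handled by attr_create(); plain opens go to attr_open().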
9905 	if ((openMode & O_CREAT) != 0) {
9906 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9907 			false);
9908 	}
9909 
9910 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9911 }
9912 
9913 
9914 status_t
9915 _user_remove_attr(int fd, const char* userName)
9916 {
9917 	char name[B_FILE_NAME_LENGTH];
9918 
9919 	if (!IS_USER_ADDRESS(userName))
9920 		return B_BAD_ADDRESS;
9921 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9922 	if (status != B_OK)
9923 		return status;
9924 
9925 	return attr_remove(fd, name, false);
9926 }
9927 
9928 
9929 status_t
9930 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9931 	const char* userToName)
9932 {
9933 	if (!IS_USER_ADDRESS(userFromName)
9934 		|| !IS_USER_ADDRESS(userToName))
9935 		return B_BAD_ADDRESS;
9936 
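	// The name buffers are allocated via KPath, presumably to keep two
	// B_FILE_NAME_LENGTH arrays off the limited kernel stack.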
9937 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9938 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9939 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9940 		return B_NO_MEMORY;
9941 
9942 	char* fromName = fromNameBuffer.LockBuffer();
9943 	char* toName = toNameBuffer.LockBuffer();
9944 
9945 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9946 	if (status != B_OK)
9947 		return status;
9948 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9949 	if (status != B_OK)
9950 		return status;
9951 
9952 	return attr_rename(fromFile, fromName, toFile, toName, false);
9953 }
9954 
9955 
9956 int
9957 _user_open_index_dir(dev_t device)
9958 {
9959 	return index_dir_open(device, false);
9960 }
9961 
9962 
9963 status_t
9964 _user_create_index(dev_t device, const char* userName, uint32 type,
9965 	uint32 flags)
9966 {
9967 	char name[B_FILE_NAME_LENGTH];
9968 
9969 	if (!IS_USER_ADDRESS(userName))
9970 		return B_BAD_ADDRESS;
9971 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9972 	if (status != B_OK)
9973 		return status;
9974 
9975 	return index_create(device, name, type, flags, false);
9976 }
9977 
9978 
9979 status_t
9980 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9981 {
9982 	char name[B_FILE_NAME_LENGTH];
9983 	struct stat stat = {0};
9984 	status_t status;
9985 
9986 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9987 		return B_BAD_ADDRESS;
9988 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9989 	if (status != B_OK)
9990 		return status;
9991 
9992 	status = index_name_read_stat(device, name, &stat, false);
9993 	if (status == B_OK) {
9994 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9995 			return B_BAD_ADDRESS;
9996 	}
9997 
9998 	return status;
9999 }
10000 
10001 
10002 status_t
10003 _user_remove_index(dev_t device, const char* userName)
10004 {
10005 	char name[B_FILE_NAME_LENGTH];
10006 
10007 	if (!IS_USER_ADDRESS(userName))
10008 		return B_BAD_ADDRESS;
10009 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10010 	if (status != B_OK)
10011 		return status;
10012 
10013 	return index_remove(device, name, false);
10014 }
10015 
10016 
10017 status_t
10018 _user_getcwd(char* userBuffer, size_t size)
10019 {
10020 	if (size == 0)
10021 		return B_BAD_VALUE;
10022 	if (!IS_USER_ADDRESS(userBuffer))
10023 		return B_BAD_ADDRESS;
10024 
10025 	if (size > kMaxPathLength)
10026 		size = kMaxPathLength;
10027 
10028 	KPath pathBuffer(size);
10029 	if (pathBuffer.InitCheck() != B_OK)
10030 		return B_NO_MEMORY;
10031 
10032 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10033 
10034 	char* path = pathBuffer.LockBuffer();
10035 
10036 	status_t status = get_cwd(path, size, false);
10037 	if (status != B_OK)
10038 		return status;
10039 
10040 	// Copy back the result
10041 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10042 		return B_BAD_ADDRESS;
10043 
10044 	return status;
10045 }
10046 
10047 
10048 status_t
10049 _user_setcwd(int fd, const char* userPath)
10050 {
10051 	TRACE(("user_setcwd: path = %p\n", userPath));
10052 
10053 	KPath pathBuffer;
10054 	if (pathBuffer.InitCheck() != B_OK)
10055 		return B_NO_MEMORY;
10056 
10057 	char* path = pathBuffer.LockBuffer();
10058 
10059 	if (userPath != NULL) {
10060 		if (!IS_USER_ADDRESS(userPath))
10061 			return B_BAD_ADDRESS;
10062 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10063 		if (status != B_OK)
10064 			return status;
10065 	}
10066 
10067 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10068 }
10069 
10070 
10071 status_t
10072 _user_change_root(const char* userPath)
10073 {
10074 	// only root is allowed to chroot()
10075 	if (geteuid() != 0)
10076 		return B_NOT_ALLOWED;
10077 
10078 	// alloc path buffer
10079 	KPath pathBuffer;
10080 	if (pathBuffer.InitCheck() != B_OK)
10081 		return B_NO_MEMORY;
10082 
10083 	// copy userland path to kernel
10084 	char* path = pathBuffer.LockBuffer();
10085 	if (userPath == NULL)
10086 		return B_BAD_VALUE;
10087 	if (!IS_USER_ADDRESS(userPath))
10088 		return B_BAD_ADDRESS;
10089 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10090 	if (status != B_OK)
10091 		return status;
10092 
10093 	// get the vnode
10094 	VnodePutter vnode;
10095 	status = path_to_vnode(path, true, vnode, NULL, false);
10096 	if (status != B_OK)
10097 		return status;
10098 
10099 	// set the new root
10100 	struct io_context* context = get_current_io_context(false);
10101 	mutex_lock(&sIOContextRootLock);
10102 	struct vnode* oldRoot = context->root;
10103 	context->root = vnode.Detach();
10104 	mutex_unlock(&sIOContextRootLock);
10105 
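	// Release the reference the context held on its previous root.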
10106 	put_vnode(oldRoot);
10107 
10108 	return B_OK;
10109 }
10110 
10111 
10112 int
10113 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10114 	uint32 flags, port_id port, int32 token)
10115 {
10116 	if (device < 0 || userQuery == NULL || queryLength == 0)
10117 		return B_BAD_VALUE;
10118 
10119 	if (!IS_USER_ADDRESS(userQuery))
10120 		return B_BAD_ADDRESS;
10121 
10122 	// this is a safety restriction
10123 	if (queryLength >= 65536)
10124 		return B_NAME_TOO_LONG;
10125 
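	// Queries up to 128 bytes live on the stack; longer ones are
	// heap-allocated.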
10126 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10127 	if (!query.IsValid())
10128 		return B_NO_MEMORY;
10129 
10130 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10131 		return B_BAD_ADDRESS;
10132 
10133 	return query_open(device, query, flags, port, token, false);
10134 }
10135 
10136 
10137 #include "vfs_request_io.cpp"
10138