xref: /haiku/src/system/kernel/fs/vfs.cpp (revision caed67a8cba83913b9c21ac2b06ebc6bd1cb3111)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <AutoDeleterDrivers.h>
36 #include <block_cache.h>
37 #include <boot/kernel_args.h>
38 #include <debug_heap.h>
39 #include <disk_device_manager/KDiskDevice.h>
40 #include <disk_device_manager/KDiskDeviceManager.h>
41 #include <disk_device_manager/KDiskDeviceUtils.h>
42 #include <disk_device_manager/KDiskSystem.h>
43 #include <fd.h>
44 #include <file_cache.h>
45 #include <fs/node_monitor.h>
46 #include <KPath.h>
47 #include <lock.h>
48 #include <low_resource_manager.h>
49 #include <slab/Slab.h>
50 #include <StackOrHeapArray.h>
51 #include <syscalls.h>
52 #include <syscall_restart.h>
53 #include <tracing.h>
54 #include <util/atomic.h>
55 #include <util/AutoLock.h>
56 #include <util/ThreadAutoLock.h>
57 #include <util/DoublyLinkedList.h>
58 #include <vfs.h>
59 #include <vm/vm.h>
60 #include <vm/VMCache.h>
61 #include <wait_for_objects.h>
62 
63 #include "EntryCache.h"
64 #include "fifo.h"
65 #include "IORequest.h"
66 #include "unused_vnodes.h"
67 #include "vfs_tracing.h"
68 #include "Vnode.h"
69 #include "../cache/vnode_store.h"
70 
71 
72 //#define TRACE_VFS
73 #ifdef TRACE_VFS
74 #	define TRACE(x) dprintf x
75 #	define FUNCTION(x) dprintf x
76 #else
77 #	define TRACE(x) ;
78 #	define FUNCTION(x) ;
79 #endif
80 
81 #define ADD_DEBUGGER_COMMANDS
82 
83 
84 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
85 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
86 
87 #if KDEBUG
88 #	define FS_CALL(vnode, op, params...) \
89 		( HAS_FS_CALL(vnode, op) ? \
90 			vnode->ops->op(vnode->mount->volume, vnode, params) \
91 			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
92 #	define FS_CALL_NO_PARAMS(vnode, op) \
93 		( HAS_FS_CALL(vnode, op) ? \
94 			vnode->ops->op(vnode->mount->volume, vnode) \
95 			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
96 #	define FS_MOUNT_CALL(mount, op, params...) \
97 		( HAS_FS_MOUNT_CALL(mount, op) ? \
98 			mount->volume->ops->op(mount->volume, params) \
99 			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
100 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
101 		( HAS_FS_MOUNT_CALL(mount, op) ? \
102 			mount->volume->ops->op(mount->volume) \
103 			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
104 #else
105 #	define FS_CALL(vnode, op, params...) \
106 			vnode->ops->op(vnode->mount->volume, vnode, params)
107 #	define FS_CALL_NO_PARAMS(vnode, op) \
108 			vnode->ops->op(vnode->mount->volume, vnode)
109 #	define FS_MOUNT_CALL(mount, op, params...) \
110 			mount->volume->ops->op(mount->volume, params)
111 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
112 			mount->volume->ops->op(mount->volume)
113 #endif
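

// Usage sketch (illustrative only, not compiled): FS_CALL() forwards to the
// corresponding file system hook with the volume and vnode prepended. A
// hypothetical read wrapper would look roughly like this:
#if 0
static status_t
example_read(struct vnode* vnode, void* cookie, off_t pos, void* buffer,
	size_t* _length)
{
	if (!HAS_FS_CALL(vnode, read))
		return B_UNSUPPORTED;
	// expands to vnode->ops->read(vnode->mount->volume, vnode, cookie, pos,
	// buffer, _length) -- plus a NULL hook check under KDEBUG
	return FS_CALL(vnode, read, cookie, pos, buffer, _length);
}
#endif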
114 
115 
116 const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() - this does not depend
	// on PATH_MAX)
119 
120 
121 typedef DoublyLinkedList<vnode> VnodeList;
122 
123 /*!	\brief Structure to manage a mounted file system
124 
	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon as
	the mount is mounted and it is ensured that it won't be unmounted (e.g. by
	holding a reference to a vnode of that mount), (read) access to those
	fields is always safe, even without additional locking. Moreover, while
	mounted the mount holds a reference to the root_vnode->covers vnode, thus
	making the access path vnode->mount->root_vnode->covers->mount->... safe
	if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
134 */
135 struct fs_mount {
136 	fs_mount()
137 		:
138 		volume(NULL),
139 		device_name(NULL)
140 	{
141 		mutex_init(&lock, "mount lock");
142 	}
143 
144 	~fs_mount()
145 	{
146 		mutex_destroy(&lock);
147 		free(device_name);
148 
149 		while (volume) {
150 			fs_volume* superVolume = volume->super_volume;
151 
152 			if (volume->file_system != NULL)
153 				put_module(volume->file_system->info.name);
154 
155 			free(volume->file_system_name);
156 			free(volume);
157 			volume = superVolume;
158 		}
159 	}
160 
161 	struct fs_mount* next;
162 	dev_t			id;
163 	fs_volume*		volume;
164 	char*			device_name;
165 	mutex			lock;	// guards the vnodes list
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 
176 namespace {
177 
178 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
179 	list_link		link;
180 	void*			bound_to;
181 	team_id			team;
182 	pid_t			session;
183 	off_t			start;
184 	off_t			end;
185 	bool			shared;
186 };
187 
188 typedef DoublyLinkedList<advisory_lock> LockList;
189 
190 } // namespace
191 
192 
193 struct advisory_locking {
194 	sem_id			lock;
195 	sem_id			wait_sem;
196 	LockList		locks;
197 
198 	advisory_locking()
199 		:
200 		lock(-1),
201 		wait_sem(-1)
202 	{
203 	}
204 
205 	~advisory_locking()
206 	{
207 		if (lock >= 0)
208 			delete_sem(lock);
209 		if (wait_sem >= 0)
210 			delete_sem(wait_sem);
211 	}
212 };
213 
214 /*!	\brief Guards sMountsTable.
215 
	The holder is allowed read/write access to sMountsTable.
217 	Manipulation of the fs_mount structures themselves
218 	(and their destruction) requires different locks though.
219 */
220 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
221 
222 /*!	\brief Guards mount/unmount operations.
223 
	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.
230 
231 	The thread trying to lock the lock must not hold sVnodeLock or
232 	sMountLock.
233 */
234 static recursive_lock sMountOpLock;
235 
236 /*!	\brief Guards sVnodeTable.
237 
	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.
246 
247 	The thread trying to acquire the lock must not hold sMountLock.
248 	You must not hold this lock when calling create_sem(), as this might call
249 	vfs_free_unused_vnodes() and thus cause a deadlock.
250 */
251 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
252 
253 /*!	\brief Guards io_context::root.
254 
255 	Must be held when setting or getting the io_context::root field.
256 	The only operation allowed while holding this lock besides getting or
257 	setting the field is inc_vnode_ref_count() on io_context::root.
258 */
259 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
260 
261 
262 namespace {
263 
264 struct vnode_hash_key {
265 	dev_t	device;
266 	ino_t	vnode;
267 };
268 
269 struct VnodeHash {
270 	typedef vnode_hash_key	KeyType;
271 	typedef	struct vnode	ValueType;
272 
273 #define VHASH(mountid, vnodeid) \
274 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
275 
276 	size_t HashKey(KeyType key) const
277 	{
278 		return VHASH(key.device, key.vnode);
279 	}
280 
281 	size_t Hash(ValueType* vnode) const
282 	{
283 		return VHASH(vnode->device, vnode->id);
284 	}
285 
286 #undef VHASH
287 
288 	bool Compare(KeyType key, ValueType* vnode) const
289 	{
290 		return vnode->device == key.device && vnode->id == key.vnode;
291 	}
292 
293 	ValueType*& GetLink(ValueType* value) const
294 	{
295 		return value->next;
296 	}
297 };
298 
299 typedef BOpenHashTable<VnodeHash> VnodeTable;
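

// Illustrative note: VnodeHash folds the 64-bit vnode ID into 32 bits and
// XORs in the mount ID. For example, for device 5 and vnode ID 0x100000002
// the hash is ((0x1 + 0x2) ^ 0x5) == 0x6.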
300 
301 
302 struct MountHash {
303 	typedef dev_t			KeyType;
304 	typedef	struct fs_mount	ValueType;
305 
306 	size_t HashKey(KeyType key) const
307 	{
308 		return key;
309 	}
310 
311 	size_t Hash(ValueType* mount) const
312 	{
313 		return mount->id;
314 	}
315 
316 	bool Compare(KeyType key, ValueType* mount) const
317 	{
318 		return mount->id == key;
319 	}
320 
321 	ValueType*& GetLink(ValueType* value) const
322 	{
323 		return value->next;
324 	}
325 };
326 
327 typedef BOpenHashTable<MountHash> MountTable;
328 
329 } // namespace
330 
331 
332 object_cache* sPathNameCache;
333 object_cache* sVnodeCache;
334 object_cache* sFileDescriptorCache;
335 
336 #define VNODE_HASH_TABLE_SIZE 1024
337 static VnodeTable* sVnodeTable;
338 static struct vnode* sRoot;
339 
340 #define MOUNTS_HASH_TABLE_SIZE 16
341 static MountTable* sMountsTable;
342 static dev_t sNextMountID = 1;
343 
344 #define MAX_TEMP_IO_VECS 8
345 
// How long to wait for busy vnodes (BUSY_VNODE_RETRIES * BUSY_VNODE_DELAY µs
// = 10s)
347 #define BUSY_VNODE_RETRIES 2000
348 #define BUSY_VNODE_DELAY 5000
349 
350 mode_t __gUmask = 022;
351 
352 /* function declarations */
353 
354 static void free_unused_vnodes();
355 
356 // file descriptor operation prototypes
357 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
358 	void* buffer, size_t* _bytes);
359 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
360 	const void* buffer, size_t* _bytes);
361 static ssize_t file_readv(struct file_descriptor* descriptor, off_t pos,
362 	const struct iovec *vecs, int count);
363 static ssize_t file_writev(struct file_descriptor* descriptor, off_t pos,
364 	const struct iovec *vecs, int count);
365 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
366 	int seekType);
367 static void file_free_fd(struct file_descriptor* descriptor);
368 static status_t file_close(struct file_descriptor* descriptor);
369 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
370 	struct selectsync* sync);
371 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
372 	struct selectsync* sync);
373 static status_t dir_read(struct io_context* context,
374 	struct file_descriptor* descriptor, struct dirent* buffer,
375 	size_t bufferSize, uint32* _count);
376 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
377 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
378 static status_t dir_rewind(struct file_descriptor* descriptor);
379 static void dir_free_fd(struct file_descriptor* descriptor);
380 static status_t dir_close(struct file_descriptor* descriptor);
381 static status_t attr_dir_read(struct io_context* context,
382 	struct file_descriptor* descriptor, struct dirent* buffer,
383 	size_t bufferSize, uint32* _count);
384 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
385 static void attr_dir_free_fd(struct file_descriptor* descriptor);
386 static status_t attr_dir_close(struct file_descriptor* descriptor);
387 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
388 	void* buffer, size_t* _bytes);
389 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
390 	const void* buffer, size_t* _bytes);
391 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
392 	int seekType);
393 static void attr_free_fd(struct file_descriptor* descriptor);
394 static status_t attr_close(struct file_descriptor* descriptor);
395 static status_t attr_read_stat(struct file_descriptor* descriptor,
396 	struct stat* statData);
397 static status_t attr_write_stat(struct file_descriptor* descriptor,
398 	const struct stat* stat, int statMask);
399 static status_t index_dir_read(struct io_context* context,
400 	struct file_descriptor* descriptor, struct dirent* buffer,
401 	size_t bufferSize, uint32* _count);
402 static status_t index_dir_rewind(struct file_descriptor* descriptor);
403 static void index_dir_free_fd(struct file_descriptor* descriptor);
404 static status_t index_dir_close(struct file_descriptor* descriptor);
405 static status_t query_read(struct io_context* context,
406 	struct file_descriptor* descriptor, struct dirent* buffer,
407 	size_t bufferSize, uint32* _count);
408 static status_t query_rewind(struct file_descriptor* descriptor);
409 static void query_free_fd(struct file_descriptor* descriptor);
410 static status_t query_close(struct file_descriptor* descriptor);
411 
412 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
413 	void* buffer, size_t length);
414 static status_t common_read_stat(struct file_descriptor* descriptor,
415 	struct stat* statData);
416 static status_t common_write_stat(struct file_descriptor* descriptor,
417 	const struct stat* statData, int statMask);
418 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
419 	struct stat* stat, bool kernel);
420 
421 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
422 	bool traverseLeafLink, bool kernel,
423 	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
424 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
425 	size_t bufferSize, bool kernel);
426 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
427 	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
428 static void inc_vnode_ref_count(struct vnode* vnode);
429 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
430 	bool reenter);
431 static inline void put_vnode(struct vnode* vnode);
432 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
433 	bool kernel);
434 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
435 
436 
437 static struct fd_ops sFileOps = {
438 	file_close,
439 	file_free_fd,
440 	file_read,
441 	file_write,
442 	file_readv,
443 	file_writev,
444 	file_seek,
445 	common_ioctl,
446 	NULL,		// set_flags()
447 	file_select,
448 	file_deselect,
449 	NULL,		// read_dir()
450 	NULL,		// rewind_dir()
451 	common_read_stat,
452 	common_write_stat,
453 };
454 
455 static struct fd_ops sDirectoryOps = {
456 	dir_close,
457 	dir_free_fd,
458 	NULL, NULL,	// read(), write()
459 	NULL, NULL,	// readv(), writev()
460 	NULL,		// seek()
461 	common_ioctl,
462 	NULL,		// set_flags
463 	NULL,		// select()
464 	NULL,		// deselect()
465 	dir_read,
466 	dir_rewind,
467 	common_read_stat,
468 	common_write_stat,
469 };
470 
471 static struct fd_ops sAttributeDirectoryOps = {
472 	attr_dir_close,
473 	attr_dir_free_fd,
474 	NULL, NULL,	// read(), write()
475 	NULL, NULL,	// readv(), writev()
476 	NULL,		// seek()
477 	common_ioctl,
478 	NULL,		// set_flags
479 	NULL,		// select()
480 	NULL,		// deselect()
481 	attr_dir_read,
482 	attr_dir_rewind,
483 	common_read_stat,
484 	common_write_stat,
485 };
486 
487 static struct fd_ops sAttributeOps = {
488 	attr_close,
489 	attr_free_fd,
490 	attr_read,
491 	attr_write,
492 	NULL,		// readv()
493 	NULL,		// writev()
494 	attr_seek,
495 	common_ioctl,
496 	NULL,		// set_flags()
497 	NULL,		// select()
498 	NULL,		// deselect()
499 	NULL,		// read_dir()
500 	NULL,		// rewind_dir()
501 	attr_read_stat,
502 	attr_write_stat,
503 };
504 
505 static struct fd_ops sIndexDirectoryOps = {
506 	index_dir_close,
507 	index_dir_free_fd,
508 	NULL, NULL,	// read(), write()
509 	NULL, NULL,	// readv(), writev()
510 	NULL,		// seek()
511 	NULL,		// ioctl()
512 	NULL,		// set_flags()
513 	NULL,		// select()
514 	NULL,		// deselect()
515 	index_dir_read,
516 	index_dir_rewind,
517 	NULL,		// read_stat()
518 	NULL,		// write_stat()
519 };
520 
521 #if 0
522 static struct fd_ops sIndexOps = {
	NULL,		// close()
524 	NULL,		// free_fd()
525 	NULL, NULL,	// read(), write()
526 	NULL, NULL,	// readv(), writev()
527 	NULL,		// seek()
528 	NULL,		// ioctl()
529 	NULL,		// set_flags
530 	NULL,		// select()
531 	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
534 	index_read_stat,	// read_stat()
535 	NULL,		// write_stat()
536 };
537 #endif
538 
539 static struct fd_ops sQueryOps = {
540 	query_close,
541 	query_free_fd,
542 	NULL, NULL,	// read(), write()
543 	NULL, NULL,	// readv(), writev()
544 	NULL,		// seek()
545 	NULL,		// ioctl()
546 	NULL,		// set_flags()
547 	NULL,		// select()
548 	NULL,		// deselect()
549 	query_read,
550 	query_rewind,
551 	NULL,		// read_stat()
552 	NULL,		// write_stat()
553 };
554 
555 
556 namespace {
557 
558 class FDCloser {
559 public:
560 	FDCloser() : fFD(-1), fKernel(true) {}
561 
562 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
563 
564 	~FDCloser()
565 	{
566 		Close();
567 	}
568 
569 	void SetTo(int fd, bool kernel)
570 	{
571 		Close();
572 		fFD = fd;
573 		fKernel = kernel;
574 	}
575 
576 	void Close()
577 	{
578 		if (fFD >= 0) {
579 			if (fKernel)
580 				_kern_close(fFD);
581 			else
582 				_user_close(fFD);
583 			fFD = -1;
584 		}
585 	}
586 
587 	int Detach()
588 	{
589 		int fd = fFD;
590 		fFD = -1;
591 		return fd;
592 	}
593 
594 private:
595 	int		fFD;
596 	bool	fKernel;
597 };
598 
599 } // namespace
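

// Usage sketch (illustrative only, not compiled): FDCloser closes the wrapped
// descriptor when it goes out of scope, unless it has been Detach()ed. A
// caller that hands out the descriptor on success could look like this
// (do_more_setup() is a hypothetical helper):
#if 0
	int fd = open_vnode(vnode, openMode, kernel);
	if (fd < 0)
		return fd;
	FDCloser fdCloser(fd, kernel);

	status_t status = do_more_setup(fd);
	if (status != B_OK)
		return status;
			// fdCloser closes fd here

	return fdCloser.Detach();
		// success -- keep the descriptor open and return it
#endif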
600 
601 
602 #if VFS_PAGES_IO_TRACING
603 
604 namespace VFSPagesIOTracing {
605 
606 class PagesIOTraceEntry : public AbstractTraceEntry {
607 protected:
608 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
609 		const generic_io_vec* vecs, uint32 count, uint32 flags,
610 		generic_size_t bytesRequested, status_t status,
611 		generic_size_t bytesTransferred)
612 		:
613 		fVnode(vnode),
614 		fMountID(vnode->mount->id),
615 		fNodeID(vnode->id),
616 		fCookie(cookie),
617 		fPos(pos),
618 		fCount(count),
619 		fFlags(flags),
620 		fBytesRequested(bytesRequested),
621 		fStatus(status),
622 		fBytesTransferred(bytesTransferred)
623 	{
624 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
625 			sizeof(generic_io_vec) * count, false);
626 	}
627 
628 	void AddDump(TraceOutput& out, const char* mode)
629 	{
630 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
631 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
632 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
633 			(uint64)fBytesRequested);
634 
635 		if (fVecs != NULL) {
636 			for (uint32 i = 0; i < fCount; i++) {
637 				if (i > 0)
638 					out.Print(", ");
639 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
640 					(uint64)fVecs[i].length);
641 			}
642 		}
643 
644 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
645 			"transferred: %" B_PRIu64, fFlags, fStatus,
646 			(uint64)fBytesTransferred);
647 	}
648 
649 protected:
650 	struct vnode*	fVnode;
651 	dev_t			fMountID;
652 	ino_t			fNodeID;
653 	void*			fCookie;
654 	off_t			fPos;
655 	generic_io_vec*	fVecs;
656 	uint32			fCount;
657 	uint32			fFlags;
658 	generic_size_t	fBytesRequested;
659 	status_t		fStatus;
660 	generic_size_t	fBytesTransferred;
661 };
662 
663 
664 class ReadPages : public PagesIOTraceEntry {
665 public:
666 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
667 		const generic_io_vec* vecs, uint32 count, uint32 flags,
668 		generic_size_t bytesRequested, status_t status,
669 		generic_size_t bytesTransferred)
670 		:
671 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
672 			bytesRequested, status, bytesTransferred)
673 	{
674 		Initialized();
675 	}
676 
677 	virtual void AddDump(TraceOutput& out)
678 	{
679 		PagesIOTraceEntry::AddDump(out, "read");
680 	}
681 };
682 
683 
684 class WritePages : public PagesIOTraceEntry {
685 public:
686 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
687 		const generic_io_vec* vecs, uint32 count, uint32 flags,
688 		generic_size_t bytesRequested, status_t status,
689 		generic_size_t bytesTransferred)
690 		:
691 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
692 			bytesRequested, status, bytesTransferred)
693 	{
694 		Initialized();
695 	}
696 
697 	virtual void AddDump(TraceOutput& out)
698 	{
699 		PagesIOTraceEntry::AddDump(out, "write");
700 	}
701 };
702 
703 }	// namespace VFSPagesIOTracing
704 
705 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
706 #else
707 #	define TPIO(x) ;
708 #endif	// VFS_PAGES_IO_TRACING
709 
710 
711 /*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold sMountLock when calling this function.
713 */
714 static struct fs_mount*
715 find_mount(dev_t id)
716 {
717 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
718 
719 	return sMountsTable->Lookup(id);
720 }
721 
722 
723 static status_t
724 get_mount(dev_t id, struct fs_mount** _mount)
725 {
726 	struct fs_mount* mount;
727 
728 	ReadLocker nodeLocker(sVnodeLock);
729 	ReadLocker mountLocker(sMountLock);
730 
731 	mount = find_mount(id);
732 	if (mount == NULL)
733 		return B_BAD_VALUE;
734 
735 	struct vnode* rootNode = mount->root_vnode;
736 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
737 		|| rootNode->ref_count == 0) {
738 		// might have been called during a mount/unmount operation
739 		return B_BUSY;
740 	}
741 
742 	inc_vnode_ref_count(rootNode);
743 	*_mount = mount;
744 	return B_OK;
745 }
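

// Usage sketch (illustrative only, not compiled): get_mount() and put_mount()
// bracket accesses to a mount; the reference to the root vnode keeps the
// mount from being unmounted in the meantime:
#if 0
	struct fs_mount* mount;
	status_t status = get_mount(id, &mount);
	if (status != B_OK)
		return status;
	// ... safely access mount->volume, mount->device_name, etc. ...
	put_mount(mount);
#endif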
746 
747 
748 static void
749 put_mount(struct fs_mount* mount)
750 {
751 	if (mount)
752 		put_vnode(mount->root_vnode);
753 }
754 
755 
756 /*!	Tries to open the specified file system module.
757 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
759 	could not open the module.
760 */
761 static file_system_module_info*
762 get_file_system(const char* fsName)
763 {
764 	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
766 		// construct module name if we didn't get one
767 		// (we currently support only one API)
768 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
769 		fsName = NULL;
770 	}
771 
772 	file_system_module_info* info;
773 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
774 		return NULL;
775 
776 	return info;
777 }
778 
779 
780 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
781 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
782 	The name is allocated for you, and you have to free() it when you're
783 	done with it.
784 	Returns NULL if the required memory is not available.
785 */
786 static char*
787 get_file_system_name(const char* fsName)
788 {
789 	const size_t length = strlen("file_systems/");
790 
	if (strncmp(fsName, "file_systems/", length) != 0) {
792 		// the name already seems to be the module's file name
793 		return strdup(fsName);
794 	}
795 
796 	fsName += length;
797 	const char* end = strchr(fsName, '/');
798 	if (end == NULL) {
799 		// this doesn't seem to be a valid name, but well...
800 		return strdup(fsName);
801 	}
802 
803 	// cut off the trailing /v1
804 
805 	char* name = (char*)malloc(end + 1 - fsName);
806 	if (name == NULL)
807 		return NULL;
808 
809 	strlcpy(name, fsName, end + 1 - fsName);
810 	return name;
811 }
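

// For example (illustrative): get_file_system_name("file_systems/bfs/v1")
// and get_file_system_name("bfs") both return a newly allocated "bfs".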
812 
813 
/*!	Accepts a list of file system names separated by colons, one for each
	layer, and returns the file system name for the specified layer.
816 	The name is allocated for you, and you have to free() it when you're
817 	done with it.
818 	Returns NULL if the required memory is not available or if there is no
819 	name for the specified layer.
820 */
821 static char*
822 get_file_system_name_for_layer(const char* fsNames, int32 layer)
823 {
824 	while (layer >= 0) {
825 		const char* end = strchr(fsNames, ':');
826 		if (end == NULL) {
827 			if (layer == 0)
828 				return strdup(fsNames);
829 			return NULL;
830 		}
831 
		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}
838 
839 		fsNames = end + 1;
840 		layer--;
841 	}
842 
843 	return NULL;
844 }
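

// For example (illustrative): with fsNames "write_overlay:bfs", layer 0
// yields a newly allocated "write_overlay", layer 1 yields "bfs", and
// layer 2 returns NULL.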
845 
846 
847 static void
848 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
849 {
850 	MutexLocker _(mount->lock);
851 	mount->vnodes.Add(vnode);
852 }
853 
854 
855 static void
856 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
857 {
858 	MutexLocker _(mount->lock);
859 	mount->vnodes.Remove(vnode);
860 }
861 
862 
863 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
864 
865 	The caller must hold the sVnodeLock (read lock at least).
866 
867 	\param mountID the mount ID.
868 	\param vnodeID the node ID.
869 
870 	\return The vnode structure, if it was found in the hash table, \c NULL
871 			otherwise.
872 */
873 static struct vnode*
874 lookup_vnode(dev_t mountID, ino_t vnodeID)
875 {
876 	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);
877 
878 	struct vnode_hash_key key;
879 
880 	key.device = mountID;
881 	key.vnode = vnodeID;
882 
883 	return sVnodeTable->Lookup(key);
884 }
885 
886 
887 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
888 
	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode to become unbusy.
891 
892 	\return \c true if one should retry, \c false if not.
893 */
894 static bool
895 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
896 {
897 	if (--tries < 0) {
898 		// vnode doesn't seem to become unbusy
899 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
900 			" is not becoming unbusy!\n", mountID, vnodeID);
901 		return false;
902 	}
903 	snooze(BUSY_VNODE_DELAY);
904 	return true;
905 }
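

// Usage sketch (illustrative only, not compiled): callers initialize the
// retry budget once and keep re-checking while the vnode stays busy,
// dropping any locks before each retry (vnode_is_still_busy() stands in
// for the caller's actual re-lookup and busy check):
#if 0
	int32 tries = BUSY_VNODE_RETRIES;
	while (vnode_is_still_busy(mountID, vnodeID)) {
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;
	}
#endif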
906 
907 
908 /*!	Creates a new vnode with the given mount and node ID.
909 	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
911 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
912 	error the lock is not held on return.
913 
914 	\param mountID The mount ID.
915 	\param vnodeID The vnode ID.
916 	\param _vnode Will be set to the new vnode on success.
917 	\param _nodeCreated Will be set to \c true when the returned vnode has
918 		been newly created, \c false when it already existed. Will not be
919 		changed on error.
920 	\return \c B_OK, when the vnode was successfully created and inserted or
921 		a node with the given ID was found, \c B_NO_MEMORY or
922 		\c B_ENTRY_NOT_FOUND on error.
923 */
924 static status_t
925 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
926 	bool& _nodeCreated)
927 {
928 	FUNCTION(("create_new_vnode_and_lock()\n"));
929 
930 	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
931 	if (vnode == NULL)
932 		return B_NO_MEMORY;
933 
934 	// initialize basic values
935 	memset(vnode, 0, sizeof(struct vnode));
936 	vnode->device = mountID;
937 	vnode->id = vnodeID;
938 	vnode->ref_count = 1;
939 	vnode->SetBusy(true);
940 
941 	// look up the node -- it might have been added by someone else in the
942 	// meantime
943 	rw_lock_write_lock(&sVnodeLock);
944 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
945 	if (existingVnode != NULL) {
946 		object_cache_free(sVnodeCache, vnode, 0);
947 		_vnode = existingVnode;
948 		_nodeCreated = false;
949 		return B_OK;
950 	}
951 
952 	// get the mount structure
953 	rw_lock_read_lock(&sMountLock);
954 	vnode->mount = find_mount(mountID);
955 	if (!vnode->mount || vnode->mount->unmounting) {
956 		rw_lock_read_unlock(&sMountLock);
957 		rw_lock_write_unlock(&sVnodeLock);
958 		object_cache_free(sVnodeCache, vnode, 0);
959 		return B_ENTRY_NOT_FOUND;
960 	}
961 
962 	// add the vnode to the mount's node list and the hash table
963 	sVnodeTable->Insert(vnode);
964 	add_vnode_to_mount_list(vnode, vnode->mount);
965 
966 	rw_lock_read_unlock(&sMountLock);
967 
968 	_vnode = vnode;
969 	_nodeCreated = true;
970 
971 	// keep the vnode lock locked
972 	return B_OK;
973 }
974 
975 
976 /*!	Frees the vnode and all resources it has acquired, and removes
977 	it from the vnode hash as well as from its mount structure.
978 	Will also make sure that any cache modifications are written back.
979 */
980 static void
981 free_vnode(struct vnode* vnode, bool reenter)
982 {
983 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
984 		vnode);
985 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
986 
987 	// write back any changes in this vnode's cache -- but only
988 	// if the vnode won't be deleted, in which case the changes
989 	// will be discarded
990 
991 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
992 		FS_CALL_NO_PARAMS(vnode, fsync);
993 
994 	// Note: If this vnode has a cache attached, there will still be two
995 	// references to that cache at this point. The last one belongs to the vnode
996 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
997 	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count could drop to 0 at
	// all. Deleting the file cache now will cause the next to last cache
	// reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count to 2, so that it will neither become negative nor 0.
1004 	vnode->ref_count = 2;
1005 
1006 	if (!vnode->IsUnpublished()) {
1007 		if (vnode->IsRemoved())
1008 			FS_CALL(vnode, remove_vnode, reenter);
1009 		else
1010 			FS_CALL(vnode, put_vnode, reenter);
1011 	}
1012 
1013 	// If the vnode has a VMCache attached, make sure that it won't try to get
1014 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1015 	// long as the vnode is busy and in the hash, that won't happen, but as
1016 	// soon as we've removed it from the hash, it could reload the vnode -- with
1017 	// a new cache attached!
1018 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1019 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1020 
1021 	// The file system has removed the resources of the vnode now, so we can
1022 	// make it available again (by removing the busy vnode from the hash).
1023 	rw_lock_write_lock(&sVnodeLock);
1024 	sVnodeTable->Remove(vnode);
1025 	rw_lock_write_unlock(&sVnodeLock);
1026 
1027 	// if we have a VMCache attached, remove it
1028 	if (vnode->cache)
1029 		vnode->cache->ReleaseRef();
1030 
1031 	vnode->cache = NULL;
1032 
1033 	remove_vnode_from_mount_list(vnode, vnode->mount);
1034 
1035 	object_cache_free(sVnodeCache, vnode, 0);
1036 }
1037 
1038 
1039 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1040 	if the counter dropped to 0.
1041 
1042 	The caller must, of course, own a reference to the vnode to call this
1043 	function.
1044 	The caller must not hold the sVnodeLock or the sMountLock.
1045 
1046 	\param vnode the vnode.
1047 	\param alwaysFree don't move this vnode into the unused list, but really
1048 		   delete it if possible.
1049 	\param reenter \c true, if this function is called (indirectly) from within
1050 		   a file system. This will be passed to file system hooks only.
1051 	\return \c B_OK, if everything went fine, an error code otherwise.
1052 */
1053 static status_t
1054 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1055 {
1056 	ReadLocker locker(sVnodeLock);
1057 	AutoLocker<Vnode> nodeLocker(vnode);
1058 
1059 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1060 
1061 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1062 
1063 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1064 		vnode->ref_count));
1065 
1066 	if (oldRefCount != 1)
1067 		return B_OK;
1068 
1069 	if (vnode->IsBusy())
1070 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1071 
1072 	bool freeNode = false;
1073 	bool freeUnusedNodes = false;
1074 
1075 	// Just insert the vnode into an unused list if we don't need
1076 	// to delete it
1077 	if (vnode->IsRemoved() || alwaysFree) {
1078 		vnode_to_be_freed(vnode);
1079 		vnode->SetBusy(true);
1080 		freeNode = true;
1081 	} else
1082 		freeUnusedNodes = vnode_unused(vnode);
1083 
1084 	nodeLocker.Unlock();
1085 	locker.Unlock();
1086 
1087 	if (freeNode)
1088 		free_vnode(vnode, reenter);
1089 	else if (freeUnusedNodes)
1090 		free_unused_vnodes();
1091 
1092 	return B_OK;
1093 }
1094 
1095 
1096 /*!	\brief Increments the reference counter of the given vnode.
1097 
1098 	The caller must make sure that the node isn't deleted while this function
1099 	is called. This can be done either:
1100 	- by ensuring that a reference to the node exists and remains in existence,
1101 	  or
1102 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1103 	  or by holding sVnodeLock write locked.
1104 
1105 	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for
	the node.
1109 
1110 	\param vnode the vnode.
1111 */
1112 static void
1113 inc_vnode_ref_count(struct vnode* vnode)
1114 {
1115 	atomic_add(&vnode->ref_count, 1);
1116 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1117 		vnode->ref_count));
1118 }
1119 
1120 
1121 static bool
1122 is_special_node_type(int type)
1123 {
1124 	// at the moment only FIFOs are supported
1125 	return S_ISFIFO(type);
1126 }
1127 
1128 
1129 static status_t
1130 create_special_sub_node(struct vnode* vnode, uint32 flags)
1131 {
1132 	if (S_ISFIFO(vnode->Type()))
1133 		return create_fifo_vnode(vnode->mount->volume, vnode);
1134 
1135 	return B_BAD_VALUE;
1136 }
1137 
1138 
1139 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1140 
1141 	If the node is not yet in memory, it will be loaded.
1142 
1143 	The caller must not hold the sVnodeLock or the sMountLock.
1144 
1145 	\param mountID the mount ID.
1146 	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if it is allowed to wait for a busy vnode to
		   become unbusy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
1152 */
1153 static status_t
1154 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1155 	int reenter)
1156 {
1157 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1158 		mountID, vnodeID, _vnode));
1159 
1160 	rw_lock_read_lock(&sVnodeLock);
1161 
1162 	int32 tries = BUSY_VNODE_RETRIES;
1163 restart:
1164 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1165 	AutoLocker<Vnode> nodeLocker(vnode);
1166 
1167 	if (vnode && vnode->IsBusy()) {
1168 		// vnodes in the Removed state (except ones still Unpublished)
1169 		// which are also Busy will disappear soon, so we do not wait for them.
1170 		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();
1171 
1172 		nodeLocker.Unlock();
1173 		rw_lock_read_unlock(&sVnodeLock);
1174 		if (!canWait) {
1175 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1176 				mountID, vnodeID);
1177 			return B_BUSY;
1178 		}
1179 		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
1180 			return B_BUSY;
1181 
1182 		rw_lock_read_lock(&sVnodeLock);
1183 		goto restart;
1184 	}
1185 
1186 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1187 
1188 	status_t status;
1189 
1190 	if (vnode) {
1191 		if (vnode->ref_count == 0) {
1192 			// this vnode has been unused before
1193 			vnode_used(vnode);
1194 		}
1195 		inc_vnode_ref_count(vnode);
1196 
1197 		nodeLocker.Unlock();
1198 		rw_lock_read_unlock(&sVnodeLock);
1199 	} else {
1200 		// we need to create a new vnode and read it in
1201 		rw_lock_read_unlock(&sVnodeLock);
1202 			// unlock -- create_new_vnode_and_lock() write-locks on success
1203 		bool nodeCreated;
1204 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1205 			nodeCreated);
1206 		if (status != B_OK)
1207 			return status;
1208 
1209 		if (!nodeCreated) {
1210 			rw_lock_read_lock(&sVnodeLock);
1211 			rw_lock_write_unlock(&sVnodeLock);
1212 			goto restart;
1213 		}
1214 
1215 		rw_lock_write_unlock(&sVnodeLock);
1216 
1217 		int type;
1218 		uint32 flags;
1219 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1220 			&flags, reenter);
1221 		if (status == B_OK && vnode->private_node == NULL)
1222 			status = B_BAD_VALUE;
1223 
1224 		bool gotNode = status == B_OK;
1225 		bool publishSpecialSubNode = false;
1226 		if (gotNode) {
1227 			vnode->SetType(type);
1228 			publishSpecialSubNode = is_special_node_type(type)
1229 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1230 		}
1231 
1232 		if (gotNode && publishSpecialSubNode)
1233 			status = create_special_sub_node(vnode, flags);
1234 
1235 		if (status != B_OK) {
1236 			if (gotNode)
1237 				FS_CALL(vnode, put_vnode, reenter);
1238 
1239 			rw_lock_write_lock(&sVnodeLock);
1240 			sVnodeTable->Remove(vnode);
1241 			remove_vnode_from_mount_list(vnode, vnode->mount);
1242 			rw_lock_write_unlock(&sVnodeLock);
1243 
1244 			object_cache_free(sVnodeCache, vnode, 0);
1245 			return status;
1246 		}
1247 
1248 		rw_lock_read_lock(&sVnodeLock);
1249 		vnode->Lock();
1250 
1251 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1252 		vnode->SetBusy(false);
1253 
1254 		vnode->Unlock();
1255 		rw_lock_read_unlock(&sVnodeLock);
1256 	}
1257 
1258 	TRACE(("get_vnode: returning %p\n", vnode));
1259 
1260 	*_vnode = vnode;
1261 	return B_OK;
1262 }
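

// Usage sketch (illustrative only, not compiled): get_vnode() returns a
// referenced vnode that must later be released with put_vnode():
#if 0
	struct vnode* vnode;
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
	if (status != B_OK)
		return status;
	// ... use the vnode ...
	put_vnode(vnode);
#endif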
1263 
1264 
1265 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1266 	if the counter dropped to 0.
1267 
1268 	The caller must, of course, own a reference to the vnode to call this
1269 	function.
1270 	The caller must not hold the sVnodeLock or the sMountLock.
1271 
1272 	\param vnode the vnode.
1273 */
1274 static inline void
1275 put_vnode(struct vnode* vnode)
1276 {
1277 	dec_vnode_ref_count(vnode, false, false);
1278 }
1279 
1280 
1281 static void
1282 free_unused_vnodes(int32 level)
1283 {
1284 	unused_vnodes_check_started();
1285 
1286 	if (level == B_NO_LOW_RESOURCE) {
1287 		unused_vnodes_check_done();
1288 		return;
1289 	}
1290 
1291 	flush_hot_vnodes();
1292 
1293 	// determine how many nodes to free
1294 	uint32 count = 1;
1295 	{
1296 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1297 
1298 		switch (level) {
1299 			case B_LOW_RESOURCE_NOTE:
1300 				count = sUnusedVnodes / 100;
1301 				break;
1302 			case B_LOW_RESOURCE_WARNING:
1303 				count = sUnusedVnodes / 10;
1304 				break;
1305 			case B_LOW_RESOURCE_CRITICAL:
1306 				count = sUnusedVnodes;
1307 				break;
1308 		}
1309 
1310 		if (count > sUnusedVnodes)
1311 			count = sUnusedVnodes;
1312 	}
1313 
1314 	// Write back the modified pages of some unused vnodes and free them.
1315 
1316 	for (uint32 i = 0; i < count; i++) {
1317 		ReadLocker vnodesReadLocker(sVnodeLock);
1318 
1319 		// get the first node
1320 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1321 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1322 			&sUnusedVnodeList);
1323 		unusedVnodesLocker.Unlock();
1324 
1325 		if (vnode == NULL)
1326 			break;
1327 
1328 		// lock the node
1329 		AutoLocker<Vnode> nodeLocker(vnode);
1330 
1331 		// Check whether the node is still unused -- since we only append to the
1332 		// tail of the unused queue, the vnode should still be at its head.
1333 		// Alternatively we could check its ref count for 0 and its busy flag,
1334 		// but if the node is no longer at the head of the queue, it means it
1335 		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode, so we'd rather not free it.
1337 		unusedVnodesLocker.Lock();
1338 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1339 			continue;
1340 		unusedVnodesLocker.Unlock();
1341 
1342 		ASSERT(!vnode->IsBusy());
1343 
1344 		// grab a reference
1345 		inc_vnode_ref_count(vnode);
1346 		vnode_used(vnode);
1347 
1348 		// write back changes and free the node
1349 		nodeLocker.Unlock();
1350 		vnodesReadLocker.Unlock();
1351 
1352 		if (vnode->cache != NULL)
1353 			vnode->cache->WriteModified();
1354 
1355 		dec_vnode_ref_count(vnode, true, false);
1356 			// this should free the vnode when it's still unused
1357 	}
1358 
1359 	unused_vnodes_check_done();
1360 }
1361 
1362 
1363 /*!	Gets the vnode the given vnode is covering.
1364 
1365 	The caller must have \c sVnodeLock read-locked at least.
1366 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for freeing.
1369 
1370 	\param vnode The vnode whose covered node shall be returned.
1371 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1372 		vnode.
1373 */
1374 static inline Vnode*
1375 get_covered_vnode_locked(Vnode* vnode)
1376 {
1377 	if (Vnode* coveredNode = vnode->covers) {
1378 		while (coveredNode->covers != NULL)
1379 			coveredNode = coveredNode->covers;
1380 
1381 		inc_vnode_ref_count(coveredNode);
1382 		return coveredNode;
1383 	}
1384 
1385 	return NULL;
1386 }
1387 
1388 
1389 /*!	Gets the vnode the given vnode is covering.
1390 
1391 	The caller must not hold \c sVnodeLock. Note that this implies a race
1392 	condition, since the situation can change at any time.
1393 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for freeing.
1396 
1397 	\param vnode The vnode whose covered node shall be returned.
1398 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1399 		vnode.
1400 */
1401 static inline Vnode*
1402 get_covered_vnode(Vnode* vnode)
1403 {
1404 	if (!vnode->IsCovering())
1405 		return NULL;
1406 
1407 	ReadLocker vnodeReadLocker(sVnodeLock);
1408 	return get_covered_vnode_locked(vnode);
1409 }
1410 
1411 
1412 /*!	Gets the vnode the given vnode is covered by.
1413 
1414 	The caller must have \c sVnodeLock read-locked at least.
1415 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for freeing.
1418 
1419 	\param vnode The vnode whose covering node shall be returned.
1420 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1421 		any vnode.
1422 */
1423 static Vnode*
1424 get_covering_vnode_locked(Vnode* vnode)
1425 {
1426 	if (Vnode* coveringNode = vnode->covered_by) {
1427 		while (coveringNode->covered_by != NULL)
1428 			coveringNode = coveringNode->covered_by;
1429 
1430 		inc_vnode_ref_count(coveringNode);
1431 		return coveringNode;
1432 	}
1433 
1434 	return NULL;
1435 }
1436 
1437 
1438 /*!	Gets the vnode the given vnode is covered by.
1439 
1440 	The caller must not hold \c sVnodeLock. Note that this implies a race
1441 	condition, since the situation can change at any time.
1442 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for freeing.
1445 
1446 	\param vnode The vnode whose covering node shall be returned.
1447 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1448 		any vnode.
1449 */
1450 static inline Vnode*
1451 get_covering_vnode(Vnode* vnode)
1452 {
1453 	if (!vnode->IsCovered())
1454 		return NULL;
1455 
1456 	ReadLocker vnodeReadLocker(sVnodeLock);
1457 	return get_covering_vnode_locked(vnode);
1458 }
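

// Illustrative note: covers/covered_by form a chain when mounts are stacked.
// If mount B's root vnode covers directory vnode A, and mount C's root in
// turn covers B's root, then get_covered_vnode(C's root) returns A (the
// bottom of the chain) and get_covering_vnode(A) returns C's root (the top),
// each with a reference for the caller.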
1459 
1460 
1461 static void
1462 free_unused_vnodes()
1463 {
1464 	free_unused_vnodes(
1465 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1466 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1467 }
1468 
1469 
1470 static void
1471 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1472 {
1473 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1474 
1475 	free_unused_vnodes(level);
1476 }
1477 
1478 
1479 static inline void
1480 put_advisory_locking(struct advisory_locking* locking)
1481 {
1482 	release_sem(locking->lock);
1483 }
1484 
1485 
/*!	Returns the advisory_locking object of the \a vnode if it
	has one, and locks it.
1488 	You have to call put_advisory_locking() when you're done with
1489 	it.
1490 	Note, you must not have the vnode mutex locked when calling
1491 	this function.
1492 */
1493 static struct advisory_locking*
1494 get_advisory_locking(struct vnode* vnode)
1495 {
1496 	rw_lock_read_lock(&sVnodeLock);
1497 	vnode->Lock();
1498 
1499 	struct advisory_locking* locking = vnode->advisory_locking;
1500 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1501 
1502 	vnode->Unlock();
1503 	rw_lock_read_unlock(&sVnodeLock);
1504 
1505 	if (lock >= 0)
1506 		lock = acquire_sem(lock);
1507 	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would have gotten the lock at some point.
1511 		return NULL;
1512 	}
1513 
1514 	return locking;
1515 }
1516 
1517 
1518 /*!	Creates a locked advisory_locking object, and attaches it to the
1519 	given \a vnode.
	Returns B_OK in case of success - even if the vnode got such an
	object from someone else in the meantime, you'll still get it
	locked then.
1523 */
1524 static status_t
1525 create_advisory_locking(struct vnode* vnode)
1526 {
1527 	if (vnode == NULL)
1528 		return B_FILE_ERROR;
1529 
1530 	ObjectDeleter<advisory_locking> lockingDeleter;
1531 	struct advisory_locking* locking = NULL;
1532 
1533 	while (get_advisory_locking(vnode) == NULL) {
1534 		// no locking object set on the vnode yet, create one
1535 		if (locking == NULL) {
1536 			locking = new(std::nothrow) advisory_locking;
1537 			if (locking == NULL)
1538 				return B_NO_MEMORY;
1539 			lockingDeleter.SetTo(locking);
1540 
1541 			locking->wait_sem = create_sem(0, "advisory lock");
1542 			if (locking->wait_sem < 0)
1543 				return locking->wait_sem;
1544 
1545 			locking->lock = create_sem(0, "advisory locking");
1546 			if (locking->lock < 0)
1547 				return locking->lock;
1548 		}
1549 
1550 		// set our newly created locking object
1551 		ReadLocker _(sVnodeLock);
1552 		AutoLocker<Vnode> nodeLocker(vnode);
1553 		if (vnode->advisory_locking == NULL) {
1554 			vnode->advisory_locking = locking;
1555 			lockingDeleter.Detach();
1556 			return B_OK;
1557 		}
1558 	}
1559 
1560 	// The vnode already had a locking object. That's just as well.
1561 
1562 	return B_OK;
1563 }
1564 
1565 
1566 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1567 	with the advisory_lock \a lock.
1568 */
1569 static bool
1570 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1571 {
1572 	if (flock == NULL)
1573 		return true;
1574 
1575 	return lock->start <= flock->l_start - 1 + flock->l_len
1576 		&& lock->end >= flock->l_start;
1577 }
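

// For example (illustrative): a lock spanning [100, 199] intersects a
// normalized flock with l_start == 150 and l_len == 100 (i.e. [150, 249]),
// since 100 <= 249 and 199 >= 150.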
1578 
1579 
1580 /*!	Tests whether acquiring a lock would block.
1581 */
1582 static status_t
1583 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1584 {
1585 	flock->l_type = F_UNLCK;
1586 
1587 	struct advisory_locking* locking = get_advisory_locking(vnode);
1588 	if (locking == NULL)
1589 		return B_OK;
1590 
1591 	team_id team = team_get_current_team_id();
1592 
1593 	LockList::Iterator iterator = locking->locks.GetIterator();
1594 	while (iterator.HasNext()) {
1595 		struct advisory_lock* lock = iterator.Next();
1596 
		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1598 			// locks do overlap
1599 			if (flock->l_type != F_RDLCK || !lock->shared) {
1600 				// collision
1601 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1602 				flock->l_whence = SEEK_SET;
1603 				flock->l_start = lock->start;
1604 				flock->l_len = lock->end - lock->start + 1;
1605 				flock->l_pid = lock->team;
1606 				break;
1607 			}
1608 		}
1609 	}
1610 
1611 	put_advisory_locking(locking);
1612 	return B_OK;
1613 }
1614 
1615 
1616 /*!	Removes the specified lock, or all locks of the calling team
1617 	if \a flock is NULL.
1618 */
1619 static status_t
1620 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1621 	struct file_descriptor* descriptor, struct flock* flock)
1622 {
1623 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1624 
1625 	struct advisory_locking* locking = get_advisory_locking(vnode);
1626 	if (locking == NULL)
1627 		return B_OK;
1628 
1629 	// find matching lock entries
1630 
1631 	LockList::Iterator iterator = locking->locks.GetIterator();
1632 	while (iterator.HasNext()) {
1633 		struct advisory_lock* lock = iterator.Next();
1634 		bool removeLock = false;
1635 
1636 		if (descriptor != NULL && lock->bound_to == descriptor) {
1637 			// Remove flock() locks
1638 			removeLock = true;
1639 		} else if (lock->bound_to == context
1640 				&& advisory_lock_intersects(lock, flock)) {
1641 			// Remove POSIX locks
1642 			bool endsBeyond = false;
1643 			bool startsBefore = false;
1644 			if (flock != NULL) {
1645 				startsBefore = lock->start < flock->l_start;
1646 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1647 			}
1648 
1649 			if (!startsBefore && !endsBeyond) {
1650 				// lock is completely contained in flock
1651 				removeLock = true;
1652 			} else if (startsBefore && !endsBeyond) {
1653 				// cut the end of the lock
1654 				lock->end = flock->l_start - 1;
1655 			} else if (!startsBefore && endsBeyond) {
1656 				// cut the start of the lock
1657 				lock->start = flock->l_start + flock->l_len;
1658 			} else {
1659 				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
1661 				if (secondLock == NULL) {
1662 					// TODO: we should probably revert the locks we already
1663 					// changed... (ie. allocate upfront)
1664 					put_advisory_locking(locking);
1665 					return B_NO_MEMORY;
1666 				}
1667 
				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
				secondLock->shared = lock->shared;

				// only now cut the end of the original lock, so that the
				// second lock has taken over its original end above
				lock->end = flock->l_start - 1;
1677 
1678 				locking->locks.Add(secondLock);
1679 			}
1680 		}
1681 
1682 		if (removeLock) {
1683 			// this lock is no longer used
1684 			iterator.Remove();
1685 			delete lock;
1686 		}
1687 	}
1688 
1689 	bool removeLocking = locking->locks.IsEmpty();
1690 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1691 
1692 	put_advisory_locking(locking);
1693 
1694 	if (removeLocking) {
1695 		// We can remove the whole advisory locking structure; it's no
1696 		// longer used
1697 		locking = get_advisory_locking(vnode);
1698 		if (locking != NULL) {
1699 			ReadLocker locker(sVnodeLock);
1700 			AutoLocker<Vnode> nodeLocker(vnode);
1701 
1702 			// the locking could have been changed in the mean time
1703 			if (locking->locks.IsEmpty()) {
1704 				vnode->advisory_locking = NULL;
1705 				nodeLocker.Unlock();
1706 				locker.Unlock();
1707 
1708 				// we've detached the locking from the vnode, so we can
1709 				// safely delete it
1710 				delete locking;
1711 			} else {
1712 				// the locking is in use again
1713 				nodeLocker.Unlock();
1714 				locker.Unlock();
1715 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1716 			}
1717 		}
1718 	}
1719 
1720 	return B_OK;
1721 }
1722 
1723 
1724 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1725 	will wait for the lock to become available, if there are any collisions
1726 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1727 
1728 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, but that
	seems to be in line with what the BSDs are doing).
1732 */
1733 static status_t
1734 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1735 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1736 {
1737 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1738 		vnode, flock, wait ? "yes" : "no"));
1739 
1740 	bool shared = flock->l_type == F_RDLCK;
1741 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1742 	status_t status = B_OK;
1743 
1744 	// TODO: do deadlock detection!
1745 
1746 	struct advisory_locking* locking;
1747 
1748 	while (true) {
1749 		// if this vnode has an advisory_locking structure attached,
1750 		// lock that one and search for any colliding file lock
1751 		status = create_advisory_locking(vnode);
1752 		if (status != B_OK)
1753 			return status;
1754 
1755 		locking = vnode->advisory_locking;
1756 		team_id team = team_get_current_team_id();
1757 		sem_id waitForLock = -1;
1758 
1759 		// test for collisions
1760 		LockList::Iterator iterator = locking->locks.GetIterator();
1761 		while (iterator.HasNext()) {
1762 			struct advisory_lock* lock = iterator.Next();
1763 
1764 			// TODO: locks from the same team might be joinable!
1765 			if ((lock->team != team || lock->bound_to != boundTo)
1766 					&& advisory_lock_intersects(lock, flock)) {
1767 				// locks do overlap
1768 				if (!shared || !lock->shared) {
1769 					// we need to wait
1770 					waitForLock = locking->wait_sem;
1771 					break;
1772 				}
1773 			}
1774 		}
1775 
1776 		if (waitForLock < 0)
1777 			break;
1778 
1779 		// We need to wait. Do that or fail now, if we've been asked not to.
1780 
1781 		if (!wait) {
1782 			put_advisory_locking(locking);
1783 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1784 		}
1785 
1786 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1787 			B_CAN_INTERRUPT, 0);
1788 		if (status != B_OK && status != B_BAD_SEM_ID)
1789 			return status;
1790 
1791 		// We have been notified, but we need to re-lock the locking object. So
1792 		// go another round...
1793 	}
1794 
1795 	// install new lock
1796 
1797 	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
1798 	if (lock == NULL) {
1799 		put_advisory_locking(locking);
1800 		return B_NO_MEMORY;
1801 	}
1802 
1803 	lock->bound_to = boundTo;
1804 	lock->team = team_get_current_team_id();
1805 	lock->session = thread_get_current_thread()->team->session_id;
1806 	// values must already be normalized when getting here
1807 	lock->start = flock->l_start;
1808 	lock->end = flock->l_start - 1 + flock->l_len;
1809 	lock->shared = shared;
1810 
1811 	locking->locks.Add(lock);
1812 	put_advisory_locking(locking);
1813 
1814 	return status;
1815 }
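

// Usage sketch (illustrative only, not compiled): a POSIX F_SETLKW style
// request roughly ends up here as a normalize + acquire sequence:
#if 0
	struct flock flock = {};
	flock.l_type = F_WRLCK;
	flock.l_whence = SEEK_SET;
	flock.l_start = 0;
	flock.l_len = 0;
		// 0 means "to the end of the file"; normalize_flock() turns this
		// into OFF_MAX
	status_t status = normalize_flock(descriptor, &flock);
	if (status == B_OK) {
		status = acquire_advisory_lock(vnode, context, NULL, &flock,
			true /* wait */);
	}
#endif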
1816 
1817 
1818 /*!	Normalizes the \a flock structure to make it easier to compare the
1819 	structure with others. The l_start and l_len fields are set to absolute
1820 	values according to the l_whence field.
1821 */
1822 static status_t
1823 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1824 {
1825 	switch (flock->l_whence) {
1826 		case SEEK_SET:
1827 			break;
1828 		case SEEK_CUR:
1829 			flock->l_start += descriptor->pos;
1830 			break;
1831 		case SEEK_END:
1832 		{
1833 			struct vnode* vnode = descriptor->u.vnode;
1834 			struct stat stat;
1835 			status_t status;
1836 
1837 			if (!HAS_FS_CALL(vnode, read_stat))
1838 				return B_UNSUPPORTED;
1839 
1840 			status = FS_CALL(vnode, read_stat, &stat);
1841 			if (status != B_OK)
1842 				return status;
1843 
1844 			flock->l_start += stat.st_size;
1845 			break;
1846 		}
1847 		default:
1848 			return B_BAD_VALUE;
1849 	}
1850 
1851 	if (flock->l_start < 0)
1852 		flock->l_start = 0;
1853 	if (flock->l_len == 0)
1854 		flock->l_len = OFF_MAX;
1855 
1856 	// don't let the offset and length overflow
1857 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1858 		flock->l_len = OFF_MAX - flock->l_start;
1859 
1860 	if (flock->l_len < 0) {
1861 		// a negative length reverses the region
1862 		flock->l_start += flock->l_len;
1863 		flock->l_len = -flock->l_len;
1864 	}
1865 
1866 	return B_OK;
1867 }
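
/*	Worked example (illustrative, not from the original source): assume a
	100 byte file and a descriptor with pos == 20.

		{ l_whence = SEEK_CUR, l_start = -30, l_len = 15 }
			-> l_start becomes -10 and is clamped to 0; the region
			   covers bytes [0, 15).
		{ l_whence = SEEK_END, l_start = 0, l_len = 0 }
			-> l_start becomes 100; l_len == 0 means "to the end of the
			   file", so it is replaced by OFF_MAX and then clamped so
			   that l_start + l_len doesn't overflow.
		{ l_whence = SEEK_SET, l_start = 50, l_len = -10 }
			-> the negative length reverses the region: l_start = 40,
			   l_len = 10, i.e. bytes [40, 50).
*/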
1868 
1869 
1870 static void
1871 replace_vnode_if_disconnected(struct fs_mount* mount,
1872 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1873 	struct vnode* fallBack, bool lockRootLock)
1874 {
1875 	struct vnode* givenVnode = vnode;
1876 	bool vnodeReplaced = false;
1877 
1878 	ReadLocker vnodeReadLocker(sVnodeLock);
1879 
1880 	if (lockRootLock)
1881 		mutex_lock(&sIOContextRootLock);
1882 
1883 	while (vnode != NULL && vnode->mount == mount
1884 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1885 		if (vnode->covers != NULL) {
1886 			// redirect the vnode to the covered vnode
1887 			vnode = vnode->covers;
1888 		} else
1889 			vnode = fallBack;
1890 
1891 		vnodeReplaced = true;
1892 	}
1893 
1894 	// If we've replaced the node, grab a reference for the new one.
1895 	if (vnodeReplaced && vnode != NULL)
1896 		inc_vnode_ref_count(vnode);
1897 
1898 	if (lockRootLock)
1899 		mutex_unlock(&sIOContextRootLock);
1900 
1901 	vnodeReadLocker.Unlock();
1902 
1903 	if (vnodeReplaced)
1904 		put_vnode(givenVnode);
1905 }
1906 
1907 
1908 /*!	Disconnects all file descriptors that are associated with the
1909 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1910 	\a mount object.
1911 
1912 	Note that there might still be ongoing accesses after this function has
1913 	been called - accesses that were already in progress won't be interrupted.
1914 	However, any subsequent access will fail.
1915 
1916 	This is not a cheap function and should be used with care and rarely.
1917 	TODO: there is currently no means to stop a blocking read/write!
1918 */
1919 static void
1920 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1921 	struct vnode* vnodeToDisconnect)
1922 {
1923 	// iterate over all teams and peek into their file descriptors
1924 	TeamListIterator teamIterator;
1925 	while (Team* team = teamIterator.Next()) {
1926 		BReference<Team> teamReference(team, true);
1927 		TeamLocker teamLocker(team);
1928 
1929 		// lock the I/O context
1930 		io_context* context = team->io_context;
1931 		if (context == NULL)
1932 			continue;
1933 		MutexLocker contextLocker(context->io_mutex);
1934 
1935 		teamLocker.Unlock();
1936 
1937 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1938 			sRoot, true);
1939 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1940 			sRoot, false);
1941 
1942 		for (uint32 i = 0; i < context->table_size; i++) {
1943 			struct file_descriptor* descriptor = context->fds[i];
1944 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1945 				continue;
1946 
1947 			inc_fd_ref_count(descriptor);
1948 
1949 			// if this descriptor points at this mount, we
1950 			// need to disconnect it to be able to unmount
1951 			struct vnode* vnode = fd_vnode(descriptor);
1952 			if (vnodeToDisconnect != NULL) {
1953 				if (vnode == vnodeToDisconnect)
1954 					disconnect_fd(descriptor);
1955 			} else if ((vnode != NULL && vnode->mount == mount)
1956 				|| (vnode == NULL && descriptor->u.mount == mount))
1957 				disconnect_fd(descriptor);
1958 
1959 			put_fd(descriptor);
1960 		}
1961 	}
1962 }
1963 
1964 
1965 /*!	\brief Gets the root node of the current IO context.
1966 	If \a kernel is \c true, the kernel IO context will be used.
1967 	The caller obtains a reference to the returned node.
1968 */
1969 struct vnode*
1970 get_root_vnode(bool kernel)
1971 {
1972 	if (!kernel) {
1973 		// Get the root of the current IO context
1974 		struct io_context* context = get_current_io_context(kernel);
1975 
1976 		mutex_lock(&sIOContextRootLock);
1977 
1978 		struct vnode* root = context->root;
1979 		if (root != NULL)
1980 			inc_vnode_ref_count(root);
1981 
1982 		mutex_unlock(&sIOContextRootLock);
1983 
1984 		if (root != NULL)
1985 			return root;
1986 
1987 		// That should never happen.
1988 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1989 			"have a root\n", team_get_current_team_id());
1990 	}
1991 
1992 	inc_vnode_ref_count(sRoot);
1993 	return sRoot;
1994 }
1995 
1996 
1997 /*!	\brief Gets the directory path and leaf name for a given path.
1998 
1999 	The supplied \a path is transformed to refer to the directory part of
2000 	the entry identified by the original path, and the leaf name of the
2001 	original entry is written into the buffer \a filename.
2002 	Neither the returned path nor the leaf name can be expected to be
2003 	canonical.
2004 
2005 	\param path The path to be analyzed. Must be able to store at least one
2006 		   additional character.
2007 	\param filename The buffer into which the leaf name will be written.
2008 		   Must be of size B_FILE_NAME_LENGTH at least.
2009 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2010 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2011 		   if the given path name is empty.
2012 */
2013 static status_t
2014 get_dir_path_and_leaf(char* path, char* filename)
2015 {
2016 	if (*path == '\0')
2017 		return B_ENTRY_NOT_FOUND;
2018 
2019 	char* last = strrchr(path, '/');
2020 		// '/' are not allowed in file names!
2021 
2022 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2023 
2024 	if (last == NULL) {
2025 		// this path is single segment with no '/' in it
2026 		// ex. "foo"
2027 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2028 			return B_NAME_TOO_LONG;
2029 
2030 		strcpy(path, ".");
2031 	} else {
2032 		last++;
2033 		if (last[0] == '\0') {
2034 			// special case: the path ends in one or more '/' - remove them
2035 			while (*--last == '/' && last != path);
2036 			last[1] = '\0';
2037 
2038 			if (last == path && last[0] == '/') {
2039 				// This path points to the root of the file system
2040 				strcpy(filename, ".");
2041 				return B_OK;
2042 			}
2043 			for (; last != path && *(last - 1) != '/'; last--);
2044 				// rewind to the start of the leaf before the '/'
2045 		}
2046 
2047 		// normal leaf: replace the leaf portion of the path with a '.'
2048 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2049 			return B_NAME_TOO_LONG;
2050 
2051 		last[0] = '.';
2052 		last[1] = '\0';
2053 	}
2054 	return B_OK;
2055 }
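
/*	Worked examples (illustrative, not from the original source):

		path "foo"    -> path ".",     filename "foo"
		path "a/b/c"  -> path "a/b/.", filename "c"
		path "a/b///" -> path "a/.",   filename "b"
		path "/"      -> path "/",     filename "."
*/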
2056 
2057 
2058 static status_t
2059 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2060 	bool traverse, bool kernel, VnodePutter& _vnode)
2061 {
2062 	char clonedName[B_FILE_NAME_LENGTH + 1];
2063 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2064 		return B_NAME_TOO_LONG;
2065 
2066 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2067 	struct vnode* directory;
2068 
2069 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2070 	if (status < 0)
2071 		return status;
2072 
2073 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2074 		_vnode, NULL);
2075 }
2076 
2077 
2078 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2079 	and returns the respective vnode.
2080 	On success a reference to the vnode is acquired for the caller.
2081 */
2082 static status_t
2083 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2084 {
2085 	ino_t id;
2086 	bool missing;
2087 
2088 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2089 		return missing ? B_ENTRY_NOT_FOUND
2090 			: get_vnode(dir->device, id, _vnode, true, false);
2091 	}
2092 
2093 	status_t status = FS_CALL(dir, lookup, name, &id);
2094 	if (status != B_OK)
2095 		return status;
2096 
2097 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2098 	// have a reference and just need to look the node up.
2099 	rw_lock_read_lock(&sVnodeLock);
2100 	*_vnode = lookup_vnode(dir->device, id);
2101 	rw_lock_read_unlock(&sVnodeLock);
2102 
2103 	if (*_vnode == NULL) {
2104 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2105 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2106 		return B_ENTRY_NOT_FOUND;
2107 	}
2108 
2109 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2110 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2111 //		(*_vnode)->mount->id, (*_vnode)->id);
2112 
2113 	return B_OK;
2114 }
2115 
2116 
2117 /*!	Returns the vnode for the relative \a path starting at the specified \a vnode.
2118 
2119 	\param[in,out] path The relative path being searched. Must not be NULL.
2120 	If the function returns successfully, \a path contains the name of the last path
2121 	component. This function clobbers the buffer pointed to by \a path only
2122 	if it contains more than one component.
2123 
2124 	If the function fails and \a leafName is not NULL, \a _vnode contains the last
2125 	directory; the caller is responsible for calling put_vnode() on it.
2126 
2127 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2128 	it is successful or not!
2129 
2130 	\param[out] _vnode If the function returns B_OK, points to the found node.
2131 	\param[out] _vnode If the function returns something else and \a leafName is not NULL: set
2132 		to the last existing directory in the path. The caller is responsible for releasing it
2133 		using put_vnode().
2134 	\param[out] _vnode If the function returns something else and \a leafName is NULL: not used.
2135 */
2136 static status_t
2137 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2138 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2139 	ino_t* _parentID, char* leafName)
2140 {
2141 	FUNCTION(("vnode_path_to_vnode(start = %p, path = %s)\n", start, path));
2142 	ASSERT(!_vnode.IsSet());
2143 
2144 	VnodePutter vnode(start);
2145 
2146 	if (path == NULL)
2147 		return B_BAD_VALUE;
2148 	if (*path == '\0')
2149 		return B_ENTRY_NOT_FOUND;
2150 
2151 	status_t status = B_OK;
2152 	ino_t lastParentID = vnode->id;
2153 	while (true) {
2154 		char* nextPath;
2155 
2156 		TRACE(("vnode_path_to_vnode: top of loop. path = %p ('%s')\n", path,
2157 			path));
2158 
2159 		// done?
2160 		if (path[0] == '\0')
2161 			break;
2162 
2163 		// walk to find the next path component ("path" will point to a single
2164 		// path component), and filter out multiple slashes
2165 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2166 				nextPath++);
2167 
2168 		bool directoryFound = false;
2169 		if (*nextPath == '/') {
2170 			directoryFound = true;
2171 			*nextPath = '\0';
2172 			do
2173 				nextPath++;
2174 			while (*nextPath == '/');
2175 		}
2176 
2177 		// If the '..' is at a covering vnode, move to the covered vnode,
2178 		// so we pass the '..' path to the underlying file system.
2179 		// Also prevent breaking out of the root of the IO context.
2180 		if (strcmp("..", path) == 0) {
2181 			if (vnode.Get() == ioContext->root) {
2182 				// Attempted prison break! Keep it contained.
2183 				path = nextPath;
2184 				continue;
2185 			}
2186 
2187 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2188 				vnode.SetTo(coveredVnode);
2189 		}
2190 
2191 		// check if vnode is really a directory
2192 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2193 			status = B_NOT_A_DIRECTORY;
2194 
2195 		// Check if we have the right to search the current directory vnode.
2196 		// If a file system doesn't have the access() function, we assume that
2197 		// searching a directory is always allowed
2198 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2199 			status = FS_CALL(vnode.Get(), access, X_OK);
2200 
2201 		// Tell the filesystem to get the vnode of this path component (if we
2202 		// got the permission from the call above)
2203 		VnodePutter nextVnode;
2204 		if (status == B_OK) {
2205 			struct vnode* temp = NULL;
2206 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2207 			nextVnode.SetTo(temp);
2208 		}
2209 
2210 		if (status != B_OK) {
2211 			if (leafName != NULL) {
2212 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2213 				_vnode.SetTo(vnode.Detach());
2214 			}
2215 			return status;
2216 		}
2217 
2218 		// If the new node is a symbolic link, resolve it (if we've been told
2219 		// to do it)
2220 		if (S_ISLNK(nextVnode->Type())
2221 			&& (traverseLeafLink || directoryFound)) {
2222 			size_t bufferSize;
2223 			char* buffer;
2224 
2225 			TRACE(("traverse link\n"));
2226 
2227 			if (count + 1 > B_MAX_SYMLINKS)
2228 				return B_LINK_LIMIT;
2229 
2230 			bufferSize = B_PATH_NAME_LENGTH;
2231 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2232 			if (buffer == NULL)
2233 				return B_NO_MEMORY;
2234 
2235 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2236 				bufferSize--;
2237 				status = FS_CALL(nextVnode.Get(), read_symlink, buffer, &bufferSize);
2238 				// null-terminate
2239 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2240 					buffer[bufferSize] = '\0';
2241 			} else
2242 				status = B_BAD_VALUE;
2243 
2244 			if (status != B_OK) {
2245 				object_cache_free(sPathNameCache, buffer, 0);
2246 				return status;
2247 			}
2248 			nextVnode.Unset();
2249 
2250 			// Check if we start from the root directory or the current
2251 			// directory ("vnode" still points to that one).
2252 			// Cut off all leading slashes if it's the root directory
2253 			path = buffer;
2254 			bool absoluteSymlink = false;
2255 			if (path[0] == '/') {
2256 				// we don't need the old directory anymore
2257 				vnode.Unset();
2258 
2259 				while (*++path == '/')
2260 					;
2261 
2262 				mutex_lock(&sIOContextRootLock);
2263 				vnode.SetTo(ioContext->root);
2264 				inc_vnode_ref_count(vnode.Get());
2265 				mutex_unlock(&sIOContextRootLock);
2266 
2267 				absoluteSymlink = true;
2268 			}
2269 
2270 			inc_vnode_ref_count(vnode.Get());
2271 				// balance the next recursion - we will decrement the
2272 				// ref_count of the vnode, no matter if we succeeded or not
2273 
2274 			if (absoluteSymlink && *path == '\0') {
2275 				// symlink was just "/"
2276 				nextVnode.SetTo(vnode.Get());
2277 			} else {
2278 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2279 					ioContext, nextVnode, &lastParentID, leafName);
2280 			}
2281 
2282 			object_cache_free(sPathNameCache, buffer, 0);
2283 
2284 			if (status != B_OK) {
2285 				if (leafName != NULL)
2286 					_vnode.SetTo(nextVnode.Detach());
2287 				return status;
2288 			}
2289 		} else
2290 			lastParentID = vnode->id;
2291 
2292 		// decrease the ref count on the old dir we just looked up into
2293 		vnode.Unset();
2294 
2295 		path = nextPath;
2296 		vnode.SetTo(nextVnode.Detach());
2297 
2298 		// see if we hit a covered node
2299 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2300 			vnode.SetTo(coveringNode);
2301 	}
2302 
2303 	_vnode.SetTo(vnode.Detach());
2304 	if (_parentID)
2305 		*_parentID = lastParentID;
2306 
2307 	return B_OK;
2308 }
2309 
2310 
2311 static status_t
2312 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2313 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2314 {
2315 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2316 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2317 }
2318 
2319 
2320 static status_t
2321 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2322 	ino_t* _parentID, bool kernel)
2323 {
2324 	struct vnode* start = NULL;
2325 
2326 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2327 
2328 	if (!path)
2329 		return B_BAD_VALUE;
2330 
2331 	if (*path == '\0')
2332 		return B_ENTRY_NOT_FOUND;
2333 
2334 	// figure out if we need to start at root or at cwd
2335 	if (*path == '/') {
2336 		if (sRoot == NULL) {
2337 			// we're a bit early, aren't we?
2338 			return B_ERROR;
2339 		}
2340 
2341 		while (*++path == '/')
2342 			;
2343 		start = get_root_vnode(kernel);
2344 
2345 		if (*path == '\0') {
2346 			_vnode.SetTo(start);
2347 			return B_OK;
2348 		}
2349 
2350 	} else {
2351 		struct io_context* context = get_current_io_context(kernel);
2352 
2353 		mutex_lock(&context->io_mutex);
2354 		start = context->cwd;
2355 		if (start != NULL)
2356 			inc_vnode_ref_count(start);
2357 		mutex_unlock(&context->io_mutex);
2358 
2359 		if (start == NULL)
2360 			return B_ERROR;
2361 	}
2362 
2363 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2364 		_parentID);
2365 }
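
/*	Usage sketch (illustrative, not part of the original source). The path
	buffer must be writable, since vnode_path_to_vnode() cuts it into
	components in place:

		char buffer[B_PATH_NAME_LENGTH];
		strlcpy(buffer, "/boot/home", sizeof(buffer));

		VnodePutter vnode;
		status_t status = path_to_vnode(buffer, true, vnode, NULL, true);
		if (status == B_OK) {
			// "vnode" now holds a reference to the node for "/boot/home";
			// it is put automatically when the putter goes out of scope.
		}

	The example path is hypothetical; any absolute or relative path works.
*/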
2366 
2367 
2368 /*! Returns the vnode of the next to last segment of the path, and the last
2369 	portion of the path in \a filename.
2370 	The path buffer must be able to store at least one additional character.
2371 */
2372 static status_t
2373 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2374 	bool kernel)
2375 {
2376 	status_t status = get_dir_path_and_leaf(path, filename);
2377 	if (status != B_OK)
2378 		return status;
2379 
2380 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2381 }
2382 
2383 
2384 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2385 		   to by a FD + path pair.
2386 
2387 	\a path must be given in either case. \a fd might be omitted, in which
2388 	case \a path is either an absolute path or one relative to the current
2389 	directory. If both are supplied and \a path is relative, it is reckoned off
2390 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2391 	ignored.
2392 
2393 	The caller has the responsibility to call put_vnode() on the returned
2394 	directory vnode.
2395 
2396 	\param fd The FD. May be < 0.
2397 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2398 	       is modified by this function. It must have at least room for a
2399 	       string one character longer than the path it contains.
2400 	\param _vnode A pointer to a variable the directory vnode shall be written
2401 		   into.
2402 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2403 		   the leaf name of the specified entry will be written.
2404 	\param kernel \c true, if invoked from inside the kernel, \c false if
2405 		   invoked from userland.
2406 	\return \c B_OK, if everything went fine, another error code otherwise.
2407 */
2408 static status_t
2409 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2410 	char* filename, bool kernel)
2411 {
2412 	if (!path)
2413 		return B_BAD_VALUE;
2414 	if (*path == '\0')
2415 		return B_ENTRY_NOT_FOUND;
2416 	if (fd < 0)
2417 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2418 
2419 	status_t status = get_dir_path_and_leaf(path, filename);
2420 	if (status != B_OK)
2421 		return status;
2422 
2423 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2424 }
2425 
2426 
2427 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2428 		   to by a vnode + path pair.
2429 
2430 	\a path must be given in either case. \a vnode might be omitted, in which
2431 	case \a path is either an absolute path or one relative to the current
2432 	directory. If both are supplied and \a path is relative, it is reckoned off
2433 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2434 	ignored.
2435 
2436 	The caller has the responsibility to call put_vnode() on the returned
2437 	directory vnode.
2438 
2439 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2440 	it is successful or not.
2441 
2442 	\param vnode The vnode. May be \c NULL.
2443 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2444 	       is modified by this function. It must have at least room for a
2445 	       string one character longer than the path it contains.
2446 	\param _vnode A pointer to a variable the directory vnode shall be written
2447 		   into.
2448 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2449 		   the leaf name of the specified entry will be written.
2450 	\param kernel \c true, if invoked from inside the kernel, \c false if
2451 		   invoked from userland.
2452 	\return \c B_OK, if everything went fine, another error code otherwise.
2453 */
2454 static status_t
2455 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2456 	VnodePutter& _vnode, char* filename, bool kernel)
2457 {
2458 	VnodePutter vnodePutter(vnode);
2459 
2460 	if (!path)
2461 		return B_BAD_VALUE;
2462 	if (*path == '\0')
2463 		return B_ENTRY_NOT_FOUND;
2464 	if (vnode == NULL || path[0] == '/')
2465 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2466 
2467 	status_t status = get_dir_path_and_leaf(path, filename);
2468 	if (status != B_OK)
2469 		return status;
2470 
2471 	vnodePutter.Detach();
2472 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2473 }
2474 
2475 
2476 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2477 */
2478 static status_t
2479 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2480 	size_t bufferSize, struct io_context* ioContext)
2481 {
2482 	if (bufferSize < sizeof(struct dirent))
2483 		return B_BAD_VALUE;
2484 
2485 	// See if the vnode is covering another vnode and move to the covered
2486 	// vnode so we get the underlying file system
2487 	VnodePutter vnodePutter;
2488 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2489 		vnode = coveredVnode;
2490 		vnodePutter.SetTo(vnode);
2491 	}
2492 
2493 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2494 		// The FS supports getting the name of a vnode.
2495 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2496 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2497 			return B_OK;
2498 	}
2499 
2500 	// The FS doesn't support getting the name of a vnode. So we search the
2501 	// parent directory for the vnode, if the caller let us.
2502 
2503 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2504 		return B_UNSUPPORTED;
2505 
2506 	void* cookie;
2507 
2508 	status_t status = FS_CALL(parent, open_dir, &cookie);
2509 	if (status >= B_OK) {
2510 		while (true) {
2511 			uint32 num = 1;
2512 			// We use the FS hook directly instead of dir_read(), since we don't
2513 			// want the entries to be fixed. We have already resolved vnode to
2514 			// the covered node.
2515 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2516 				&num);
2517 			if (status != B_OK)
2518 				break;
2519 			if (num == 0) {
2520 				status = B_ENTRY_NOT_FOUND;
2521 				break;
2522 			}
2523 
2524 			if (vnode->id == buffer->d_ino) {
2525 				// found correct entry!
2526 				break;
2527 			}
2528 		}
2529 
2530 		FS_CALL(parent, close_dir, cookie);
2531 		FS_CALL(parent, free_dir_cookie, cookie);
2532 	}
2533 	return status;
2534 }
2535 
2536 
2537 static status_t
2538 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2539 	size_t nameSize, bool kernel)
2540 {
2541 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2542 	struct dirent* dirent = (struct dirent*)buffer;
2543 
2544 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2545 		get_current_io_context(kernel));
2546 	if (status != B_OK)
2547 		return status;
2548 
2549 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2550 		return B_BUFFER_OVERFLOW;
2551 
2552 	return B_OK;
2553 }
2554 
2555 
2556 /*!	Gets the full path to a given directory vnode.
2557 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2558 	file system doesn't support this call, it will fall back to iterating
2559 	through the parent directory to get the name of the child.
2560 
2561 	To protect against circular loops, it supports a maximum tree depth
2562 	of 256 levels.
2563 
2564 	Note that the path might no longer be correct by the time this function
2565 	returns! It doesn't use any locking to ensure the returned path stays
2566 	valid, as paths aren't stable anyway: they can change at any time.
2567 
2568 	It might be a good idea, though, for the calling function to check
2569 	whether the returned path exists (it's not done here for efficiency).
2570 */
2571 static status_t
2572 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2573 	bool kernel)
2574 {
2575 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2576 
2577 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2578 		return B_BAD_VALUE;
2579 
2580 	if (!S_ISDIR(vnode->Type()))
2581 		return B_NOT_A_DIRECTORY;
2582 
2583 	char* path = buffer;
2584 	int32 insert = bufferSize;
2585 	int32 maxLevel = 256;
2586 	int32 length;
2587 	status_t status = B_OK;
2588 	struct io_context* ioContext = get_current_io_context(kernel);
2589 
2590 	// we don't use get_vnode() here because this call is more
2591 	// efficient and does all we need from get_vnode()
2592 	inc_vnode_ref_count(vnode);
2593 
2594 	path[--insert] = '\0';
2595 		// the path is filled right to left
2596 
2597 	while (true) {
2598 		// If the node is the context's root, bail out. Otherwise resolve mount
2599 		// points.
2600 		if (vnode == ioContext->root)
2601 			break;
2602 
2603 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2604 			put_vnode(vnode);
2605 			vnode = coveredVnode;
2606 		}
2607 
2608 		// lookup the parent vnode
2609 		struct vnode* parentVnode;
2610 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2611 		if (status != B_OK)
2612 			goto out;
2613 
2614 		if (parentVnode == vnode) {
2615 			// The caller apparently got their hands on a node outside of their
2616 			// context's root. Now we've hit the global root.
2617 			put_vnode(parentVnode);
2618 			break;
2619 		}
2620 
2621 		// get the node's name
2622 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2623 			// also used for fs_read_dir()
2624 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2625 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2626 			sizeof(nameBuffer), ioContext);
2627 
2628 		// release the current vnode, we only need its parent from now on
2629 		put_vnode(vnode);
2630 		vnode = parentVnode;
2631 
2632 		if (status != B_OK)
2633 			goto out;
2634 
2635 		// TODO: add an explicit check for loops in about 10 levels to do
2636 		// real loop detection
2637 
2638 		// don't go deeper than 'maxLevel' to prevent circular loops
2639 		if (maxLevel-- < 0) {
2640 			status = B_LINK_LIMIT;
2641 			goto out;
2642 		}
2643 
2644 		// add the name in front of the current path
2645 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2646 		length = strlen(name);
2647 		insert -= length;
2648 		if (insert <= 0) {
2649 			status = B_RESULT_NOT_REPRESENTABLE;
2650 			goto out;
2651 		}
2652 		memcpy(path + insert, name, length);
2653 		path[--insert] = '/';
2654 	}
2655 
2656 	// the root dir will result in an empty path: fix it
2657 	if (path[insert] == '\0')
2658 		path[--insert] = '/';
2659 
2660 	TRACE(("  path is: %s\n", path + insert));
2661 
2662 	// move the path to the start of the buffer
2663 	length = bufferSize - insert;
2664 	memmove(buffer, path + insert, length);
2665 
2666 out:
2667 	put_vnode(vnode);
2668 	return status;
2669 }
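
/*	Worked example (illustrative, not from the original source): resolving
	the path of the directory "/boot/home" into a 16 byte buffer. The
	buffer is filled right to left:

		after the terminator:  insert = 15, buffer "...............\0"
		after leaf "home":     insert = 10, buffer "........../home\0"
		after parent "boot":   insert = 5,  buffer "...../boot/home\0"

	Once the IO context's root (or the global root) is reached, the loop
	ends and the result is memmove()d to the start of the buffer.
*/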
2670 
2671 
2672 /*!	Checks the length of every path component, and adds a '.'
2673 	if the path ends in a slash.
2674 	The given path buffer must be able to store at least one
2675 	additional character.
2676 */
2677 static status_t
2678 check_path(char* to)
2679 {
2680 	int32 length = 0;
2681 
2682 	// check length of every path component
2683 
2684 	while (*to) {
2685 		char* begin;
2686 		if (*to == '/')
2687 			to++, length++;
2688 
2689 		begin = to;
2690 		while (*to != '/' && *to)
2691 			to++, length++;
2692 
2693 		if (to - begin > B_FILE_NAME_LENGTH)
2694 			return B_NAME_TOO_LONG;
2695 	}
2696 
2697 	if (length == 0)
2698 		return B_ENTRY_NOT_FOUND;
2699 
2700 	// complete path if there is a slash at the end
2701 
2702 	if (*(to - 1) == '/') {
2703 		if (length > B_PATH_NAME_LENGTH - 2)
2704 			return B_NAME_TOO_LONG;
2705 
2706 		to[0] = '.';
2707 		to[1] = '\0';
2708 	}
2709 
2710 	return B_OK;
2711 }
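
/*	Worked examples (illustrative, not from the original source):

		"foo/bar"  -> unchanged, returns B_OK
		"foo/bar/" -> becomes "foo/bar/.", returns B_OK
		""         -> returns B_ENTRY_NOT_FOUND
		any component longer than B_FILE_NAME_LENGTH
		           -> returns B_NAME_TOO_LONG
*/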
2712 
2713 
2714 static struct file_descriptor*
2715 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2716 {
2717 	struct file_descriptor* descriptor
2718 		= get_fd(get_current_io_context(kernel), fd);
2719 	if (descriptor == NULL)
2720 		return NULL;
2721 
2722 	struct vnode* vnode = fd_vnode(descriptor);
2723 	if (vnode == NULL) {
2724 		put_fd(descriptor);
2725 		return NULL;
2726 	}
2727 
2728 	// ToDo: when we can close a file descriptor at any point, investigate
2729 	//	if this is still valid to do (accessing the vnode without ref_count
2730 	//	or locking)
2731 	*_vnode = vnode;
2732 	return descriptor;
2733 }
2734 
2735 
2736 static struct vnode*
2737 get_vnode_from_fd(int fd, bool kernel)
2738 {
2739 	struct file_descriptor* descriptor;
2740 	struct vnode* vnode;
2741 
2742 	descriptor = get_fd(get_current_io_context(kernel), fd);
2743 	if (descriptor == NULL)
2744 		return NULL;
2745 
2746 	vnode = fd_vnode(descriptor);
2747 	if (vnode != NULL)
2748 		inc_vnode_ref_count(vnode);
2749 
2750 	put_fd(descriptor);
2751 	return vnode;
2752 }
2753 
2754 
2755 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2756 	only the path will be considered. In this case, the \a path must not be
2757 	NULL.
2758 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2759 	and should be NULL for files.
2760 */
2761 static status_t
2762 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2763 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2764 {
2765 	if (fd < 0 && !path)
2766 		return B_BAD_VALUE;
2767 
2768 	if (path != NULL && *path == '\0')
2769 		return B_ENTRY_NOT_FOUND;
2770 
2771 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2772 		// no FD or absolute path
2773 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2774 	}
2775 
2776 	// FD only, or FD + relative path
2777 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2778 	if (vnode == NULL)
2779 		return B_FILE_ERROR;
2780 
2781 	if (path != NULL) {
2782 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2783 			_vnode, _parentID);
2784 	}
2785 
2786 	// there is no relative path to take into account
2787 
2788 	_vnode.SetTo(vnode);
2789 	if (_parentID)
2790 		*_parentID = -1;
2791 
2792 	return B_OK;
2793 }
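
/*	Summary of the supported combinations (illustrative, derived from the
	code above):

		fd < 0,  path given    -> resolved via path_to_vnode() only
		fd >= 0, absolute path -> the FD is ignored, path_to_vnode() only
		fd >= 0, relative path -> resolved relative to the FD's vnode
		fd >= 0, path == NULL  -> the FD's own vnode is returned
*/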
2794 
2795 
2796 struct vnode*
2797 fd_vnode(struct file_descriptor* descriptor)
2798 {
2799 	if (descriptor->ops == &sFileOps
2800 			|| descriptor->ops == &sDirectoryOps
2801 			|| descriptor->ops == &sAttributeOps
2802 			|| descriptor->ops == &sAttributeDirectoryOps)
2803 		return descriptor->u.vnode;
2804 
2805 	return NULL;
2806 }
2807 
2808 
2809 bool
2810 fd_is_file(struct file_descriptor* descriptor)
2811 {
2812 	return descriptor->ops == &sFileOps;
2813 }
2814 
2815 
2816 static int
2817 get_new_fd(struct fd_ops* ops, struct fs_mount* mount, struct vnode* vnode,
2818 	void* cookie, int openMode, bool kernel)
2819 {
2820 	struct file_descriptor* descriptor;
2821 	int fd;
2822 
2823 	// If the vnode is mandatory-locked, we don't allow creating a new
2824 	// file or directory file_descriptor for it
2825 	if (vnode && vnode->mandatory_locked_by != NULL
2826 		&& (ops == &sFileOps || ops == &sDirectoryOps))
2827 		return B_BUSY;
2828 
2829 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2830 		return B_BAD_VALUE;
2831 
2832 	descriptor = alloc_fd();
2833 	if (!descriptor)
2834 		return B_NO_MEMORY;
2835 
2836 	if (vnode)
2837 		descriptor->u.vnode = vnode;
2838 	else
2839 		descriptor->u.mount = mount;
2840 	descriptor->cookie = cookie;
2841 
2842 	descriptor->ops = ops;
2843 	descriptor->open_mode = openMode;
2844 
2845 	if (descriptor->ops->fd_seek != NULL) {
2846 		// some kinds of files are not seekable
2847 		switch (vnode->Type() & S_IFMT) {
2848 			case S_IFIFO:
2849 			case S_IFSOCK:
2850 				ASSERT(descriptor->pos == -1);
2851 				break;
2852 
2853 			// The Open Group Base Specs don't single out any file types besides
2854 			// pipes, FIFOs, and sockets, so we allow seeking all others.
2855 			default:
2856 				descriptor->pos = 0;
2857 				break;
2858 		}
2859 	}
2860 
2861 	io_context* context = get_current_io_context(kernel);
2862 	fd = new_fd(context, descriptor);
2863 	if (fd < 0) {
2864 		descriptor->ops = NULL;
2865 		put_fd(descriptor);
2866 		return B_NO_MORE_FDS;
2867 	}
2868 
2869 	mutex_lock(&context->io_mutex);
2870 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2871 	mutex_unlock(&context->io_mutex);
2872 
2873 	return fd;
2874 }
2875 
2876 
2877 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2878 	vfs_normalize_path(). See there for more documentation.
2879 */
2880 static status_t
2881 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2882 {
2883 	VnodePutter dir;
2884 	status_t error;
2885 
2886 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2887 		// get dir vnode + leaf name
2888 		char leaf[B_FILE_NAME_LENGTH];
2889 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2890 		if (error != B_OK)
2891 			return error;
2892 		strcpy(path, leaf);
2893 
2894 		// get file vnode, if we shall resolve links
2895 		bool fileExists = false;
2896 		VnodePutter fileVnode;
2897 		if (traverseLink) {
2898 			inc_vnode_ref_count(dir.Get());
2899 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2900 					NULL) == B_OK) {
2901 				fileExists = true;
2902 			}
2903 		}
2904 
2905 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2906 			// we're done -- construct the path
2907 			bool hasLeaf = true;
2908 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2909 				// special cases "." and ".." -- get the dir, forget the leaf
2910 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2911 					dir, NULL);
2912 				if (error != B_OK)
2913 					return error;
2914 				hasLeaf = false;
2915 			}
2916 
2917 			// get the directory path
2918 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2919 			if (error != B_OK)
2920 				return error;
2921 
2922 			// append the leaf name
2923 			if (hasLeaf) {
2924 				// insert a directory separator if this is not the file system
2925 				// root
2926 				if ((strcmp(path, "/") != 0
2927 					&& strlcat(path, "/", pathSize) >= pathSize)
2928 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2929 					return B_NAME_TOO_LONG;
2930 				}
2931 			}
2932 
2933 			return B_OK;
2934 		}
2935 
2936 		// read link
2937 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2938 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2939 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2940 			if (error != B_OK)
2941 				return error;
2942 			if (bufferSize < B_PATH_NAME_LENGTH)
2943 				path[bufferSize] = '\0';
2944 		} else
2945 			return B_BAD_VALUE;
2946 	}
2947 
2948 	return B_LINK_LIMIT;
2949 }
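
/*	Example (illustrative, not from the original source): assuming that
	"/boot/home" exists and contains no symlinks, normalize_path() turns

		"/boot//home/./Desktop/.."  into  "/boot/home"

	and, with the current directory being "/boot/home",

		"Desktop"                   into  "/boot/home/Desktop"

	The paths are hypothetical; the function works on whatever the volume
	actually contains, resolving up to B_MAX_SYMLINKS chained symlinks.
*/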
2950 
2951 
2952 static status_t
2953 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2954 	struct io_context* ioContext)
2955 {
2956 	// Make sure the IO context root is not bypassed.
2957 	if (parent == ioContext->root) {
2958 		*_device = parent->device;
2959 		*_node = parent->id;
2960 		return B_OK;
2961 	}
2962 
2963 	inc_vnode_ref_count(parent);
2964 		// vnode_path_to_vnode() puts the node
2965 
2966 	// ".." is guaranteed not to be clobbered by this call
2967 	VnodePutter vnode;
2968 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2969 		ioContext, vnode, NULL);
2970 	if (status == B_OK) {
2971 		*_device = vnode->device;
2972 		*_node = vnode->id;
2973 	}
2974 
2975 	return status;
2976 }
2977 
2978 
2979 #ifdef ADD_DEBUGGER_COMMANDS
2980 
2981 
2982 static void
2983 _dump_advisory_locking(advisory_locking* locking)
2984 {
2985 	if (locking == NULL)
2986 		return;
2987 
2988 	kprintf("   lock:        %" B_PRId32, locking->lock);
2989 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2990 
2991 	int32 index = 0;
2992 	LockList::Iterator iterator = locking->locks.GetIterator();
2993 	while (iterator.HasNext()) {
2994 		struct advisory_lock* lock = iterator.Next();
2995 
2996 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2997 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2998 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2999 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3000 	}
3001 }
3002 
3003 
3004 static void
3005 _dump_mount(struct fs_mount* mount)
3006 {
3007 	kprintf("MOUNT: %p\n", mount);
3008 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3009 	kprintf(" device_name:   %s\n", mount->device_name);
3010 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3011 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3012 	kprintf(" partition:     %p\n", mount->partition);
3013 	kprintf(" lock:          %p\n", &mount->lock);
3014 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3015 		mount->owns_file_device ? " owns_file_device" : "");
3016 
3017 	fs_volume* volume = mount->volume;
3018 	while (volume != NULL) {
3019 		kprintf(" volume %p:\n", volume);
3020 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3021 		kprintf("  private_volume:   %p\n", volume->private_volume);
3022 		kprintf("  ops:              %p\n", volume->ops);
3023 		kprintf("  file_system:      %p\n", volume->file_system);
3024 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3025 		volume = volume->super_volume;
3026 	}
3027 
3028 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3029 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3030 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3031 	set_debug_variable("_partition", (addr_t)mount->partition);
3032 }
3033 
3034 
3035 static bool
3036 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3037 	const char* name)
3038 {
3039 	bool insertSlash = buffer[bufferSize] != '\0';
3040 	size_t nameLength = strlen(name);
3041 
3042 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3043 		return false;
3044 
3045 	if (insertSlash)
3046 		buffer[--bufferSize] = '/';
3047 
3048 	bufferSize -= nameLength;
3049 	memcpy(buffer + bufferSize, name, nameLength);
3050 
3051 	return true;
3052 }
3053 
3054 
3055 static bool
3056 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3057 	ino_t nodeID)
3058 {
3059 	if (bufferSize == 0)
3060 		return false;
3061 
3062 	bool insertSlash = buffer[bufferSize] != '\0';
3063 	if (insertSlash)
3064 		buffer[--bufferSize] = '/';
3065 
3066 	size_t size = snprintf(buffer, bufferSize,
3067 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3068 	if (size > bufferSize) {
3069 		if (insertSlash)
3070 			bufferSize++;
3071 		return false;
3072 	}
3073 
3074 	if (size < bufferSize)
3075 		memmove(buffer + bufferSize - size, buffer, size);
3076 
3077 	bufferSize -= size;
3078 	return true;
3079 }
3080 
3081 
3082 static char*
3083 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3084 	bool& _truncated)
3085 {
3086 	// null-terminate the path
3087 	buffer[--bufferSize] = '\0';
3088 
3089 	while (true) {
3090 		while (vnode->covers != NULL)
3091 			vnode = vnode->covers;
3092 
3093 		if (vnode == sRoot) {
3094 			_truncated = bufferSize == 0;
3095 			if (!_truncated)
3096 				buffer[--bufferSize] = '/';
3097 			return buffer + bufferSize;
3098 		}
3099 
3100 		// resolve the name
3101 		ino_t dirID;
3102 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3103 			vnode->id, dirID);
3104 		if (name == NULL) {
3105 			// Failed to resolve the name -- prepend "<dev,node>/".
3106 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3107 				vnode->mount->id, vnode->id);
3108 			return buffer + bufferSize;
3109 		}
3110 
3111 		// prepend the name
3112 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3113 			_truncated = true;
3114 			return buffer + bufferSize;
3115 		}
3116 
3117 		// resolve the directory node
3118 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3119 		if (nextVnode == NULL) {
3120 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3121 				vnode->mount->id, dirID);
3122 			return buffer + bufferSize;
3123 		}
3124 
3125 		vnode = nextVnode;
3126 	}
3127 }
3128 
3129 
3130 static void
3131 _dump_vnode(struct vnode* vnode, bool printPath)
3132 {
3133 	kprintf("VNODE: %p\n", vnode);
3134 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3135 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3136 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3137 	kprintf(" private_node:  %p\n", vnode->private_node);
3138 	kprintf(" mount:         %p\n", vnode->mount);
3139 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3140 	kprintf(" covers:        %p\n", vnode->covers);
3141 	kprintf(" cache:         %p\n", vnode->cache);
3142 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3143 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3144 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3145 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3146 
3147 	_dump_advisory_locking(vnode->advisory_locking);
3148 
3149 	if (printPath) {
3150 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3151 		if (buffer != NULL) {
3152 			bool truncated;
3153 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3154 				B_PATH_NAME_LENGTH, truncated);
3155 			if (path != NULL) {
3156 				kprintf(" path:          ");
3157 				if (truncated)
3158 					kputs("<truncated>/");
3159 				kputs(path);
3160 				kputs("\n");
3161 			} else
3162 				kprintf("Failed to resolve vnode path.\n");
3163 
3164 			debug_free(buffer);
3165 		} else
3166 			kprintf("Failed to allocate memory for constructing the path.\n");
3167 	}
3168 
3169 	set_debug_variable("_node", (addr_t)vnode->private_node);
3170 	set_debug_variable("_mount", (addr_t)vnode->mount);
3171 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3172 	set_debug_variable("_covers", (addr_t)vnode->covers);
3173 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3174 }
3175 
3176 
3177 static int
3178 dump_mount(int argc, char** argv)
3179 {
3180 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3181 		kprintf("usage: %s [id|address]\n", argv[0]);
3182 		return 0;
3183 	}
3184 
3185 	ulong val = parse_expression(argv[1]);
3186 	uint32 id = val;
3187 
3188 	struct fs_mount* mount = sMountsTable->Lookup(id);
3189 	if (mount == NULL) {
3190 		if (IS_USER_ADDRESS(id)) {
3191 			kprintf("fs_mount not found\n");
3192 			return 0;
3193 		}
3194 		mount = (fs_mount*)val;
3195 	}
3196 
3197 	_dump_mount(mount);
3198 	return 0;
3199 }
3200 
3201 
3202 static int
3203 dump_mounts(int argc, char** argv)
3204 {
3205 	if (argc != 1) {
3206 		kprintf("usage: %s\n", argv[0]);
3207 		return 0;
3208 	}
3209 
3210 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3211 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3212 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3213 
3214 	struct fs_mount* mount;
3215 
3216 	MountTable::Iterator iterator(sMountsTable);
3217 	while (iterator.HasNext()) {
3218 		mount = iterator.Next();
3219 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3220 			mount->root_vnode->covers, mount->volume->private_volume,
3221 			mount->volume->file_system_name);
3222 
3223 		fs_volume* volume = mount->volume;
3224 		while (volume->super_volume != NULL) {
3225 			volume = volume->super_volume;
3226 			kprintf("                                     %p %s\n",
3227 				volume->private_volume, volume->file_system_name);
3228 		}
3229 	}
3230 
3231 	return 0;
3232 }
3233 
3234 
3235 static int
3236 dump_vnode(int argc, char** argv)
3237 {
3238 	bool printPath = false;
3239 	int argi = 1;
3240 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3241 		printPath = true;
3242 		argi++;
3243 	}
3244 
3245 	if (argi >= argc || argi + 2 < argc || strcmp(argv[argi], "--help") == 0) {
3246 		print_debugger_command_usage(argv[0]);
3247 		return 0;
3248 	}
3249 
3250 	struct vnode* vnode = NULL;
3251 
3252 	if (argi + 1 == argc) {
3253 		vnode = (struct vnode*)parse_expression(argv[argi]);
3254 		if (IS_USER_ADDRESS(vnode)) {
3255 			kprintf("invalid vnode address\n");
3256 			return 0;
3257 		}
3258 		_dump_vnode(vnode, printPath);
3259 		return 0;
3260 	}
3261 
3262 	dev_t device = parse_expression(argv[argi]);
3263 	ino_t id = parse_expression(argv[argi + 1]);
3264 
3265 	VnodeTable::Iterator iterator(sVnodeTable);
3266 	while (iterator.HasNext()) {
3267 		vnode = iterator.Next();
3268 		if (vnode->id != id || vnode->device != device)
3269 			continue;
3270 
3271 		_dump_vnode(vnode, printPath);
3272 	}
3273 
3274 	return 0;
3275 }
3276 
3277 
3278 static int
3279 dump_vnodes(int argc, char** argv)
3280 {
3281 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3282 		kprintf("usage: %s [device]\n", argv[0]);
3283 		return 0;
3284 	}
3285 
3286 	// restrict dumped nodes to a certain device if requested
3287 	dev_t device = parse_expression(argv[1]);
3288 
3289 	struct vnode* vnode;
3290 
3291 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3292 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3293 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3294 
3295 	VnodeTable::Iterator iterator(sVnodeTable);
3296 	while (iterator.HasNext()) {
3297 		vnode = iterator.Next();
3298 		if (vnode->device != device)
3299 			continue;
3300 
3301 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3302 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3303 			vnode->private_node, vnode->advisory_locking,
3304 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3305 			vnode->IsUnpublished() ? "u" : "-");
3306 	}
3307 
3308 	return 0;
3309 }
3310 
3311 
3312 static int
3313 dump_vnode_caches(int argc, char** argv)
3314 {
3315 	struct vnode* vnode;
3316 
3317 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3318 		kprintf("usage: %s [device]\n", argv[0]);
3319 		return 0;
3320 	}
3321 
3322 	// restrict dumped nodes to a certain device if requested
3323 	dev_t device = -1;
3324 	if (argc > 1)
3325 		device = parse_expression(argv[1]);
3326 
3327 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3328 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3329 
3330 	VnodeTable::Iterator iterator(sVnodeTable);
3331 	while (iterator.HasNext()) {
3332 		vnode = iterator.Next();
3333 		if (vnode->cache == NULL)
3334 			continue;
3335 		if (device != -1 && vnode->device != device)
3336 			continue;
3337 
3338 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3339 			vnode, vnode->device, vnode->id, vnode->cache,
3340 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3341 			vnode->cache->page_count);
3342 	}
3343 
3344 	return 0;
3345 }
3346 
3347 
3348 int
3349 dump_io_context(int argc, char** argv)
3350 {
3351 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3352 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3353 		return 0;
3354 	}
3355 
3356 	struct io_context* context = NULL;
3357 
3358 	if (argc > 1) {
3359 		ulong num = parse_expression(argv[1]);
3360 		if (IS_KERNEL_ADDRESS(num))
3361 			context = (struct io_context*)num;
3362 		else {
3363 			Team* team = team_get_team_struct_locked(num);
3364 			if (team == NULL) {
3365 				kprintf("could not find team with ID %lu\n", num);
3366 				return 0;
3367 			}
3368 			context = (struct io_context*)team->io_context;
3369 		}
3370 	} else
3371 		context = get_current_io_context(true);
3372 
3373 	kprintf("I/O CONTEXT: %p\n", context);
3374 	kprintf(" root vnode:\t%p\n", context->root);
3375 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3376 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3377 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3378 
3379 	if (context->num_used_fds) {
3380 		kprintf("   no.    %*s  ref  open  mode         pos    %*s\n",
3381 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3382 	}
3383 
3384 	for (uint32 i = 0; i < context->table_size; i++) {
3385 		struct file_descriptor* fd = context->fds[i];
3386 		if (fd == NULL)
3387 			continue;
3388 
3389 		kprintf("  %3" B_PRIu32 ":  %p  %3" B_PRId32 "  %4"
3390 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3391 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3392 			fd->pos, fd->cookie,
3393 			(fd_vnode(fd) != NULL) ? "vnode" : "mount",
3394 			fd->u.vnode);
3395 	}
3396 
3397 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3398 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3399 
3400 	set_debug_variable("_cwd", (addr_t)context->cwd);
3401 
3402 	return 0;
3403 }
3404 
3405 
3406 int
3407 dump_vnode_usage(int argc, char** argv)
3408 {
3409 	if (argc != 1) {
3410 		kprintf("usage: %s\n", argv[0]);
3411 		return 0;
3412 	}
3413 
3414 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3415 		sUnusedVnodes, kMaxUnusedVnodes);
3416 
3417 	uint32 count = sVnodeTable->CountElements();
3418 
3419 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3420 		count - sUnusedVnodes);
3421 	return 0;
3422 }
3423 
3424 #endif	// ADD_DEBUGGER_COMMANDS
3425 
3426 
3427 /*!	Clears memory specified by an iovec array.
3428 */
3429 static void
3430 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3431 {
3432 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3433 		size_t length = std::min(vecs[i].iov_len, bytes);
3434 		memset(vecs[i].iov_base, 0, length);
3435 		bytes -= length;
3436 	}
3437 }
3438 
3439 
3440 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3441 	and calls the file system hooks to read/write the request to disk.
3442 */
3443 static status_t
3444 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3445 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3446 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3447 	bool doWrite)
3448 {
3449 	if (fileVecCount == 0) {
3450 		// There are no file vecs at this offset, so we're obviously trying
3451 		// to access the file outside of its bounds
3452 		return B_BAD_VALUE;
3453 	}
3454 
3455 	size_t numBytes = *_numBytes;
3456 	uint32 fileVecIndex;
3457 	size_t vecOffset = *_vecOffset;
3458 	uint32 vecIndex = *_vecIndex;
3459 	status_t status;
3460 	size_t size;
3461 
3462 	if (!doWrite && vecOffset == 0) {
3463 		// now directly read the data from the device
3464 		// the first file_io_vec can be read directly
3465 		// TODO: we could also write directly
3466 
3467 		if (fileVecs[0].length < (off_t)numBytes)
3468 			size = fileVecs[0].length;
3469 		else
3470 			size = numBytes;
3471 
3472 		if (fileVecs[0].offset >= 0) {
3473 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3474 				&vecs[vecIndex], vecCount - vecIndex, &size);
3475 		} else {
3476 			// sparse read
3477 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3478 			status = B_OK;
3479 		}
3480 		if (status != B_OK)
3481 			return status;
3482 
3483 		ASSERT((off_t)size <= fileVecs[0].length);
3484 
3485 		// If the file portion was contiguous, we're already done now
3486 		if (size == numBytes)
3487 			return B_OK;
3488 
3489 		// if we reached the end of the file, we can return as well
3490 		if ((off_t)size != fileVecs[0].length) {
3491 			*_numBytes = size;
3492 			return B_OK;
3493 		}
3494 
3495 		fileVecIndex = 1;
3496 
3497 		// first, find out where we have to continue in our iovecs
3498 		for (; vecIndex < vecCount; vecIndex++) {
3499 			if (size < vecs[vecIndex].iov_len)
3500 				break;
3501 
3502 			size -= vecs[vecIndex].iov_len;
3503 		}
3504 
3505 		vecOffset = size;
3506 	} else {
3507 		fileVecIndex = 0;
3508 		size = 0;
3509 	}
3510 
3511 	// Too bad, let's process the rest of the file_io_vecs
3512 
3513 	size_t totalSize = size;
3514 	size_t bytesLeft = numBytes - size;
3515 
3516 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3517 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3518 		off_t fileOffset = fileVec.offset;
3519 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3520 
3521 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3522 			fileLeft));
3523 
3524 		// process the complete fileVec
3525 		while (fileLeft > 0) {
3526 			iovec tempVecs[MAX_TEMP_IO_VECS];
3527 			uint32 tempCount = 0;
3528 
3529 			// size tracks how much of what is left of the current fileVec
3530 			// (fileLeft) has been assigned to tempVecs
3531 			size = 0;
3532 
3533 			// assign what is left of the current fileVec to the tempVecs
3534 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3535 					&& tempCount < MAX_TEMP_IO_VECS;) {
3536 				// try to satisfy one iovec per iteration (or as much as
3537 				// possible)
3538 
3539 				// bytes left of the current iovec
3540 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3541 				if (vecLeft == 0) {
3542 					vecOffset = 0;
3543 					vecIndex++;
3544 					continue;
3545 				}
3546 
3547 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3548 					vecIndex, vecOffset, size));
3549 
3550 				// actually available bytes
3551 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3552 
3553 				tempVecs[tempCount].iov_base
3554 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3555 				tempVecs[tempCount].iov_len = tempVecSize;
3556 				tempCount++;
3557 
3558 				size += tempVecSize;
3559 				vecOffset += tempVecSize;
3560 			}
3561 
3562 			size_t bytes = size;
3563 
3564 			if (fileOffset == -1) {
3565 				if (doWrite) {
3566 					panic("sparse write attempt: vnode %p", vnode);
3567 					status = B_IO_ERROR;
3568 				} else {
3569 					// sparse read
3570 					zero_iovecs(tempVecs, tempCount, bytes);
3571 					status = B_OK;
3572 				}
3573 			} else if (doWrite) {
3574 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3575 					tempVecs, tempCount, &bytes);
3576 			} else {
3577 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3578 					tempVecs, tempCount, &bytes);
3579 			}
3580 			if (status != B_OK)
3581 				return status;
3582 
3583 			totalSize += bytes;
3584 			bytesLeft -= size;
3585 			if (fileOffset >= 0)
3586 				fileOffset += size;
3587 			fileLeft -= size;
3588 			//dprintf("-> file left = %Lu\n", fileLeft);
3589 
3590 			if (size != bytes || vecIndex >= vecCount) {
3591 				// there are no more bytes or iovecs, let's bail out
3592 				*_numBytes = totalSize;
3593 				return B_OK;
3594 			}
3595 		}
3596 	}
3597 
3598 	*_vecIndex = vecIndex;
3599 	*_vecOffset = vecOffset;
3600 	*_numBytes = totalSize;
3601 	return B_OK;
3602 }
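
/*	Worked example (illustrative, not from the original source): a read of
	3072 bytes where the file data lives in two on-disk extents and the
	caller provided two memory vecs:

		fileVecs: { offset = 1000, length = 1024 },
		          { offset = 8192, length = 2048 }
		vecs:     { iov_base = A, iov_len = 2048 },
		          { iov_base = B, iov_len = 1024 }

	The first extent is read directly into the first 1024 bytes of A.
	For the second extent, tempVecs is built from the remaining 1024
	bytes of A plus all of B, and a single read_pages() call at offset
	8192 transfers the remaining 2048 bytes. A file vec with a negative
	offset denotes a sparse region and is zeroed instead of being read.
*/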
3603 
3604 
3605 static bool
3606 is_user_in_group(gid_t gid)
3607 {
3608 	if (gid == getegid())
3609 		return true;
3610 
3611 	gid_t groups[NGROUPS_MAX];
3612 	int groupCount = getgroups(NGROUPS_MAX, groups);
3613 	for (int i = 0; i < groupCount; i++) {
3614 		if (gid == groups[i])
3615 			return true;
3616 	}
3617 
3618 	return false;
3619 }
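
/*	Sketch (illustrative, hypothetical caller; not part of the original
	source): a POSIX-style permission check might use this helper to pick
	which rwx bit triplet of a node's st_mode applies:

		if (uid == geteuid())
			mode >>= 6;				// the owner bits apply
		else if (is_user_in_group(gid))
			mode >>= 3;				// the group bits apply
									// otherwise the "other" bits apply
*/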
3620 
3621 
3622 static status_t
3623 free_io_context(io_context* context)
3624 {
3625 	uint32 i;
3626 
3627 	TIOC(FreeIOContext(context));
3628 
3629 	if (context->root)
3630 		put_vnode(context->root);
3631 
3632 	if (context->cwd)
3633 		put_vnode(context->cwd);
3634 
3635 	mutex_lock(&context->io_mutex);
3636 
3637 	for (i = 0; i < context->table_size; i++) {
3638 		if (struct file_descriptor* descriptor = context->fds[i]) {
3639 			close_fd(context, descriptor);
3640 			put_fd(descriptor);
3641 		}
3642 	}
3643 
3644 	mutex_destroy(&context->io_mutex);
3645 
3646 	remove_node_monitors(context);
3647 	free(context->fds);
3648 	free(context);
3649 
3650 	return B_OK;
3651 }
3652 
3653 
3654 static status_t
3655 resize_monitor_table(struct io_context* context, const int newSize)
3656 {
3657 	status_t status = B_OK;
3658 
3659 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3660 		return B_BAD_VALUE;
3661 
3662 	mutex_lock(&context->io_mutex);
3663 
3664 	if ((size_t)newSize < context->num_monitors) {
3665 		status = B_BUSY;
3666 		goto out;
3667 	}
3668 	context->max_monitors = newSize;
3669 
3670 out:
3671 	mutex_unlock(&context->io_mutex);
3672 	return status;
3673 }
3674 
3675 
3676 //	#pragma mark - public API for file systems
3677 
3678 
3679 extern "C" status_t
3680 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3681 	fs_vnode_ops* ops)
3682 {
3683 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3684 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3685 
3686 	if (privateNode == NULL)
3687 		return B_BAD_VALUE;
3688 
3689 	int32 tries = BUSY_VNODE_RETRIES;
3690 restart:
3691 	// create the node
3692 	bool nodeCreated;
3693 	struct vnode* vnode;
3694 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3695 		nodeCreated);
3696 	if (status != B_OK)
3697 		return status;
3698 
3699 	WriteLocker nodeLocker(sVnodeLock, true);
3700 		// create_new_vnode_and_lock() has locked for us
3701 
3702 	if (!nodeCreated && vnode->IsBusy()) {
3703 		nodeLocker.Unlock();
3704 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3705 			return B_BUSY;
3706 		goto restart;
3707 	}
3708 
3709 	// file system integrity check:
3710 	// test if the vnode already exists and bail out if this is the case!
3711 	if (!nodeCreated) {
3712 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3713 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3714 			vnode->private_node);
3715 		return B_ERROR;
3716 	}
3717 
3718 	vnode->private_node = privateNode;
3719 	vnode->ops = ops;
3720 	vnode->SetUnpublished(true);
3721 
3722 	TRACE(("returns: %s\n", strerror(status)));
3723 
3724 	return status;
3725 }
3726 
3727 
3728 extern "C" status_t
3729 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3730 	fs_vnode_ops* ops, int type, uint32 flags)
3731 {
3732 	FUNCTION(("publish_vnode()\n"));
3733 
3734 	int32 tries = BUSY_VNODE_RETRIES;
3735 restart:
3736 	WriteLocker locker(sVnodeLock);
3737 
3738 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3739 
3740 	bool nodeCreated = false;
3741 	if (vnode == NULL) {
3742 		if (privateNode == NULL)
3743 			return B_BAD_VALUE;
3744 
3745 		// create the node
3746 		locker.Unlock();
3747 			// create_new_vnode_and_lock() will re-lock for us on success
3748 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3749 			nodeCreated);
3750 		if (status != B_OK)
3751 			return status;
3752 
3753 		locker.SetTo(sVnodeLock, true);
3754 	}
3755 
3756 	if (nodeCreated) {
3757 		vnode->private_node = privateNode;
3758 		vnode->ops = ops;
3759 		vnode->SetUnpublished(true);
3760 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3761 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3762 		// already known, but not published
3763 	} else if (vnode->IsBusy()) {
3764 		locker.Unlock();
3765 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3766 			return B_BUSY;
3767 		goto restart;
3768 	} else
3769 		return B_BAD_VALUE;
3770 
3771 	bool publishSpecialSubNode = false;
3772 
3773 	vnode->SetType(type);
3774 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3775 	publishSpecialSubNode = is_special_node_type(type)
3776 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3777 
3778 	status_t status = B_OK;
3779 
3780 	// create sub vnodes, if necessary
3781 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3782 		locker.Unlock();
3783 
3784 		fs_volume* subVolume = volume;
3785 		if (volume->sub_volume != NULL) {
3786 			while (status == B_OK && subVolume->sub_volume != NULL) {
3787 				subVolume = subVolume->sub_volume;
3788 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3789 					vnode);
3790 			}
3791 		}
3792 
3793 		if (status == B_OK && publishSpecialSubNode)
3794 			status = create_special_sub_node(vnode, flags);
3795 
3796 		if (status != B_OK) {
3797 			// error -- clean up the created sub vnodes
3798 			while (subVolume->super_volume != volume) {
3799 				subVolume = subVolume->super_volume;
3800 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3801 			}
3802 		}
3803 
3804 		if (status == B_OK) {
3805 			ReadLocker vnodesReadLocker(sVnodeLock);
3806 			AutoLocker<Vnode> nodeLocker(vnode);
3807 			vnode->SetBusy(false);
3808 			vnode->SetUnpublished(false);
3809 		} else {
3810 			locker.Lock();
3811 			sVnodeTable->Remove(vnode);
3812 			remove_vnode_from_mount_list(vnode, vnode->mount);
3813 			object_cache_free(sVnodeCache, vnode, 0);
3814 		}
3815 	} else {
3816 		// we still hold the write lock -- mark the node unbusy and published
3817 		vnode->SetBusy(false);
3818 		vnode->SetUnpublished(false);
3819 	}
3820 
3821 	TRACE(("returns: %s\n", strerror(status)));
3822 
3823 	return status;
3824 }
3825 
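// Typical call sequence (a sketch; myInode and gMyVnodeOps are hypothetical
// file system names): a node is first made known to the VFS with new_vnode()
// and only becomes visible once it is published:
//
//	status_t error = new_vnode(volume, inodeID, myInode, &gMyVnodeOps);
//	if (error == B_OK) {
//		// ... finish initializing myInode while the vnode is still busy ...
//		error = publish_vnode(volume, inodeID, myInode, &gMyVnodeOps,
//			myInode->Mode() & S_IFMT, 0);
//	}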
3826 
3827 extern "C" status_t
3828 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3829 {
3830 	struct vnode* vnode;
3831 
3832 	if (volume == NULL)
3833 		return B_BAD_VALUE;
3834 
3835 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3836 	if (status != B_OK)
3837 		return status;
3838 
3839 	// If this is a layered FS, we need to get the node cookie for the requested
3840 	// layer.
3841 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3842 		fs_vnode resolvedNode;
3843 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3844 			&resolvedNode);
3845 		if (status != B_OK) {
3846 			panic("get_vnode(): Failed to get super node for vnode %p, "
3847 				"volume: %p", vnode, volume);
3848 			put_vnode(vnode);
3849 			return status;
3850 		}
3851 
3852 		if (_privateNode != NULL)
3853 			*_privateNode = resolvedNode.private_node;
3854 	} else if (_privateNode != NULL)
3855 		*_privateNode = vnode->private_node;
3856 
3857 	return B_OK;
3858 }
3859 
3860 
3861 extern "C" status_t
3862 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3863 {
3864 	ReadLocker nodeLocker(sVnodeLock);
3865 
3866 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3867 	if (vnode == NULL)
3868 		return B_BAD_VALUE;
3869 
3870 	inc_vnode_ref_count(vnode);
3871 	return B_OK;
3872 }
3873 
3874 
3875 extern "C" status_t
3876 put_vnode(fs_volume* volume, ino_t vnodeID)
3877 {
3878 	struct vnode* vnode;
3879 
3880 	rw_lock_read_lock(&sVnodeLock);
3881 	vnode = lookup_vnode(volume->id, vnodeID);
3882 	rw_lock_read_unlock(&sVnodeLock);
3883 
3884 	if (vnode == NULL)
3885 		return B_BAD_VALUE;
3886 
3887 	dec_vnode_ref_count(vnode, false, true);
3888 	return B_OK;
3889 }
3890 
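// Every successful get_vnode() must be balanced by a put_vnode() (a minimal
// sketch, assuming a valid volume and vnodeID):
//
//	void* privateNode;
//	if (get_vnode(volume, vnodeID, &privateNode) == B_OK) {
//		// ... use privateNode ...
//		put_vnode(volume, vnodeID);
//	}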
3891 
3892 extern "C" status_t
3893 remove_vnode(fs_volume* volume, ino_t vnodeID)
3894 {
3895 	ReadLocker locker(sVnodeLock);
3896 
3897 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3898 	if (vnode == NULL)
3899 		return B_ENTRY_NOT_FOUND;
3900 
3901 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3902 		// this vnode is in use
3903 		return B_BUSY;
3904 	}
3905 
3906 	vnode->Lock();
3907 
3908 	vnode->SetRemoved(true);
3909 	bool removeUnpublished = false;
3910 
3911 	if (vnode->IsUnpublished()) {
3912 		// prepare the vnode for deletion
3913 		removeUnpublished = true;
3914 		vnode->SetBusy(true);
3915 	}
3916 
3917 	vnode->Unlock();
3918 	locker.Unlock();
3919 
3920 	if (removeUnpublished) {
3921 		// If the vnode hasn't been published yet, we delete it here
3922 		atomic_add(&vnode->ref_count, -1);
3923 		free_vnode(vnode, true);
3924 	}
3925 
3926 	return B_OK;
3927 }
3928 
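// Typical use (a sketch; inode is a hypothetical FS structure): an unlink()
// hook marks the node for deletion once its last link is gone, so that it is
// destroyed when the final reference is released:
//
//	if (inode->LinkCount() == 0)
//		remove_vnode(volume, inode->ID());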
3929 
3930 extern "C" status_t
3931 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3932 {
3933 	struct vnode* vnode;
3934 
3935 	rw_lock_read_lock(&sVnodeLock);
3936 
3937 	vnode = lookup_vnode(volume->id, vnodeID);
3938 	if (vnode) {
3939 		AutoLocker<Vnode> nodeLocker(vnode);
3940 		vnode->SetRemoved(false);
3941 	}
3942 
3943 	rw_lock_read_unlock(&sVnodeLock);
3944 	return B_OK;
3945 }
3946 
3947 
3948 extern "C" status_t
3949 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3950 {
3951 	ReadLocker _(sVnodeLock);
3952 
3953 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3954 		if (_removed != NULL)
3955 			*_removed = vnode->IsRemoved();
3956 		return B_OK;
3957 	}
3958 
3959 	return B_BAD_VALUE;
3960 }
3961 
3962 
3963 extern "C" fs_volume*
3964 volume_for_vnode(fs_vnode* _vnode)
3965 {
3966 	if (_vnode == NULL)
3967 		return NULL;
3968 
3969 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3970 	return vnode->mount->volume;
3971 }
3972 
3973 
3974 extern "C" status_t
3975 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3976 	uid_t nodeUserID)
3977 {
3978 	// get node permissions
3979 	int userPermissions = (mode & S_IRWXU) >> 6;
3980 	int groupPermissions = (mode & S_IRWXG) >> 3;
3981 	int otherPermissions = mode & S_IRWXO;
3982 
3983 	// get the node permissions for this uid/gid
3984 	int permissions = 0;
3985 	uid_t uid = geteuid();
3986 
3987 	if (uid == 0) {
3988 		// user is root
3989 		// root always has read/write permission, but at least one of the
3990 		// X bits must be set for execute permission
3991 		permissions = userPermissions | groupPermissions | otherPermissions
3992 			| S_IROTH | S_IWOTH;
3993 		if (S_ISDIR(mode))
3994 			permissions |= S_IXOTH;
3995 	} else if (uid == nodeUserID) {
3996 		// user is node owner
3997 		permissions = userPermissions;
3998 	} else if (is_user_in_group(nodeGroupID)) {
3999 		// user is in owning group
4000 		permissions = groupPermissions;
4001 	} else {
4002 		// user is one of the others
4003 		permissions = otherPermissions;
4004 	}
4005 
4006 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4007 }
4008 
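// Example use by a file system (a sketch; my_fs_access and my_inode are
// hypothetical): an access() hook can simply forward the node's ownership
// and mode information:
//
//	static status_t
//	my_fs_access(fs_volume* volume, fs_vnode* vnode, int accessMode)
//	{
//		struct my_inode* inode = (struct my_inode*)vnode->private_node;
//		return check_access_permissions(accessMode, inode->mode,
//			inode->gid, inode->uid);
//	}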
4009 
4010 #if 0
4011 extern "C" status_t
4012 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4013 	size_t* _numBytes)
4014 {
4015 	struct file_descriptor* descriptor;
4016 	struct vnode* vnode;
4017 
4018 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4019 	if (descriptor == NULL)
4020 		return B_FILE_ERROR;
4021 
4022 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4023 		count, 0, _numBytes);
4024 
4025 	put_fd(descriptor);
4026 	return status;
4027 }
4028 
4029 
4030 extern "C" status_t
4031 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4032 	size_t* _numBytes)
4033 {
4034 	struct file_descriptor* descriptor;
4035 	struct vnode* vnode;
4036 
4037 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4038 	if (descriptor == NULL)
4039 		return B_FILE_ERROR;
4040 
4041 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4042 		count, 0, _numBytes);
4043 
4044 	put_fd(descriptor);
4045 	return status;
4046 }
4047 #endif
4048 
4049 
4050 extern "C" status_t
4051 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4052 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4053 	size_t* _bytes)
4054 {
4055 	struct vnode* vnode;
4056 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4057 	if (!descriptor.IsSet())
4058 		return B_FILE_ERROR;
4059 
4060 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4061 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4062 		false);
4063 
4064 	return status;
4065 }
4066 
4067 
4068 extern "C" status_t
4069 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4070 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4071 	size_t* _bytes)
4072 {
4073 	struct vnode* vnode;
4074 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4075 	if (!descriptor.IsSet())
4076 		return B_FILE_ERROR;
4077 
4078 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4079 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4080 		true);
4081 
4082 	return status;
4083 }
4084 
4085 
4086 extern "C" status_t
4087 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4088 {
4089 	// lookup mount -- the caller is required to make sure that the mount
4090 	// won't go away
4091 	ReadLocker locker(sMountLock);
4092 	struct fs_mount* mount = find_mount(mountID);
4093 	if (mount == NULL)
4094 		return B_BAD_VALUE;
4095 	locker.Unlock();
4096 
4097 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4098 }
4099 
4100 
4101 extern "C" status_t
4102 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4103 {
4104 	// lookup mount -- the caller is required to make sure that the mount
4105 	// won't go away
4106 	ReadLocker locker(sMountLock);
4107 	struct fs_mount* mount = find_mount(mountID);
4108 	if (mount == NULL)
4109 		return B_BAD_VALUE;
4110 	locker.Unlock();
4111 
4112 	return mount->entry_cache.Add(dirID, name, -1, true);
4113 }
4114 
4115 
4116 extern "C" status_t
4117 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4118 {
4119 	// lookup mount -- the caller is required to make sure that the mount
4120 	// won't go away
4121 	ReadLocker locker(sMountLock);
4122 	struct fs_mount* mount = find_mount(mountID);
4123 	if (mount == NULL)
4124 		return B_BAD_VALUE;
4125 	locker.Unlock();
4126 
4127 	return mount->entry_cache.Remove(dirID, name);
4128 }
4129 
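// Example use (a sketch; result, dirID, name and nodeID are hypothetical
// locals of a lookup() hook): both positive and negative results can be
// cached, so repeated lookups are answered without calling back into the FS:
//
//	if (result == B_OK)
//		entry_cache_add(volume->id, dirID, name, nodeID);
//	else if (result == B_ENTRY_NOT_FOUND)
//		entry_cache_add_missing(volume->id, dirID, name);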
4130 
4131 //	#pragma mark - private VFS API
4132 //	Functions the VFS exports for other parts of the kernel
4133 
4134 
4135 /*! Acquires another reference to the vnode, which has to be released
4136 	by calling vfs_put_vnode().
4137 */
4138 void
4139 vfs_acquire_vnode(struct vnode* vnode)
4140 {
4141 	inc_vnode_ref_count(vnode);
4142 }
4143 
4144 
4145 /*! This is currently called from file_cache_create() only.
4146 	It's probably a temporary solution as long as devfs requires that
4147 	fs_read_pages()/fs_write_pages() are called with the standard
4148 	open cookie and not with a device cookie.
4149 	If that's done differently, remove this call; it has no other
4150 	purpose.
4151 */
4152 extern "C" status_t
4153 vfs_get_cookie_from_fd(int fd, void** _cookie)
4154 {
4155 	struct file_descriptor* descriptor;
4156 
4157 	descriptor = get_fd(get_current_io_context(true), fd);
4158 	if (descriptor == NULL)
4159 		return B_FILE_ERROR;
4160 
4161 	*_cookie = descriptor->cookie;
4162 	return B_OK;
4163 }
4164 
4165 
4166 extern "C" status_t
4167 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4168 {
4169 	*vnode = get_vnode_from_fd(fd, kernel);
4170 
4171 	if (*vnode == NULL)
4172 		return B_FILE_ERROR;
4173 
4174 	return B_NO_ERROR;
4175 }
4176 
4177 
4178 extern "C" status_t
4179 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4180 {
4181 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4182 		path, kernel));
4183 
4184 	KPath pathBuffer;
4185 	if (pathBuffer.InitCheck() != B_OK)
4186 		return B_NO_MEMORY;
4187 
4188 	char* buffer = pathBuffer.LockBuffer();
4189 	strlcpy(buffer, path, pathBuffer.BufferSize());
4190 
4191 	VnodePutter vnode;
4192 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4193 	if (status != B_OK)
4194 		return status;
4195 
4196 	*_vnode = vnode.Detach();
4197 	return B_OK;
4198 }
4199 
4200 
4201 extern "C" status_t
4202 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4203 {
4204 	struct vnode* vnode = NULL;
4205 
4206 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4207 	if (status != B_OK)
4208 		return status;
4209 
4210 	*_vnode = vnode;
4211 	return B_OK;
4212 }
4213 
4214 
4215 extern "C" status_t
4216 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4217 	const char* name, struct vnode** _vnode)
4218 {
4219 	VnodePutter vnode;
4220 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4221 	*_vnode = vnode.Detach();
4222 	return status;
4223 }
4224 
4225 
4226 extern "C" void
4227 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4228 {
4229 	*_mountID = vnode->device;
4230 	*_vnodeID = vnode->id;
4231 }
4232 
4233 
4234 /*!
4235 	Helper function abstracting the process of "converting" a given
4236 	vnode-pointer to a fs_vnode-pointer.
4237 	Currently only used in bindfs.
4238 */
4239 extern "C" fs_vnode*
4240 vfs_fsnode_for_vnode(struct vnode* vnode)
4241 {
4242 	return vnode;
4243 }
4244 
4245 
4246 /*!
4247 	Calls fs_open() on the given vnode and returns a new
4248 	file descriptor for it
4249 */
4250 int
4251 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4252 {
4253 	return open_vnode(vnode, openMode, kernel);
4254 }
4255 
4256 
4257 /*!	Looks up a vnode with the given mount and vnode ID.
4258 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4259 	to the node.
4260 	It's currently only used by file_cache_create().
4261 */
4262 extern "C" status_t
4263 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4264 {
4265 	rw_lock_read_lock(&sVnodeLock);
4266 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4267 	rw_lock_read_unlock(&sVnodeLock);
4268 
4269 	if (vnode == NULL)
4270 		return B_ERROR;
4271 
4272 	*_vnode = vnode;
4273 	return B_OK;
4274 }
4275 
4276 
4277 extern "C" status_t
4278 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4279 	bool traverseLeafLink, bool kernel, void** _node)
4280 {
4281 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4282 		volume, path, kernel));
4283 
4284 	KPath pathBuffer;
4285 	if (pathBuffer.InitCheck() != B_OK)
4286 		return B_NO_MEMORY;
4287 
4288 	fs_mount* mount;
4289 	status_t status = get_mount(volume->id, &mount);
4290 	if (status != B_OK)
4291 		return status;
4292 
4293 	char* buffer = pathBuffer.LockBuffer();
4294 	strlcpy(buffer, path, pathBuffer.BufferSize());
4295 
4296 	VnodePutter vnode;
4297 
4298 	if (buffer[0] == '/')
4299 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4300 	else {
4301 		inc_vnode_ref_count(mount->root_vnode);
4302 			// vnode_path_to_vnode() releases a reference to the starting vnode
4303 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4304 			kernel, vnode, NULL);
4305 	}
4306 
4307 	put_mount(mount);
4308 
4309 	if (status != B_OK)
4310 		return status;
4311 
4312 	if (vnode->device != volume->id) {
4313 		// wrong mount ID - must not gain access on foreign file system nodes
4314 		return B_BAD_VALUE;
4315 	}
4316 
4317 	// Use get_vnode() to resolve the cookie for the right layer.
4318 	status = get_vnode(volume, vnode->id, _node);
4319 
4320 	return status;
4321 }
4322 
4323 
4324 status_t
4325 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4326 	struct stat* stat, bool kernel)
4327 {
4328 	status_t status;
4329 
4330 	if (path != NULL) {
4331 		// path given: get the stat of the node referred to by (fd, path)
4332 		KPath pathBuffer(path);
4333 		if (pathBuffer.InitCheck() != B_OK)
4334 			return B_NO_MEMORY;
4335 
4336 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4337 			traverseLeafLink, stat, kernel);
4338 	} else {
4339 		// no path given: get the FD and use the FD operation
4340 		FileDescriptorPutter descriptor
4341 			(get_fd(get_current_io_context(kernel), fd));
4342 		if (!descriptor.IsSet())
4343 			return B_FILE_ERROR;
4344 
4345 		if (descriptor->ops->fd_read_stat)
4346 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4347 		else
4348 			status = B_UNSUPPORTED;
4349 	}
4350 
4351 	return status;
4352 }
4353 
4354 
4355 /*!	Finds the full path to the file that contains the module \a moduleName,
4356 	puts it into \a pathBuffer, and returns B_OK for success.
4357 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4358 	\c B_ENTRY_NOT_FOUND if no file could be found.
4359 	\a pathBuffer is clobbered in any case and must not be relied on if this
4360 	function returns unsuccessfully.
4361 	\a basePath and \a pathBuffer must not point to the same space.
4362 */
4363 status_t
4364 vfs_get_module_path(const char* basePath, const char* moduleName,
4365 	char* pathBuffer, size_t bufferSize)
4366 {
4367 	status_t status;
4368 	size_t length;
4369 	char* path;
4370 
4371 	if (bufferSize == 0
4372 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4373 		return B_BUFFER_OVERFLOW;
4374 
4375 	VnodePutter dir;
4376 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4377 	if (status != B_OK)
4378 		return status;
4379 
4380 	// the path buffer had been clobbered by the above call
4381 	length = strlcpy(pathBuffer, basePath, bufferSize);
4382 	if (pathBuffer[length - 1] != '/')
4383 		pathBuffer[length++] = '/';
4384 
4385 	path = pathBuffer + length;
4386 	bufferSize -= length;
4387 
4388 	VnodePutter file;
4389 	while (moduleName) {
4390 		char* nextPath = strchr(moduleName, '/');
4391 		if (nextPath == NULL)
4392 			length = strlen(moduleName);
4393 		else {
4394 			length = nextPath - moduleName;
4395 			nextPath++;
4396 		}
4397 
4398 		if (length + 1 >= bufferSize)
4399 			return B_BUFFER_OVERFLOW;
4400 
4401 		memcpy(path, moduleName, length);
4402 		path[length] = '\0';
4403 		moduleName = nextPath;
4404 
4405 		// vnode_path_to_vnode() assumes ownership of the passed dir
4406 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4407 		if (status != B_OK)
4408 			return status;
4409 
4410 		if (S_ISDIR(file->Type())) {
4411 			// go to the next directory
4412 			path[length] = '/';
4413 			path[length + 1] = '\0';
4414 			path += length + 1;
4415 			bufferSize -= length + 1;
4416 
4417 			dir.SetTo(file.Detach());
4418 		} else if (S_ISREG(file->Type())) {
4419 			// it's a file so it should be what we've searched for
4420 			return B_OK;
4421 		} else {
4422 			TRACE(("vfs_get_module_path(): something is strange here: "
4423 				"0x%08" B_PRIx32 "...\n", file->Type()));
4424 			return B_ERROR;
4425 		}
4426 	}
4427 
4428 	// if we got here, the moduleName just pointed to a directory, not to
4429 	// a real module - what should we do in this case?
4430 	return B_ENTRY_NOT_FOUND;
4431 }
4432 
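// Example (a sketch; the base path and module name are hypothetical): the
// components of the module name are resolved left to right, and the first
// component that turns out to be a regular file is taken as the result:
//
//	char path[B_PATH_NAME_LENGTH];
//	status_t error = vfs_get_module_path("/boot/system/add-ons/kernel",
//		"bus_managers/pci/v1", path, sizeof(path));
//	// on success, path names the file containing the module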
4433 
4434 /*!	\brief Normalizes a given path.
4435 
4436 	The path must refer to an existing or non-existing entry in an existing
4437 	directory, that is chopping off the leaf component the remaining path must
4438 	refer to an existing directory.
4439 
4440 	The returned path will be canonical in that it will be absolute, will not
4441 	contain any "." or ".." components or duplicate occurrences of '/'s,
4442 	and none of the directory components will be symbolic links.
4443 
4444 	Any two paths referring to the same entry will result in the same
4445 	normalized path (well, that is pretty much the definition of `normalized',
4446 	isn't it :-).
4447 
4448 	\param path The path to be normalized.
4449 	\param buffer The buffer into which the normalized path will be written.
4450 		   May be the same one as \a path.
4451 	\param bufferSize The size of \a buffer.
4452 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4453 	\param kernel \c true, if the IO context of the kernel shall be used,
4454 		   otherwise that of the team this thread belongs to. Only relevant,
4455 		   if the path is relative (to get the CWD).
4456 	\return \c B_OK if everything went fine, another error code otherwise.
4457 */
4458 status_t
4459 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4460 	bool traverseLink, bool kernel)
4461 {
4462 	if (!path || !buffer || bufferSize < 1)
4463 		return B_BAD_VALUE;
4464 
4465 	if (path != buffer) {
4466 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4467 			return B_BUFFER_OVERFLOW;
4468 	}
4469 
4470 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4471 }
4472 
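// Example (a sketch): since \a buffer may be the same as \a path, a path can
// be normalized in place:
//
//	char path[B_PATH_NAME_LENGTH] = "/boot/./home//config/..";
//	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK)
//		dprintf("normalized: %s\n", path);	// e.g. "/boot/home"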
4473 
4474 /*!	\brief Gets the parent of the passed in node.
4475 
4476 	Gets the parent of the passed in node, and correctly resolves covered
4477 	nodes.
4478 */
4479 extern "C" status_t
4480 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4481 {
4482 	return resolve_covered_parent(parent, device, node,
4483 		get_current_io_context(true));
4484 }
4485 
4486 
4487 /*!	\brief Creates a special node in the file system.
4488 
4489 	The caller gets a reference to the newly created node (which is passed
4490 	back through \a _createdVnode) and is responsible for releasing it.
4491 
4492 	\param path The path at which to create the entry for the node. Can be \c NULL,
4493 		in which case the node is created without an entry in the root FS -- it
4494 		will automatically be deleted when the last reference has been released.
4495 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4496 		the target file system will just create the node with its standard
4497 		operations. Depending on the type of the node a subnode might be created
4498 		automatically, though.
4499 	\param mode The type and permissions for the node to be created.
4500 	\param flags Flags to be passed to the creating FS.
4501 	\param kernel \c true, if called in the kernel context (relevant only if
4502 		\a path is not \c NULL and not absolute).
4503 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4504 		file system creating the node, with the private data pointer and
4505 		operations for the super node. Can be \c NULL.
4506 	\param _createdVnode Pointer to pre-allocated storage in which to store the
4507 		pointer to the newly created node.
4508 	\return \c B_OK, if everything went fine, another error code otherwise.
4509 */
4510 status_t
4511 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4512 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4513 	struct vnode** _createdVnode)
4514 {
4515 	VnodePutter dirNode;
4516 	char _leaf[B_FILE_NAME_LENGTH];
4517 	char* leaf = NULL;
4518 
4519 	if (path) {
4520 		// We've got a path. Get the dir vnode and the leaf name.
4521 		KPath tmpPathBuffer;
4522 		if (tmpPathBuffer.InitCheck() != B_OK)
4523 			return B_NO_MEMORY;
4524 
4525 		char* tmpPath = tmpPathBuffer.LockBuffer();
4526 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4527 			return B_NAME_TOO_LONG;
4528 
4529 		// get the dir vnode and the leaf name
4530 		leaf = _leaf;
4531 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4532 		if (error != B_OK)
4533 			return error;
4534 	} else {
4535 		// No path. Create the node in the root FS.
4536 		dirNode.SetTo(sRoot);
4537 		inc_vnode_ref_count(dirNode.Get());
4538 	}
4539 
4540 	// check support for creating special nodes
4541 	if (!HAS_FS_CALL(dirNode, create_special_node))
4542 		return B_UNSUPPORTED;
4543 
4544 	// create the node
4545 	fs_vnode superVnode;
4546 	ino_t nodeID;
4547 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4548 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4549 	if (status != B_OK)
4550 		return status;
4551 
4552 	// lookup the node
4553 	rw_lock_read_lock(&sVnodeLock);
4554 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4555 	rw_lock_read_unlock(&sVnodeLock);
4556 
4557 	if (*_createdVnode == NULL) {
4558 		panic("vfs_create_special_node(): lookup of node failed");
4559 		return B_ERROR;
4560 	}
4561 
4562 	return B_OK;
4563 }
4564 
4565 
4566 extern "C" void
4567 vfs_put_vnode(struct vnode* vnode)
4568 {
4569 	put_vnode(vnode);
4570 }
4571 
4572 
4573 extern "C" status_t
4574 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4575 {
4576 	// Get current working directory from io context
4577 	struct io_context* context = get_current_io_context(false);
4578 	status_t status = B_OK;
4579 
4580 	mutex_lock(&context->io_mutex);
4581 
4582 	if (context->cwd != NULL) {
4583 		*_mountID = context->cwd->device;
4584 		*_vnodeID = context->cwd->id;
4585 	} else
4586 		status = B_ERROR;
4587 
4588 	mutex_unlock(&context->io_mutex);
4589 	return status;
4590 }
4591 
4592 
4593 status_t
4594 vfs_unmount(dev_t mountID, uint32 flags)
4595 {
4596 	return fs_unmount(NULL, mountID, flags, true);
4597 }
4598 
4599 
4600 extern "C" status_t
4601 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4602 {
4603 	struct vnode* vnode;
4604 
4605 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4606 	if (status != B_OK)
4607 		return status;
4608 
4609 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4610 	put_vnode(vnode);
4611 	return B_OK;
4612 }
4613 
4614 
4615 extern "C" void
4616 vfs_free_unused_vnodes(int32 level)
4617 {
4618 	vnode_low_resource_handler(NULL,
4619 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4620 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4621 		level);
4622 }
4623 
4624 
4625 extern "C" bool
4626 vfs_can_page(struct vnode* vnode, void* cookie)
4627 {
4628 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4629 
4630 	if (HAS_FS_CALL(vnode, can_page))
4631 		return FS_CALL(vnode, can_page, cookie);
4632 	return false;
4633 }
4634 
4635 
4636 extern "C" status_t
4637 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4638 	const generic_io_vec* vecs, size_t count, uint32 flags,
4639 	generic_size_t* _numBytes)
4640 {
4641 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4642 		vecs, pos));
4643 
4644 #if VFS_PAGES_IO_TRACING
4645 	generic_size_t bytesRequested = *_numBytes;
4646 #endif
4647 
4648 	IORequest request;
4649 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4650 	if (status == B_OK) {
4651 		status = vfs_vnode_io(vnode, cookie, &request);
4652 		if (status == B_OK)
4653 			status = request.Wait();
4654 		*_numBytes = request.TransferredBytes();
4655 	}
4656 
4657 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4658 		status, *_numBytes));
4659 
4660 	return status;
4661 }
4662 
4663 
4664 extern "C" status_t
4665 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4666 	const generic_io_vec* vecs, size_t count, uint32 flags,
4667 	generic_size_t* _numBytes)
4668 {
4669 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4670 		vecs, pos));
4671 
4672 #if VFS_PAGES_IO_TRACING
4673 	generic_size_t bytesRequested = *_numBytes;
4674 #endif
4675 
4676 	IORequest request;
4677 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4678 	if (status == B_OK) {
4679 		status = vfs_vnode_io(vnode, cookie, &request);
4680 		if (status == B_OK)
4681 			status = request.Wait();
4682 		*_numBytes = request.TransferredBytes();
4683 	}
4684 
4685 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4686 		status, *_numBytes));
4687 
4688 	return status;
4689 }
4690 
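// A minimal read sketch (assuming cookie was obtained from the FS's open()
// hook and buffer is a kernel virtual address):
//
//	generic_io_vec vec;
//	vec.base = (generic_addr_t)buffer;
//	vec.length = B_PAGE_SIZE;
//	generic_size_t length = vec.length;
//	status_t error = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0, &length);
//	// on return, length holds the number of bytes actually transferred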
4691 
4692 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4693 	created if \a allocate is \c true.
4694 	In case it's successful, it will also grab a reference to the cache
4695 	it returns.
4696 */
4697 extern "C" status_t
4698 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4699 {
4700 	if (vnode->cache != NULL) {
4701 		vnode->cache->AcquireRef();
4702 		*_cache = vnode->cache;
4703 		return B_OK;
4704 	}
4705 
4706 	rw_lock_read_lock(&sVnodeLock);
4707 	vnode->Lock();
4708 
4709 	status_t status = B_OK;
4710 
4711 	// The cache could have been created in the meantime
4712 	if (vnode->cache == NULL) {
4713 		if (allocate) {
4714 			// TODO: actually the vnode needs to be busy already here, or
4715 			//	else this won't work...
4716 			bool wasBusy = vnode->IsBusy();
4717 			vnode->SetBusy(true);
4718 
4719 			vnode->Unlock();
4720 			rw_lock_read_unlock(&sVnodeLock);
4721 
4722 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4723 
4724 			rw_lock_read_lock(&sVnodeLock);
4725 			vnode->Lock();
4726 			vnode->SetBusy(wasBusy);
4727 		} else
4728 			status = B_BAD_VALUE;
4729 	}
4730 
4731 	vnode->Unlock();
4732 	rw_lock_read_unlock(&sVnodeLock);
4733 
4734 	if (status == B_OK) {
4735 		vnode->cache->AcquireRef();
4736 		*_cache = vnode->cache;
4737 	}
4738 
4739 	return status;
4740 }
4741 
4742 
4743 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4744 	their own.
4745 	In case it's successful, it will also grab a reference to the cache
4746 	that was set.
4747 */
4748 extern "C" status_t
4749 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4750 {
4751 	rw_lock_read_lock(&sVnodeLock);
4752 	vnode->Lock();
4753 
4754 	status_t status = B_OK;
4755 	if (vnode->cache != NULL) {
4756 		status = B_NOT_ALLOWED;
4757 	} else {
4758 		vnode->cache = _cache;
4759 		_cache->AcquireRef();
4760 	}
4761 
4762 	vnode->Unlock();
4763 	rw_lock_read_unlock(&sVnodeLock);
4764 	return status;
4765 }
4766 
4767 
4768 status_t
4769 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4770 	file_io_vec* vecs, size_t* _count)
4771 {
4772 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4773 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4774 
4775 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4776 }
4777 
4778 
4779 status_t
4780 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4781 {
4782 	status_t status = FS_CALL(vnode, read_stat, stat);
4783 
4784 	// fill in the st_dev and st_ino fields
4785 	if (status == B_OK) {
4786 		stat->st_dev = vnode->device;
4787 		stat->st_ino = vnode->id;
4788 		// the rdev field must stay unset for non-special files
4789 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4790 			stat->st_rdev = -1;
4791 	}
4792 
4793 	return status;
4794 }
4795 
4796 
4797 status_t
4798 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4799 {
4800 	struct vnode* vnode;
4801 	status_t status = get_vnode(device, inode, &vnode, true, false);
4802 	if (status != B_OK)
4803 		return status;
4804 
4805 	status = vfs_stat_vnode(vnode, stat);
4806 
4807 	put_vnode(vnode);
4808 	return status;
4809 }
4810 
4811 
4812 status_t
4813 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4814 {
4815 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4816 }
4817 
4818 
4819 status_t
4820 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4821 	bool kernel, char* path, size_t pathLength)
4822 {
4823 	VnodePutter vnode;
4824 	status_t status;
4825 
4826 	// filter invalid leaf names
4827 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4828 		return B_BAD_VALUE;
4829 
4830 	// get the vnode matching the dir's node_ref
4831 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4832 		// special cases "." and "..": we can directly get the vnode of the
4833 		// referenced directory
4834 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4835 		leaf = NULL;
4836 	} else {
4837 		struct vnode* temp = NULL;
4838 		status = get_vnode(device, inode, &temp, true, false);
4839 		vnode.SetTo(temp);
4840 	}
4841 	if (status != B_OK)
4842 		return status;
4843 
4844 	// get the directory path
4845 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4846 	vnode.Unset();
4847 		// we don't need the vnode anymore
4848 	if (status != B_OK)
4849 		return status;
4850 
4851 	// append the leaf name
4852 	if (leaf) {
4853 		// insert a directory separator if this is not the file system root
4854 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4855 				>= pathLength)
4856 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4857 			return B_NAME_TOO_LONG;
4858 		}
4859 	}
4860 
4861 	return B_OK;
4862 }
4863 
4864 
4865 /*!	If the given descriptor locked its vnode, that lock will be released. */
4866 void
4867 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4868 {
4869 	struct vnode* vnode = fd_vnode(descriptor);
4870 
4871 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4872 		vnode->mandatory_locked_by = NULL;
4873 }
4874 
4875 
4876 /*!	Releases any POSIX locks on the file descriptor. */
4877 status_t
4878 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4879 {
4880 	struct vnode* vnode = descriptor->u.vnode;
4881 	if (vnode == NULL)
4882 		return B_OK;
4883 
4884 	if (HAS_FS_CALL(vnode, release_lock))
4885 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4886 
4887 	return release_advisory_lock(vnode, context, NULL, NULL);
4888 }
4889 
4890 
4891 /*!	Closes all file descriptors of the specified I/O context that
4892 	have the O_CLOEXEC flag set.
4893 */
4894 void
4895 vfs_exec_io_context(io_context* context)
4896 {
4897 	uint32 i;
4898 
4899 	for (i = 0; i < context->table_size; i++) {
4900 		mutex_lock(&context->io_mutex);
4901 
4902 		struct file_descriptor* descriptor = context->fds[i];
4903 		bool remove = false;
4904 
4905 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4906 			context->fds[i] = NULL;
4907 			context->num_used_fds--;
4908 
4909 			remove = true;
4910 		}
4911 
4912 		mutex_unlock(&context->io_mutex);
4913 
4914 		if (remove) {
4915 			close_fd(context, descriptor);
4916 			put_fd(descriptor);
4917 		}
4918 	}
4919 }
4920 
4921 
4922 /*! Sets up a new io_context structure, and inherits the properties
4923 	of the parent io_context if it is given.
4924 */
4925 io_context*
4926 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4927 {
4928 	io_context* context = (io_context*)malloc(sizeof(io_context));
4929 	if (context == NULL)
4930 		return NULL;
4931 
4932 	TIOC(NewIOContext(context, parentContext));
4933 
4934 	memset(context, 0, sizeof(io_context));
4935 	context->ref_count = 1;
4936 
4937 	MutexLocker parentLocker;
4938 
4939 	size_t tableSize;
4940 	if (parentContext != NULL) {
4941 		parentLocker.SetTo(parentContext->io_mutex, false);
4942 		tableSize = parentContext->table_size;
4943 	} else
4944 		tableSize = DEFAULT_FD_TABLE_SIZE;
4945 
4946 	// allocate space for FDs and their close-on-exec flag
4947 	context->fds = (file_descriptor**)malloc(
4948 		sizeof(struct file_descriptor*) * tableSize
4949 		+ sizeof(struct select_info**) * tableSize
4950 		+ (tableSize + 7) / 8);
4951 	if (context->fds == NULL) {
4952 		free(context);
4953 		return NULL;
4954 	}
4955 
4956 	context->select_infos = (select_info**)(context->fds + tableSize);
4957 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4958 
4959 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4960 		+ sizeof(struct select_info**) * tableSize
4961 		+ (tableSize + 7) / 8);
4962 
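	// The combined allocation above is laid out as follows (a descriptive
	// sketch):
	//
	//	file_descriptor*	fds[tableSize]
	//	select_info*		select_infos[tableSize]
	//	uint8				fds_close_on_exec[(tableSize + 7) / 8]	// bitmap
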
4963 	mutex_init(&context->io_mutex, "I/O context");
4964 
4965 	// Copy all parent file descriptors
4966 
4967 	if (parentContext != NULL) {
4968 		size_t i;
4969 
4970 		mutex_lock(&sIOContextRootLock);
4971 		context->root = parentContext->root;
4972 		if (context->root)
4973 			inc_vnode_ref_count(context->root);
4974 		mutex_unlock(&sIOContextRootLock);
4975 
4976 		context->cwd = parentContext->cwd;
4977 		if (context->cwd)
4978 			inc_vnode_ref_count(context->cwd);
4979 
4980 		if (parentContext->inherit_fds) {
4981 			for (i = 0; i < tableSize; i++) {
4982 				struct file_descriptor* descriptor = parentContext->fds[i];
4983 
4984 				if (descriptor != NULL
4985 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4986 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4987 					if (closeOnExec && purgeCloseOnExec)
4988 						continue;
4989 
4990 					TFD(InheritFD(context, i, descriptor, parentContext));
4991 
4992 					context->fds[i] = descriptor;
4993 					context->num_used_fds++;
4994 					atomic_add(&descriptor->ref_count, 1);
4995 					atomic_add(&descriptor->open_count, 1);
4996 
4997 					if (closeOnExec)
4998 						fd_set_close_on_exec(context, i, true);
4999 				}
5000 			}
5001 		}
5002 
5003 		parentLocker.Unlock();
5004 	} else {
5005 		context->root = sRoot;
5006 		context->cwd = sRoot;
5007 
5008 		if (context->root)
5009 			inc_vnode_ref_count(context->root);
5010 
5011 		if (context->cwd)
5012 			inc_vnode_ref_count(context->cwd);
5013 	}
5014 
5015 	context->table_size = tableSize;
5016 	context->inherit_fds = parentContext != NULL;
5017 
5018 	list_init(&context->node_monitors);
5019 	context->max_monitors = DEFAULT_NODE_MONITORS;
5020 
5021 	return context;
5022 }
5023 
5024 
5025 void
5026 vfs_get_io_context(io_context* context)
5027 {
5028 	atomic_add(&context->ref_count, 1);
5029 }
5030 
5031 
5032 void
5033 vfs_put_io_context(io_context* context)
5034 {
5035 	if (atomic_add(&context->ref_count, -1) == 1)
5036 		free_io_context(context);
5037 }
5038 
5039 
5040 status_t
5041 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5042 {
5043 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5044 		return B_BAD_VALUE;
5045 
5046 	TIOC(ResizeIOContext(context, newSize));
5047 
5048 	MutexLocker _(context->io_mutex);
5049 
5050 	uint32 oldSize = context->table_size;
5051 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5052 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5053 
5054 	// If the tables shrink, make sure none of the fds being dropped are in use.
5055 	if (newSize < oldSize) {
5056 		for (uint32 i = oldSize; i-- > newSize;) {
5057 			if (context->fds[i])
5058 				return B_BUSY;
5059 		}
5060 	}
5061 
5062 	// store pointers to the old tables
5063 	file_descriptor** oldFDs = context->fds;
5064 	select_info** oldSelectInfos = context->select_infos;
5065 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5066 
5067 	// allocate new tables
5068 	file_descriptor** newFDs = (file_descriptor**)malloc(
5069 		sizeof(struct file_descriptor*) * newSize
5070 		+ sizeof(struct select_info**) * newSize
5071 		+ newCloseOnExitBitmapSize);
5072 	if (newFDs == NULL)
5073 		return B_NO_MEMORY;
5074 
5075 	context->fds = newFDs;
5076 	context->select_infos = (select_info**)(context->fds + newSize);
5077 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5078 	context->table_size = newSize;
5079 
5080 	// copy entries from old tables
5081 	uint32 toCopy = min_c(oldSize, newSize);
5082 
5083 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5084 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5085 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5086 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5087 
5088 	// clear additional entries, if the tables grow
5089 	if (newSize > oldSize) {
5090 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5091 		memset(context->select_infos + oldSize, 0,
5092 			sizeof(void*) * (newSize - oldSize));
5093 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5094 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5095 	}
5096 
5097 	free(oldFDs);
5098 
5099 	return B_OK;
5100 }
5101 
5102 
5103 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5104 
5105 	Given an arbitrary vnode (identified by mount and node ID), the function
5106 	checks, whether the vnode is covered by another vnode. If it is, the
5107 	function returns the mount and node ID of the covering vnode. Otherwise
5108 	it simply returns the supplied mount and node ID.
5109 
5110 	In case of error (e.g. the supplied node could not be found) the variables
5111 	for storing the resolved mount and node ID remain untouched and an error
5112 	code is returned.
5113 
5114 	\param mountID The mount ID of the vnode in question.
5115 	\param nodeID The node ID of the vnode in question.
5116 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5117 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5118 	\return
5119 	- \c B_OK, if everything went fine,
5120 	- another error code, if something went wrong.
5121 */
5122 status_t
5123 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5124 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5125 {
5126 	// get the node
5127 	struct vnode* node;
5128 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5129 	if (error != B_OK)
5130 		return error;
5131 
5132 	// resolve the node
5133 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5134 		put_vnode(node);
5135 		node = coveringNode;
5136 	}
5137 
5138 	// set the return values
5139 	*resolvedMountID = node->device;
5140 	*resolvedNodeID = node->id;
5141 
5142 	put_vnode(node);
5143 
5144 	return B_OK;
5145 }
5146 
5147 
5148 status_t
5149 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5150 	ino_t* _mountPointNodeID)
5151 {
5152 	ReadLocker nodeLocker(sVnodeLock);
5153 	ReadLocker mountLocker(sMountLock);
5154 
5155 	struct fs_mount* mount = find_mount(mountID);
5156 	if (mount == NULL)
5157 		return B_BAD_VALUE;
5158 
5159 	Vnode* mountPoint = mount->covers_vnode;
5160 
5161 	*_mountPointMountID = mountPoint->device;
5162 	*_mountPointNodeID = mountPoint->id;
5163 
5164 	return B_OK;
5165 }
5166 
5167 
5168 status_t
5169 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5170 	ino_t coveredNodeID)
5171 {
5172 	// get the vnodes
5173 	Vnode* vnode;
5174 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5175 	if (error != B_OK)
5176 		return B_BAD_VALUE;
5177 	VnodePutter vnodePutter(vnode);
5178 
5179 	Vnode* coveredVnode;
5180 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5181 		false);
5182 	if (error != B_OK)
5183 		return B_BAD_VALUE;
5184 	VnodePutter coveredVnodePutter(coveredVnode);
5185 
5186 	// establish the covered/covering links
5187 	WriteLocker locker(sVnodeLock);
5188 
5189 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5190 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5191 		return B_BUSY;
5192 	}
5193 
5194 	vnode->covers = coveredVnode;
5195 	vnode->SetCovering(true);
5196 
5197 	coveredVnode->covered_by = vnode;
5198 	coveredVnode->SetCovered(true);
5199 
5200 	// the vnodes do now reference each other
5201 	inc_vnode_ref_count(vnode);
5202 	inc_vnode_ref_count(coveredVnode);
5203 
5204 	return B_OK;
5205 }
5206 
5207 
5208 int
5209 vfs_getrlimit(int resource, struct rlimit* rlp)
5210 {
5211 	if (!rlp)
5212 		return B_BAD_ADDRESS;
5213 
5214 	switch (resource) {
5215 		case RLIMIT_NOFILE:
5216 		{
5217 			struct io_context* context = get_current_io_context(false);
5218 			MutexLocker _(context->io_mutex);
5219 
5220 			rlp->rlim_cur = context->table_size;
5221 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5222 			return 0;
5223 		}
5224 
5225 		case RLIMIT_NOVMON:
5226 		{
5227 			struct io_context* context = get_current_io_context(false);
5228 			MutexLocker _(context->io_mutex);
5229 
5230 			rlp->rlim_cur = context->max_monitors;
5231 			rlp->rlim_max = MAX_NODE_MONITORS;
5232 			return 0;
5233 		}
5234 
5235 		default:
5236 			return B_BAD_VALUE;
5237 	}
5238 }
5239 
5240 
5241 int
5242 vfs_setrlimit(int resource, const struct rlimit* rlp)
5243 {
5244 	if (!rlp)
5245 		return B_BAD_ADDRESS;
5246 
5247 	switch (resource) {
5248 		case RLIMIT_NOFILE:
5249 			/* TODO: check getuid() */
5250 			if (rlp->rlim_max != RLIM_SAVED_MAX
5251 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5252 				return B_NOT_ALLOWED;
5253 
5254 			return vfs_resize_fd_table(get_current_io_context(false),
5255 				rlp->rlim_cur);
5256 
5257 		case RLIMIT_NOVMON:
5258 			/* TODO: check getuid() */
5259 			if (rlp->rlim_max != RLIM_SAVED_MAX
5260 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5261 				return B_NOT_ALLOWED;
5262 
5263 			return resize_monitor_table(get_current_io_context(false),
5264 				rlp->rlim_cur);
5265 
5266 		default:
5267 			return B_BAD_VALUE;
5268 	}
5269 }
5270 
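// Example (a sketch): a team raising its open file limit ends up here; the
// hard limit is fixed, so rlim_max must match it:
//
//	struct rlimit rl = { 2048, MAX_FD_TABLE_SIZE };
//	int result = vfs_setrlimit(RLIMIT_NOFILE, &rl);
//		// resizes the team's FD table to 2048 slots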
5271 
5272 status_t
5273 vfs_init(kernel_args* args)
5274 {
5275 	vnode::StaticInit();
5276 
5277 	sVnodeTable = new(std::nothrow) VnodeTable();
5278 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5279 		panic("vfs_init: error creating vnode hash table\n");
5280 
5281 	struct vnode dummy_vnode;
5282 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5283 
5284 	struct fs_mount dummyMount;
5285 	sMountsTable = new(std::nothrow) MountTable();
5286 	if (sMountsTable == NULL
5287 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5288 		panic("vfs_init: error creating mounts hash table\n");
5289 
5290 	sPathNameCache = create_object_cache("vfs path names",
5291 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5292 	if (sPathNameCache == NULL)
5293 		panic("vfs_init: error creating path name object_cache\n");
5294 
5295 	sVnodeCache = create_object_cache("vfs vnodes",
5296 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5297 	if (sVnodeCache == NULL)
5298 		panic("vfs_init: error creating vnode object_cache\n");
5299 
5300 	sFileDescriptorCache = create_object_cache("vfs fds",
5301 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5302 	if (sFileDescriptorCache == NULL)
5303 		panic("vfs_init: error creating file descriptor object_cache\n");
5304 
5305 	node_monitor_init();
5306 
5307 	sRoot = NULL;
5308 
5309 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5310 
5311 	if (block_cache_init() != B_OK)
5312 		return B_ERROR;
5313 
5314 #ifdef ADD_DEBUGGER_COMMANDS
5315 	// add some debugger commands
5316 	add_debugger_command_etc("vnode", &dump_vnode,
5317 		"Print info about the specified vnode",
5318 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5319 		"Prints information about the vnode specified by address <vnode> or\n"
5320 		"<devID> <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5321 		"constructed and printed. It might not be possible to construct a\n"
5322 		"complete path, though.\n",
5323 		0);
5324 	add_debugger_command("vnodes", &dump_vnodes,
5325 		"list all vnodes (from the specified device)");
5326 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5327 		"list all vnode caches");
5328 	add_debugger_command("mount", &dump_mount,
5329 		"info about the specified fs_mount");
5330 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5331 	add_debugger_command("io_context", &dump_io_context,
5332 		"info about the I/O context");
5333 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5334 		"info about vnode usage");
5335 #endif
5336 
5337 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5338 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5339 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5340 		0);
5341 
5342 	fifo_init();
5343 	file_map_init();
5344 
5345 	return file_cache_init();
5346 }
5347 
5348 
5349 //	#pragma mark - fd_ops implementations
5350 
5351 
5352 /*!
5353 	Calls fs_open() on the given vnode and returns a new
5354 	file descriptor for it
5355 */
5356 static int
5357 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5358 {
5359 	void* cookie;
5360 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5361 	if (status != B_OK)
5362 		return status;
5363 
5364 	int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
5365 	if (fd < 0) {
5366 		FS_CALL(vnode, close, cookie);
5367 		FS_CALL(vnode, free_cookie, cookie);
5368 	}
5369 	return fd;
5370 }
5371 
5372 
5373 /*!
5374 	Creates a new node in the given directory (or opens an existing one,
5375 	unless O_EXCL is given) and returns a new file descriptor for it
5376 */
5377 static int
5378 create_vnode(struct vnode* directory, const char* name, int openMode,
5379 	int perms, bool kernel)
5380 {
5381 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5382 	status_t status = B_ERROR;
5383 	VnodePutter vnode, dirPutter;
5384 	void* cookie;
5385 	ino_t newID;
5386 	char clonedName[B_FILE_NAME_LENGTH + 1];
5387 
5388 	// This is somewhat tricky: If the entry already exists, the FS responsible
5389 	// for the directory might not necessarily also be the one responsible for
5390 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5391 	// we can actually never call the create() hook without O_EXCL. Instead we
5392 	// try to look the entry up first. If it already exists, we just open the
5393 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5394 	// introduces a race condition, since someone else might have created the
5395 	// entry in the meantime. We hope the respective FS returns the correct
5396 	// error code, in which case we retry (up to 3 times).
5397 
5398 	for (int i = 0; i < 3 && status != B_OK; i++) {
5399 		bool create = false;
5400 
5401 		// look the node up
5402 		{
5403 			struct vnode* entry = NULL;
5404 			status = lookup_dir_entry(directory, name, &entry);
5405 			vnode.SetTo(entry);
5406 		}
5407 		if (status == B_OK) {
5408 			if ((openMode & O_EXCL) != 0)
5409 				return B_FILE_EXISTS;
5410 
5411 			// If the node is a symlink, we have to follow it, unless
5412 			// O_NOTRAVERSE is set.
5413 			if (S_ISLNK(vnode->Type()) && traverse) {
5414 				vnode.Unset();
5415 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5416 						>= B_FILE_NAME_LENGTH) {
5417 					return B_NAME_TOO_LONG;
5418 				}
5419 
5420 				inc_vnode_ref_count(directory);
5421 				dirPutter.Unset();
5422 				status = vnode_path_to_vnode(directory, clonedName, true,
5423 					kernel, vnode, NULL, clonedName);
5424 				if (status != B_OK) {
5425 				// The vnode was not found, but maybe it has a parent and we can create it
5426 				// from there. In that case, vnode_path_to_vnode() has set vnode to the last
5427 				// directory found in the path.
5428 					if (status == B_ENTRY_NOT_FOUND) {
5429 						directory = vnode.Detach();
5430 						dirPutter.SetTo(directory);
5431 						name = clonedName;
5432 						create = true;
5433 					} else
5434 						return status;
5435 				}
5436 			}
5437 
5438 			if (!create) {
5439 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5440 					return B_LINK_LIMIT;
5441 
5442 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5443 				// on success keep the vnode reference for the FD
5444 				if (fd >= 0)
5445 					vnode.Detach();
5446 
5447 				return fd;
5448 			}
5449 		}
5450 
5451 		// it doesn't exist yet -- try to create it
5452 
5453 		if (!HAS_FS_CALL(directory, create))
5454 			return B_READ_ONLY_DEVICE;
5455 
5456 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5457 			&cookie, &newID);
5458 		if (status != B_OK
5459 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5460 			return status;
5461 		}
5462 	}
5463 
5464 	if (status != B_OK)
5465 		return status;
5466 
5467 	// the node has been created successfully
5468 
5469 	rw_lock_read_lock(&sVnodeLock);
5470 	vnode.SetTo(lookup_vnode(directory->device, newID));
5471 	rw_lock_read_unlock(&sVnodeLock);
5472 
5473 	if (!vnode.IsSet()) {
5474 		panic("vfs: fs_create() returned success but there is no vnode, "
5475 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5476 		return B_BAD_VALUE;
5477 	}
5478 
5479 	int fd = get_new_fd(&sFileOps, NULL, vnode.Get(), cookie, openMode, kernel);
5480 	if (fd >= 0) {
5481 		vnode.Detach();
5482 		return fd;
5483 	}
5484 
5485 	status = fd;
5486 
5487 	// something went wrong, clean up
5488 
5489 	FS_CALL(vnode.Get(), close, cookie);
5490 	FS_CALL(vnode.Get(), free_cookie, cookie);
5491 
5492 	FS_CALL(directory, unlink, name);
5493 
5494 	return status;
5495 }
5496 
5497 
5498 /*! Calls fs open_dir() on the given vnode and returns a new
5499 	file descriptor for it
5500 */
5501 static int
5502 open_dir_vnode(struct vnode* vnode, bool kernel)
5503 {
5504 	if (!HAS_FS_CALL(vnode, open_dir))
5505 		return B_UNSUPPORTED;
5506 
5507 	void* cookie;
5508 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5509 	if (status != B_OK)
5510 		return status;
5511 
5512 	// directory is opened, create a fd
5513 	status = get_new_fd(&sDirectoryOps, NULL, vnode, cookie, O_CLOEXEC, kernel);
5514 	if (status >= 0)
5515 		return status;
5516 
5517 	FS_CALL(vnode, close_dir, cookie);
5518 	FS_CALL(vnode, free_dir_cookie, cookie);
5519 
5520 	return status;
5521 }
5522 
5523 
5524 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5525 	file descriptor for it.
5526 	Used by attr_dir_open(), and attr_dir_open_fd().
5527 */
5528 static int
5529 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5530 {
5531 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5532 		return B_UNSUPPORTED;
5533 
5534 	void* cookie;
5535 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5536 	if (status != B_OK)
5537 		return status;
5538 
5539 	// directory is opened, create a fd
5540 	status = get_new_fd(&sAttributeDirectoryOps, NULL, vnode, cookie, O_CLOEXEC,
5541 		kernel);
5542 	if (status >= 0)
5543 		return status;
5544 
5545 	FS_CALL(vnode, close_attr_dir, cookie);
5546 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5547 
5548 	return status;
5549 }
5550 
5551 
5552 static int
5553 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5554 	int openMode, int perms, bool kernel)
5555 {
5556 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5557 		"kernel %d\n", name, openMode, perms, kernel));
5558 
5559 	// get directory to put the new file in
5560 	struct vnode* directory;
5561 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5562 	if (status != B_OK)
5563 		return status;
5564 
5565 	status = create_vnode(directory, name, openMode, perms, kernel);
5566 	put_vnode(directory);
5567 
5568 	return status;
5569 }
5570 
5571 
5572 static int
5573 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5574 {
5575 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5576 		openMode, perms, kernel));
5577 
5578 	// get directory to put the new file in
5579 	char name[B_FILE_NAME_LENGTH];
5580 	VnodePutter directory;
5581 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5582 		kernel);
5583 	if (status < 0)
5584 		return status;
5585 
5586 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5587 }
5588 
5589 
5590 static int
5591 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5592 	int openMode, bool kernel)
5593 {
5594 	if (name == NULL || *name == '\0')
5595 		return B_BAD_VALUE;
5596 
5597 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5598 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5599 
5600 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5601 
5602 	// get the vnode matching the entry_ref
5603 	VnodePutter vnode;
5604 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5605 		kernel, vnode);
5606 	if (status != B_OK)
5607 		return status;
5608 
5609 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5610 		return B_LINK_LIMIT;
5611 
5612 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5613 	if (newFD >= 0) {
5614 		cache_node_opened(vnode.Get(), vnode->cache, mountID,
5615 			directoryID, vnode->id, name);
5616 
5617 		// The vnode reference has been transferred to the FD
5618 		vnode.Detach();
5619 	}
5620 
5621 	return newFD;
5622 }
5623 
5624 
5625 static int
5626 file_open(int fd, char* path, int openMode, bool kernel)
5627 {
5628 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5629 
5630 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5631 		fd, path, openMode, kernel));
5632 
5633 	// get the vnode matching the vnode + path combination
5634 	VnodePutter vnode;
5635 	ino_t parentID;
5636 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5637 		&parentID, kernel);
5638 	if (status != B_OK)
5639 		return status;
5640 
5641 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5642 		return B_LINK_LIMIT;
5643 
5644 	// open the vnode
5645 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5646 	if (newFD >= 0) {
5647 		cache_node_opened(vnode.Get(), vnode->cache,
5648 			vnode->device, parentID, vnode->id, NULL);
5649 
5650 		// The vnode reference has been transferred to the FD
5651 		vnode.Detach();
5652 	}
5653 
5654 	return newFD;
5655 }
5656 
5657 
5658 static status_t
5659 file_close(struct file_descriptor* descriptor)
5660 {
5661 	struct vnode* vnode = descriptor->u.vnode;
5662 	status_t status = B_OK;
5663 
5664 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5665 
5666 	cache_node_closed(vnode, vnode->cache, vnode->device,
5667 		vnode->id);
5668 	if (HAS_FS_CALL(vnode, close)) {
5669 		status = FS_CALL(vnode, close, descriptor->cookie);
5670 	}
5671 
5672 	if (status == B_OK) {
5673 		// remove all outstanding locks for this team
5674 		if (HAS_FS_CALL(vnode, release_lock))
5675 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5676 		else
5677 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5678 	}
5679 	return status;
5680 }
5681 
5682 
5683 static void
5684 file_free_fd(struct file_descriptor* descriptor)
5685 {
5686 	struct vnode* vnode = descriptor->u.vnode;
5687 
5688 	if (vnode != NULL) {
5689 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5690 		put_vnode(vnode);
5691 	}
5692 }
5693 
5694 
5695 static status_t
5696 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5697 	size_t* length)
5698 {
5699 	struct vnode* vnode = descriptor->u.vnode;
5700 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5701 		pos, length, *length));
5702 
5703 	if (S_ISDIR(vnode->Type()))
5704 		return B_IS_A_DIRECTORY;
5705 	if (pos != -1 && descriptor->pos == -1)
5706 		return ESPIPE;
5707 
5708 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5709 }
5710 
5711 
5712 static status_t
5713 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5714 	size_t* length)
5715 {
5716 	struct vnode* vnode = descriptor->u.vnode;
5717 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5718 		length));
5719 
5720 	if (S_ISDIR(vnode->Type()))
5721 		return B_IS_A_DIRECTORY;
5722 	if (pos != -1 && descriptor->pos == -1)
5723 		return ESPIPE;
5724 
5725 	if (!HAS_FS_CALL(vnode, write))
5726 		return B_READ_ONLY_DEVICE;
5727 
5728 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5729 }
5730 
5731 
5732 static ssize_t
5733 file_vector_io(struct file_descriptor* descriptor, off_t pos,
5734 	const struct iovec *vecs, int count, bool write)
5735 {
5736 	struct vnode* vnode = descriptor->u.vnode;
5737 	if (pos != -1 && descriptor->pos == -1)
5738 		return ESPIPE;
5739 	if (S_ISDIR(vnode->Type()))
5740 		return B_IS_A_DIRECTORY;
5741 
5742 	if (pos == -1)
5743 		return B_UNSUPPORTED;
5744 	if (!HAS_FS_CALL(vnode, io))
5745 		return B_UNSUPPORTED;
5746 
5747 	// We can only perform real vectored I/O for vnodes that have no cache,
5748 	// because the I/O hook bypasses the cache entirely.
5749 	if (vnode->cache != NULL)
5750 		return B_UNSUPPORTED;
5751 
5752 	BStackOrHeapArray<generic_io_vec, 8> iovecs(count);
5753 	if (!iovecs.IsValid())
5754 		return B_NO_MEMORY;
5755 
5756 	generic_size_t length = 0;
5757 	for (int i = 0; i < count; i++) {
5758 		iovecs[i].base = (generic_addr_t)vecs[i].iov_base;
5759 		iovecs[i].length = vecs[i].iov_len;
5760 		length += vecs[i].iov_len;
5761 	}
5762 
5763 	status_t status = (write ? vfs_write_pages : vfs_read_pages)(vnode,
5764 		descriptor->cookie, pos, iovecs, count, 0, &length);
5765 	if (length > 0)
5766 		return length;
5767 	return status;
5768 }
5769 
5770 
5771 static ssize_t
5772 file_readv(struct file_descriptor* descriptor, off_t pos,
5773 	const struct iovec *vecs, int count)
5774 {
5775 	FUNCTION(("file_readv: pos %" B_PRIdOFF "\n", pos));
5776 	return file_vector_io(descriptor, pos, vecs, count, false);
5777 }
5778 
5779 
5780 static ssize_t
5781 file_writev(struct file_descriptor* descriptor, off_t pos,
5782 	const struct iovec *vecs, int count)
5783 {
5784 	FUNCTION(("file_writev: pos %" B_PRIdOFF "\n", pos));
5785 	return file_vector_io(descriptor, pos, vecs, count, true);
5786 }
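
/*	Hypothetical userland sketch (not part of this file): a positioned
	scatter/gather read that ends up in file_readv() above. Note that the
	vectored path is only taken for vnodes without a file cache. Assumes
	preadv() is available.
*/
#if 0
#include <sys/uio.h>

static ssize_t
gather_read(int fd, off_t pos, void* a, size_t aLength, void* b,
	size_t bLength)
{
	struct iovec vecs[2] = {
		{ a, aLength },
		{ b, bLength }
	};
	return preadv(fd, vecs, 2, pos);
}
#endif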
5787 
5788 
5789 static off_t
5790 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5791 {
5792 	struct vnode* vnode = descriptor->u.vnode;
5793 	off_t offset;
5794 	bool isDevice = false;
5795 
5796 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5797 		seekType));
5798 
5799 	if (descriptor->pos == -1)
5800 		return ESPIPE;
5801 
5802 	switch (vnode->Type() & S_IFMT) {
5803 		// drivers publish block devices as character devices, so check for both
5804 		case S_IFBLK:
5805 		case S_IFCHR:
5806 			isDevice = true;
5807 			break;
5808 	}
5809 
5810 	switch (seekType) {
5811 		case SEEK_SET:
5812 			offset = 0;
5813 			break;
5814 		case SEEK_CUR:
5815 			offset = descriptor->pos;
5816 			break;
5817 		case SEEK_END:
5818 		{
5819 			// stat() the node
5820 			if (!HAS_FS_CALL(vnode, read_stat))
5821 				return B_UNSUPPORTED;
5822 
5823 			struct stat stat;
5824 			status_t status = FS_CALL(vnode, read_stat, &stat);
5825 			if (status != B_OK)
5826 				return status;
5827 
5828 			offset = stat.st_size;
5829 
5830 			if (offset == 0 && isDevice) {
5831 				// stat() on regular drivers doesn't report size
5832 				device_geometry geometry;
5833 
5834 				if (HAS_FS_CALL(vnode, ioctl)) {
5835 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5836 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5837 					if (status == B_OK)
5838 						offset = (off_t)geometry.bytes_per_sector
5839 							* geometry.sectors_per_track
5840 							* geometry.cylinder_count
5841 							* geometry.head_count;
5842 				}
5843 			}
5844 
5845 			break;
5846 		}
5847 		case SEEK_DATA:
5848 		case SEEK_HOLE:
5849 		{
5850 			status_t status = B_BAD_VALUE;
5851 			if (HAS_FS_CALL(vnode, ioctl)) {
5852 				offset = pos;
5853 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5854 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5855 					&offset, sizeof(offset));
5856 				if (status == B_OK) {
5857 					if (offset > pos)
5858 						offset -= pos;
5859 					break;
5860 				}
5861 			}
5862 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5863 				return status;
5864 
5865 			// fall back to a basic implementation using stat() on the node
5866 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5867 				return B_BAD_VALUE;
5868 
5869 			struct stat stat;
5870 			status = FS_CALL(vnode, read_stat, &stat);
5871 			if (status != B_OK)
5872 				return status;
5873 
5874 			off_t end = stat.st_size;
5875 			if (pos >= end)
5876 				return ENXIO;
5877 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5878 			break;
5879 		}
5880 		default:
5881 			return B_BAD_VALUE;
5882 	}
5883 
5884 	// assumes off_t is 64 bits wide
5885 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5886 		return B_BUFFER_OVERFLOW;
5887 
5888 	pos += offset;
5889 	if (pos < 0)
5890 		return B_BAD_VALUE;
5891 
5892 	return descriptor->pos = pos;
5893 }
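
/*	Hypothetical userland sketch: with the generic fallback above, a file
	system without FIOSEEKDATA/FIOSEEKHOLE support reports the whole file as
	one data region, so SEEK_HOLE seeks to st_size and SEEK_DATA returns the
	given position (or ENXIO at/beyond the end of the file).
*/
#if 0
#include <unistd.h>

static off_t
next_hole(int fd, off_t pos)
{
	// returns the offset of the next hole at or after pos
	return lseek(fd, pos, SEEK_HOLE);
}
#endif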
5894 
5895 
5896 static status_t
5897 file_select(struct file_descriptor* descriptor, uint8 event,
5898 	struct selectsync* sync)
5899 {
5900 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5901 
5902 	struct vnode* vnode = descriptor->u.vnode;
5903 
5904 	// If the FS has no select() hook, notify select() now.
5905 	if (!HAS_FS_CALL(vnode, select)) {
5906 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5907 			notify_select_event(sync, event);
5908 		return B_UNSUPPORTED;
5909 	}
5910 
5911 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5912 }
5913 
5914 
5915 static status_t
5916 file_deselect(struct file_descriptor* descriptor, uint8 event,
5917 	struct selectsync* sync)
5918 {
5919 	struct vnode* vnode = descriptor->u.vnode;
5920 
5921 	if (!HAS_FS_CALL(vnode, deselect))
5922 		return B_OK;
5923 
5924 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5925 }
5926 
5927 
5928 static status_t
5929 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5930 	bool kernel)
5931 {
5932 	struct vnode* vnode;
5933 	status_t status;
5934 
5935 	if (name == NULL || *name == '\0')
5936 		return B_BAD_VALUE;
5937 
5938 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5939 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5940 
5941 	status = get_vnode(mountID, parentID, &vnode, true, false);
5942 	if (status != B_OK)
5943 		return status;
5944 
5945 	if (HAS_FS_CALL(vnode, create_dir))
5946 		status = FS_CALL(vnode, create_dir, name, perms);
5947 	else
5948 		status = B_READ_ONLY_DEVICE;
5949 
5950 	put_vnode(vnode);
5951 	return status;
5952 }
5953 
5954 
5955 static status_t
5956 dir_create(int fd, char* path, int perms, bool kernel)
5957 {
5958 	char filename[B_FILE_NAME_LENGTH];
5959 	status_t status;
5960 
5961 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5962 		kernel));
5963 
5964 	VnodePutter vnode;
5965 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5966 	if (status < 0)
5967 		return status;
5968 
5969 	if (HAS_FS_CALL(vnode, create_dir)) {
5970 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5971 	} else
5972 		status = B_READ_ONLY_DEVICE;
5973 
5974 	return status;
5975 }
5976 
5977 
5978 static int
5979 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5980 {
5981 	FUNCTION(("dir_open_entry_ref()\n"));
5982 
5983 	if (name && name[0] == '\0')
5984 		return B_BAD_VALUE;
5985 
5986 	// get the vnode matching the entry_ref/node_ref
5987 	VnodePutter vnode;
5988 	status_t status;
5989 	if (name) {
5990 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5991 			vnode);
5992 	} else {
5993 		struct vnode* temp = NULL;
5994 		status = get_vnode(mountID, parentID, &temp, true, false);
5995 		vnode.SetTo(temp);
5996 	}
5997 	if (status != B_OK)
5998 		return status;
5999 
6000 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6001 	if (newFD >= 0) {
6002 		cache_node_opened(vnode.Get(), vnode->cache, mountID, parentID,
6003 			vnode->id, name);
6004 
6005 		// The vnode reference has been transferred to the FD
6006 		vnode.Detach();
6007 	}
6008 
6009 	return newFD;
6010 }
6011 
6012 
6013 static int
6014 dir_open(int fd, char* path, bool kernel)
6015 {
6016 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6017 		kernel));
6018 
6019 	// get the vnode matching the vnode + path combination
6020 	VnodePutter vnode;
6021 	ino_t parentID;
6022 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
6023 		kernel);
6024 	if (status != B_OK)
6025 		return status;
6026 
6027 	// open the dir
6028 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6029 	if (newFD >= 0) {
6030 		cache_node_opened(vnode.Get(), vnode->cache, vnode->device,
6031 			parentID, vnode->id, NULL);
6032 
6033 		// The vnode reference has been transferred to the FD
6034 		vnode.Detach();
6035 	}
6036 
6037 	return newFD;
6038 }
6039 
6040 
6041 static status_t
6042 dir_close(struct file_descriptor* descriptor)
6043 {
6044 	struct vnode* vnode = descriptor->u.vnode;
6045 
6046 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6047 
6048 	cache_node_closed(vnode, vnode->cache, vnode->device,
6049 		vnode->id);
6050 	if (HAS_FS_CALL(vnode, close_dir))
6051 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6052 
6053 	return B_OK;
6054 }
6055 
6056 
6057 static void
6058 dir_free_fd(struct file_descriptor* descriptor)
6059 {
6060 	struct vnode* vnode = descriptor->u.vnode;
6061 
6062 	if (vnode != NULL) {
6063 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6064 		put_vnode(vnode);
6065 	}
6066 }
6067 
6068 
6069 static status_t
6070 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6071 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6072 {
6073 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6074 		bufferSize, _count);
6075 }
6076 
6077 
6078 static status_t
6079 fix_dirent(struct vnode* parent, struct dirent* entry,
6080 	struct io_context* ioContext)
6081 {
6082 	// set d_pdev and d_pino
6083 	entry->d_pdev = parent->device;
6084 	entry->d_pino = parent->id;
6085 
6086 	// If this is the ".." entry and the directory is covering another vnode,
6087 	// we need to replace d_dev and d_ino with the actual values.
6088 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6089 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6090 			ioContext);
6091 	}
6092 
6093 	// resolve covered vnodes
6094 	ReadLocker _(&sVnodeLock);
6095 
6096 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6097 	if (vnode != NULL && vnode->covered_by != NULL) {
6098 		do {
6099 			vnode = vnode->covered_by;
6100 		} while (vnode->covered_by != NULL);
6101 
6102 		entry->d_dev = vnode->device;
6103 		entry->d_ino = vnode->id;
6104 	}
6105 
6106 	return B_OK;
6107 }
6108 
6109 
6110 static status_t
6111 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6112 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6113 {
6114 	if (!HAS_FS_CALL(vnode, read_dir))
6115 		return B_UNSUPPORTED;
6116 
6117 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6118 		_count);
6119 	if (error != B_OK)
6120 		return error;
6121 
6122 	// we need to adjust the read dirents
6123 	uint32 count = *_count;
6124 	for (uint32 i = 0; i < count; i++) {
6125 		error = fix_dirent(vnode, buffer, ioContext);
6126 		if (error != B_OK)
6127 			return error;
6128 
6129 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6130 	}
6131 
6132 	return error;
6133 }
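
/*	Sketch of how a caller walks the packed buffer that dir_read() fills in:
	entries are variable-length and chained via d_reclen, just as the
	fix_dirent() loop above advances through them.
*/
#if 0
static void
for_each_dirent(struct dirent* buffer, uint32 count)
{
	for (uint32 i = 0; i < count; i++) {
		// inspect buffer->d_dev, buffer->d_ino and buffer->d_name here
		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
	}
}
#endif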
6134 
6135 
6136 static status_t
6137 dir_rewind(struct file_descriptor* descriptor)
6138 {
6139 	struct vnode* vnode = descriptor->u.vnode;
6140 
6141 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6142 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6143 	}
6144 
6145 	return B_UNSUPPORTED;
6146 }
6147 
6148 
6149 static status_t
6150 dir_remove(int fd, char* path, bool kernel)
6151 {
6152 	char name[B_FILE_NAME_LENGTH];
6153 	status_t status;
6154 
6155 	if (path != NULL) {
6156 		// we need to make sure our path name doesn't end in "/", ".",
6157 		// or ".."
6158 		char* lastSlash;
6159 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6160 			char* leaf = lastSlash + 1;
6161 			if (!strcmp(leaf, ".."))
6162 				return B_NOT_ALLOWED;
6163 
6164 			// omit multiple slashes
6165 			while (lastSlash > path && lastSlash[-1] == '/')
6166 				lastSlash--;
6167 
6168 			if (leaf[0]
6169 				&& strcmp(leaf, ".")) {
6170 				break;
6171 			}
6172 			// "name/" -> "name", or "name/." -> "name"
6173 			lastSlash[0] = '\0';
6174 		}
6175 
6176 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6177 			return B_NOT_ALLOWED;
6178 	}
6179 
6180 	VnodePutter directory;
6181 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6182 	if (status != B_OK)
6183 		return status;
6184 
6185 	if (HAS_FS_CALL(directory, remove_dir))
6186 		status = FS_CALL(directory.Get(), remove_dir, name);
6187 	else
6188 		status = B_READ_ONLY_DEVICE;
6189 
6190 	return status;
6191 }
6192 
6193 
6194 static status_t
6195 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6196 	size_t length)
6197 {
6198 	struct vnode* vnode = descriptor->u.vnode;
6199 
6200 	if (HAS_FS_CALL(vnode, ioctl))
6201 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6202 
6203 	return B_DEV_INVALID_IOCTL;
6204 }
6205 
6206 
6207 static status_t
6208 common_fcntl(int fd, int op, size_t argument, bool kernel)
6209 {
6210 	struct flock flock;
6211 
6212 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6213 		fd, op, argument, kernel ? "kernel" : "user"));
6214 
6215 	struct io_context* context = get_current_io_context(kernel);
6216 
6217 	FileDescriptorPutter descriptor(get_fd(context, fd));
6218 	if (!descriptor.IsSet())
6219 		return B_FILE_ERROR;
6220 
6221 	struct vnode* vnode = fd_vnode(descriptor.Get());
6222 
6223 	status_t status = B_OK;
6224 
6225 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6226 		if (descriptor->ops != &sFileOps)
6227 			status = B_BAD_VALUE;
6228 		else if (kernel)
6229 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6230 		else if (user_memcpy(&flock, (struct flock*)argument,
6231 				sizeof(struct flock)) != B_OK)
6232 			status = B_BAD_ADDRESS;
6233 		if (status != B_OK)
6234 			return status;
6235 	}
6236 
6237 	switch (op) {
6238 		case F_SETFD:
6239 		{
6240 			// Set file descriptor flags
6241 
6242 			// FD_CLOEXEC is the only flag available at this time
6243 			mutex_lock(&context->io_mutex);
6244 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6245 			mutex_unlock(&context->io_mutex);
6246 
6247 			status = B_OK;
6248 			break;
6249 		}
6250 
6251 		case F_GETFD:
6252 		{
6253 			// Get file descriptor flags
6254 			mutex_lock(&context->io_mutex);
6255 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6256 			mutex_unlock(&context->io_mutex);
6257 			break;
6258 		}
6259 
6260 		case F_SETFL:
6261 		{
6262 			// Set file descriptor open mode
6263 
6264 			// we only accept changes to certain flags
6265 			const int32 modifiableFlags = O_APPEND | O_NONBLOCK;
6266 			argument &= modifiableFlags;
6267 
6268 			if (descriptor->ops->fd_set_flags != NULL) {
6269 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6270 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6271 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6272 					(int)argument);
6273 			} else
6274 				status = B_UNSUPPORTED;
6275 
6276 			if (status == B_OK) {
6277 				// update this descriptor's open_mode field
6278 				descriptor->open_mode = (descriptor->open_mode
6279 					& ~modifiableFlags) | argument;
6280 			}
6281 
6282 			break;
6283 		}
6284 
6285 		case F_GETFL:
6286 			// Get file descriptor open mode
6287 			status = descriptor->open_mode;
6288 			break;
6289 
6290 		case F_DUPFD:
6291 		case F_DUPFD_CLOEXEC:
6292 		{
6293 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6294 			if (status >= 0) {
6295 				mutex_lock(&context->io_mutex);
6296 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6297 				mutex_unlock(&context->io_mutex);
6298 
6299 				atomic_add(&descriptor->ref_count, 1);
6300 			}
6301 			break;
6302 		}
6303 
6304 		case F_GETLK:
6305 			if (vnode != NULL) {
6306 				struct flock normalizedLock;
6307 
6308 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6309 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6310 				if (status != B_OK)
6311 					break;
6312 
6313 				if (HAS_FS_CALL(vnode, test_lock)) {
6314 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6315 						&normalizedLock);
6316 				} else
6317 					status = test_advisory_lock(vnode, &normalizedLock);
6318 				if (status == B_OK) {
6319 					if (normalizedLock.l_type == F_UNLCK) {
6320 						// no conflicting lock found, copy back the same struct
6321 						// we were given except change type to F_UNLCK
6322 						flock.l_type = F_UNLCK;
6323 						if (kernel) {
6324 							memcpy((struct flock*)argument, &flock,
6325 								sizeof(struct flock));
6326 						} else {
6327 							status = user_memcpy((struct flock*)argument,
6328 								&flock, sizeof(struct flock));
6329 						}
6330 					} else {
6331 						// a conflicting lock was found, copy back its range and
6332 						// type
6333 						if (normalizedLock.l_len == OFF_MAX)
6334 							normalizedLock.l_len = 0;
6335 
6336 						if (kernel) {
6337 							memcpy((struct flock*)argument,
6338 								&normalizedLock, sizeof(struct flock));
6339 						} else {
6340 							status = user_memcpy((struct flock*)argument,
6341 								&normalizedLock, sizeof(struct flock));
6342 						}
6343 					}
6344 				}
6345 			} else
6346 				status = B_BAD_VALUE;
6347 			break;
6348 
6349 		case F_SETLK:
6350 		case F_SETLKW:
6351 			status = normalize_flock(descriptor.Get(), &flock);
6352 			if (status != B_OK)
6353 				break;
6354 
6355 			if (vnode == NULL) {
6356 				status = B_BAD_VALUE;
6357 			} else if (flock.l_type == F_UNLCK) {
6358 				if (HAS_FS_CALL(vnode, release_lock)) {
6359 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6360 						&flock);
6361 				} else {
6362 					status = release_advisory_lock(vnode, context, NULL,
6363 						&flock);
6364 				}
6365 			} else {
6366 				// the open mode must match the lock type
6367 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6368 						&& flock.l_type == F_WRLCK)
6369 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6370 						&& flock.l_type == F_RDLCK))
6371 					status = B_FILE_ERROR;
6372 				else {
6373 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6374 						status = FS_CALL(vnode, acquire_lock,
6375 							descriptor->cookie, &flock, op == F_SETLKW);
6376 					} else {
6377 						status = acquire_advisory_lock(vnode, context, NULL,
6378 							&flock, op == F_SETLKW);
6379 					}
6380 				}
6381 			}
6382 			break;
6383 
6384 		// ToDo: add support for more ops?
6385 
6386 		default:
6387 			status = B_BAD_VALUE;
6388 	}
6389 
6390 	return status;
6391 }
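
/*	Hypothetical userland sketch: acquiring an advisory whole-file write lock
	via fcntl(), which is dispatched by common_fcntl() above. The lock type
	must match the open mode, as checked in the F_SETLK/F_SETLKW case.
*/
#if 0
#include <fcntl.h>

static int
lock_whole_file(int fd)
{
	struct flock lock = {};
	lock.l_type = F_WRLCK;		// requires the FD to be open for writing
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 0;				// 0 means "until the end of the file"

	// F_SETLKW blocks until the lock is available;
	// F_SETLK would fail immediately on a conflict instead
	return fcntl(fd, F_SETLKW, &lock);
}
#endif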
6392 
6393 
6394 static status_t
6395 common_sync(int fd, bool kernel)
6396 {
6397 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6398 
6399 	struct vnode* vnode;
6400 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6401 	if (!descriptor.IsSet())
6402 		return B_FILE_ERROR;
6403 
6404 	status_t status;
6405 	if (HAS_FS_CALL(vnode, fsync))
6406 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6407 	else
6408 		status = B_UNSUPPORTED;
6409 
6410 	return status;
6411 }
6412 
6413 
6414 static status_t
6415 common_lock_node(int fd, bool kernel)
6416 {
6417 	struct vnode* vnode;
6418 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6419 	if (!descriptor.IsSet())
6420 		return B_FILE_ERROR;
6421 
6422 	status_t status = B_OK;
6423 
6424 	// We need to set the lock atomically -- someone else
6425 	// might try to set one at the same time
6426 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6427 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6428 		status = B_BUSY;
6429 
6430 	return status;
6431 }
6432 
6433 
6434 static status_t
6435 common_unlock_node(int fd, bool kernel)
6436 {
6437 	struct vnode* vnode;
6438 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6439 	if (!descriptor.IsSet())
6440 		return B_FILE_ERROR;
6441 
6442 	status_t status = B_OK;
6443 
6444 	// We need to clear the lock atomically -- someone else
6445 	// might try to set one at the same time
6446 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6447 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6448 		status = B_BAD_VALUE;
6449 
6450 	return status;
6451 }
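
/*	Sketch of the compare-and-swap pairing used above ("vnode" and
	"descriptor" stand in for the locals of the two functions): locking only
	succeeds while no descriptor holds the node, and only the locking
	descriptor can unlock it again.
*/
#if 0
// lock: transition NULL -> descriptor; anything else means B_BUSY
file_descriptor* previous = atomic_pointer_test_and_set(
	&vnode->mandatory_locked_by, descriptor, (file_descriptor*)NULL);
bool lockedByUs = (previous == NULL);

// unlock: transition descriptor -> NULL; anything else means B_BAD_VALUE
previous = atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
	(file_descriptor*)NULL, descriptor);
bool unlocked = (previous == descriptor);
#endif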
6452 
6453 
6454 static status_t
6455 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6456 {
6457 	if (offset < 0 || length == 0)
6458 		return B_BAD_VALUE;
6459 	if (offset > OFF_MAX - length)
6460 		return B_FILE_TOO_LARGE;
6461 
6462 	struct vnode* vnode;
6463 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6464 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6465 		return B_FILE_ERROR;
6466 
6467 	switch (vnode->Type() & S_IFMT) {
6468 		case S_IFIFO:
6469 		case S_IFSOCK:
6470 			return ESPIPE;
6471 
6472 		case S_IFBLK:
6473 		case S_IFCHR:
6474 		case S_IFDIR:
6475 		case S_IFLNK:
6476 			return B_DEVICE_NOT_FOUND;
6477 
6478 		case S_IFREG:
6479 			break;
6480 	}
6481 
6482 	status_t status = B_OK;
6483 	if (HAS_FS_CALL(vnode, preallocate)) {
6484 		status = FS_CALL(vnode, preallocate, offset, length);
6485 	} else {
6486 		status = HAS_FS_CALL(vnode, write)
6487 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6488 	}
6489 
6490 	return status;
6491 }
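
/*	Hypothetical userland sketch: reserving disk space ahead of writing,
	assuming posix_fallocate() reaches common_preallocate() above. Only
	regular files are supported; pipes and sockets yield ESPIPE.
*/
#if 0
#include <fcntl.h>

static int
reserve_space(int fd, off_t length)
{
	// allocates [0, length) so that later writes in that range
	// cannot fail for lack of disk space
	return posix_fallocate(fd, 0, length);
}
#endif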
6492 
6493 
6494 static status_t
6495 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6496 	bool kernel)
6497 {
6498 	VnodePutter vnode;
6499 	status_t status;
6500 
6501 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6502 	if (status != B_OK)
6503 		return status;
6504 
6505 	if (HAS_FS_CALL(vnode, read_symlink)) {
6506 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6507 	} else
6508 		status = B_BAD_VALUE;
6509 
6510 	return status;
6511 }
6512 
6513 
6514 static status_t
6515 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6516 	bool kernel)
6517 {
6518 	// path validity checks have to be in the calling function!
6519 	char name[B_FILE_NAME_LENGTH];
6520 	status_t status;
6521 
6522 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6523 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6524 
6525 	VnodePutter vnode;
6526 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6527 	if (status != B_OK)
6528 		return status;
6529 
6530 	if (HAS_FS_CALL(vnode, create_symlink))
6531 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6532 	else {
6533 		status = HAS_FS_CALL(vnode, write)
6534 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6535 	}
6536 
6537 	return status;
6538 }
6539 
6540 
6541 static status_t
6542 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6543 	bool traverseLeafLink, bool kernel)
6544 {
6545 	// path validity checks have to be in the calling function!
6546 
6547 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6548 		toPath, kernel));
6549 
6550 	char name[B_FILE_NAME_LENGTH];
6551 	VnodePutter directory;
6552 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6553 		kernel);
6554 	if (status != B_OK)
6555 		return status;
6556 
6557 	VnodePutter vnode;
6558 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6559 		kernel);
6560 	if (status != B_OK)
6561 		return status;
6562 
6563 	if (directory->mount != vnode->mount)
6564 		return B_CROSS_DEVICE_LINK;
6565 
6566 	if (HAS_FS_CALL(directory, link))
6567 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6568 	else
6569 		status = B_READ_ONLY_DEVICE;
6570 
6571 	return status;
6572 }
6573 
6574 
6575 static status_t
6576 common_unlink(int fd, char* path, bool kernel)
6577 {
6578 	char filename[B_FILE_NAME_LENGTH];
6579 	status_t status;
6580 
6581 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6582 		kernel));
6583 
6584 	VnodePutter vnode;
6585 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6586 	if (status < 0)
6587 		return status;
6588 
6589 	if (HAS_FS_CALL(vnode, unlink))
6590 		status = FS_CALL(vnode.Get(), unlink, filename);
6591 	else
6592 		status = B_READ_ONLY_DEVICE;
6593 
6594 	return status;
6595 }
6596 
6597 
6598 static status_t
6599 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6600 {
6601 	status_t status;
6602 
6603 	// TODO: honor effectiveUserGroup argument
6604 
6605 	VnodePutter vnode;
6606 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6607 	if (status != B_OK)
6608 		return status;
6609 
6610 	if (HAS_FS_CALL(vnode, access))
6611 		status = FS_CALL(vnode.Get(), access, mode);
6612 	else
6613 		status = B_OK;
6614 
6615 	return status;
6616 }
6617 
6618 
6619 static status_t
6620 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6621 {
6622 	status_t status;
6623 
6624 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6625 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6626 
6627 	VnodePutter fromVnode;
6628 	char fromName[B_FILE_NAME_LENGTH];
6629 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6630 	if (status != B_OK)
6631 		return status;
6632 
6633 	VnodePutter toVnode;
6634 	char toName[B_FILE_NAME_LENGTH];
6635 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6636 	if (status != B_OK)
6637 		return status;
6638 
6639 	if (fromVnode->device != toVnode->device)
6640 		return B_CROSS_DEVICE_LINK;
6641 
6642 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6643 		return B_OK;
6644 
6645 	if (fromName[0] == '\0' || toName[0] == '\0'
6646 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6647 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6648 		return B_BAD_VALUE;
6649 	}
6650 
6651 	if (HAS_FS_CALL(fromVnode, rename))
6652 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6653 	else
6654 		status = B_READ_ONLY_DEVICE;
6655 
6656 	return status;
6657 }
6658 
6659 
6660 static status_t
6661 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6662 {
6663 	struct vnode* vnode = descriptor->u.vnode;
6664 
6665 	FUNCTION(("common_read_stat: stat %p\n", stat));
6666 
6667 	// TODO: remove this once all file systems properly set them!
6668 	stat->st_crtim.tv_nsec = 0;
6669 	stat->st_ctim.tv_nsec = 0;
6670 	stat->st_mtim.tv_nsec = 0;
6671 	stat->st_atim.tv_nsec = 0;
6672 
6673 	return vfs_stat_vnode(vnode, stat);
6674 }
6675 
6676 
6677 static status_t
6678 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6679 	int statMask)
6680 {
6681 	struct vnode* vnode = descriptor->u.vnode;
6682 
6683 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6684 		vnode, stat, statMask));
6685 
6686 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6687 		&& (statMask & B_STAT_SIZE) != 0) {
6688 		return B_BAD_VALUE;
6689 	}
6690 
6691 	if (!HAS_FS_CALL(vnode, write_stat))
6692 		return B_READ_ONLY_DEVICE;
6693 
6694 	return FS_CALL(vnode, write_stat, stat, statMask);
6695 }
6696 
6697 
6698 static status_t
6699 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6700 	struct stat* stat, bool kernel)
6701 {
6702 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6703 		stat));
6704 
6705 	VnodePutter vnode;
6706 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6707 		NULL, kernel);
6708 	if (status != B_OK)
6709 		return status;
6710 
6711 	status = vfs_stat_vnode(vnode.Get(), stat);
6712 
6713 	return status;
6714 }
6715 
6716 
6717 static status_t
6718 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6719 	const struct stat* stat, int statMask, bool kernel)
6720 {
6721 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6722 		"kernel %d\n", fd, path, stat, statMask, kernel));
6723 
6724 	VnodePutter vnode;
6725 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6726 		NULL, kernel);
6727 	if (status != B_OK)
6728 		return status;
6729 
6730 	if (HAS_FS_CALL(vnode, write_stat))
6731 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6732 	else
6733 		status = B_READ_ONLY_DEVICE;
6734 
6735 	return status;
6736 }
6737 
6738 
6739 static int
6740 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6741 {
6742 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6743 		kernel));
6744 
6745 	VnodePutter vnode;
6746 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6747 		NULL, kernel);
6748 	if (status != B_OK)
6749 		return status;
6750 
6751 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6752 	if (status >= 0)
6753 		vnode.Detach();
6754 
6755 	return status;
6756 }
6757 
6758 
6759 static status_t
6760 attr_dir_close(struct file_descriptor* descriptor)
6761 {
6762 	struct vnode* vnode = descriptor->u.vnode;
6763 
6764 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6765 
6766 	if (HAS_FS_CALL(vnode, close_attr_dir))
6767 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6768 
6769 	return B_OK;
6770 }
6771 
6772 
6773 static void
6774 attr_dir_free_fd(struct file_descriptor* descriptor)
6775 {
6776 	struct vnode* vnode = descriptor->u.vnode;
6777 
6778 	if (vnode != NULL) {
6779 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6780 		put_vnode(vnode);
6781 	}
6782 }
6783 
6784 
6785 static status_t
6786 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6787 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6788 {
6789 	struct vnode* vnode = descriptor->u.vnode;
6790 
6791 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6792 
6793 	if (HAS_FS_CALL(vnode, read_attr_dir))
6794 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6795 			bufferSize, _count);
6796 
6797 	return B_UNSUPPORTED;
6798 }
6799 
6800 
6801 static status_t
6802 attr_dir_rewind(struct file_descriptor* descriptor)
6803 {
6804 	struct vnode* vnode = descriptor->u.vnode;
6805 
6806 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6807 
6808 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6809 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6810 
6811 	return B_UNSUPPORTED;
6812 }
6813 
6814 
6815 static int
6816 attr_create(int fd, char* path, const char* name, uint32 type,
6817 	int openMode, bool kernel)
6818 {
6819 	if (name == NULL || *name == '\0')
6820 		return B_BAD_VALUE;
6821 
6822 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6823 	VnodePutter vnode;
6824 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6825 		kernel);
6826 	if (status != B_OK)
6827 		return status;
6828 
6829 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6830 		return B_LINK_LIMIT;
6831 
6832 	if (!HAS_FS_CALL(vnode, create_attr))
6833 		return B_READ_ONLY_DEVICE;
6834 
6835 	void* cookie;
6836 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6837 	if (status != B_OK)
6838 		return status;
6839 
6840 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6841 	if (fd >= 0) {
6842 		vnode.Detach();
6843 		return fd;
6844 	}
6845 
6846 	status = fd;
6847 
6848 	FS_CALL(vnode.Get(), close_attr, cookie);
6849 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6850 
6851 	FS_CALL(vnode.Get(), remove_attr, name);
6852 
6853 	return status;
6854 }
6855 
6856 
6857 static int
6858 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6859 {
6860 	if (name == NULL || *name == '\0')
6861 		return B_BAD_VALUE;
6862 
6863 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6864 	VnodePutter vnode;
6865 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6866 		kernel);
6867 	if (status != B_OK)
6868 		return status;
6869 
6870 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6871 		return B_LINK_LIMIT;
6872 
6873 	if (!HAS_FS_CALL(vnode, open_attr))
6874 		return B_UNSUPPORTED;
6875 
6876 	void* cookie;
6877 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6878 	if (status != B_OK)
6879 		return status;
6880 
6881 	// now we only need a file descriptor for this attribute and we're done
6882 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6883 	if (fd >= 0) {
6884 		vnode.Detach();
6885 		return fd;
6886 	}
6887 
6888 	status = fd;
6889 
6890 	FS_CALL(vnode.Get(), close_attr, cookie);
6891 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6892 
6893 	return status;
6894 }
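
/*	Hypothetical userland sketch: reading a node attribute through the
	fs_attr.h API, which funnels into attr_open()/attr_read() above. Assumes
	the node carries a "BEOS:TYPE" attribute (the MIME type on BFS volumes).
*/
#if 0
#include <fs_attr.h>

static ssize_t
read_mime_type(int fd, char* buffer, size_t size)
{
	return fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0, buffer, size);
}
#endif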
6895 
6896 
6897 static status_t
6898 attr_close(struct file_descriptor* descriptor)
6899 {
6900 	struct vnode* vnode = descriptor->u.vnode;
6901 
6902 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6903 
6904 	if (HAS_FS_CALL(vnode, close_attr))
6905 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6906 
6907 	return B_OK;
6908 }
6909 
6910 
6911 static void
6912 attr_free_fd(struct file_descriptor* descriptor)
6913 {
6914 	struct vnode* vnode = descriptor->u.vnode;
6915 
6916 	if (vnode != NULL) {
6917 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6918 		put_vnode(vnode);
6919 	}
6920 }
6921 
6922 
6923 static status_t
6924 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6925 	size_t* length)
6926 {
6927 	struct vnode* vnode = descriptor->u.vnode;
6928 
6929 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6930 		pos, length, *length));
6931 
6932 	if (!HAS_FS_CALL(vnode, read_attr))
6933 		return B_UNSUPPORTED;
6934 
6935 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6936 }
6937 
6938 
6939 static status_t
6940 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6941 	size_t* length)
6942 {
6943 	struct vnode* vnode = descriptor->u.vnode;
6944 
6945 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6946 		length));
6947 
6948 	if (!HAS_FS_CALL(vnode, write_attr))
6949 		return B_UNSUPPORTED;
6950 
6951 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6952 }
6953 
6954 
6955 static off_t
6956 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6957 {
6958 	off_t offset;
6959 
6960 	switch (seekType) {
6961 		case SEEK_SET:
6962 			offset = 0;
6963 			break;
6964 		case SEEK_CUR:
6965 			offset = descriptor->pos;
6966 			break;
6967 		case SEEK_END:
6968 		{
6969 			struct vnode* vnode = descriptor->u.vnode;
6970 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6971 				return B_UNSUPPORTED;
6972 
6973 			struct stat stat;
6974 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6975 				&stat);
6976 			if (status != B_OK)
6977 				return status;
6978 
6979 			offset = stat.st_size;
6980 			break;
6981 		}
6982 		default:
6983 			return B_BAD_VALUE;
6984 	}
6985 
6986 	// assumes off_t is 64 bits wide
6987 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6988 		return B_BUFFER_OVERFLOW;
6989 
6990 	pos += offset;
6991 	if (pos < 0)
6992 		return B_BAD_VALUE;
6993 
6994 	return descriptor->pos = pos;
6995 }
6996 
6997 
6998 static status_t
6999 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7000 {
7001 	struct vnode* vnode = descriptor->u.vnode;
7002 
7003 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7004 
7005 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7006 		return B_UNSUPPORTED;
7007 
7008 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7009 }
7010 
7011 
7012 static status_t
7013 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7014 	int statMask)
7015 {
7016 	struct vnode* vnode = descriptor->u.vnode;
7017 
7018 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7019 
7020 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7021 		return B_READ_ONLY_DEVICE;
7022 
7023 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7024 }
7025 
7026 
7027 static status_t
7028 attr_remove(int fd, const char* name, bool kernel)
7029 {
7030 	if (name == NULL || *name == '\0')
7031 		return B_BAD_VALUE;
7032 
7033 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7034 		kernel));
7035 
7036 	struct vnode* vnode;
7037 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
7038 	if (!descriptor.IsSet())
7039 		return B_FILE_ERROR;
7040 
7041 	status_t status;
7042 	if (HAS_FS_CALL(vnode, remove_attr))
7043 		status = FS_CALL(vnode, remove_attr, name);
7044 	else
7045 		status = B_READ_ONLY_DEVICE;
7046 
7047 	return status;
7048 }
7049 
7050 
7051 static status_t
7052 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7053 	bool kernel)
7054 {
7055 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7056 		|| *toName == '\0')
7057 		return B_BAD_VALUE;
7058 
7059 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7060 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7061 
7062 	struct vnode* fromVnode;
7063 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7064 	if (!fromDescriptor.IsSet())
7065 		return B_FILE_ERROR;
7066 
7067 	struct vnode* toVnode;
7068 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7069 	if (!toDescriptor.IsSet())
7070 		return B_FILE_ERROR;
7071 
7072 	// are the files on the same volume?
7073 	if (fromVnode->device != toVnode->device)
7074 		return B_CROSS_DEVICE_LINK;
7075 
7076 	status_t status;
7077 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7078 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7079 	} else
7080 		status = B_READ_ONLY_DEVICE;
7081 
7082 	return status;
7083 }
7084 
7085 
7086 static int
7087 index_dir_open(dev_t mountID, bool kernel)
7088 {
7089 	struct fs_mount* mount;
7090 	void* cookie;
7091 
7092 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7093 		kernel));
7094 
7095 	status_t status = get_mount(mountID, &mount);
7096 	if (status != B_OK)
7097 		return status;
7098 
7099 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7100 		status = B_UNSUPPORTED;
7101 		goto error;
7102 	}
7103 
7104 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7105 	if (status != B_OK)
7106 		goto error;
7107 
7108 	// get fd for the index directory
7109 	int fd;
7110 	fd = get_new_fd(&sIndexDirectoryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7111 	if (fd >= 0)
7112 		return fd;
7113 
7114 	// something went wrong
7115 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7116 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7117 
7118 	status = fd;
7119 
7120 error:
7121 	put_mount(mount);
7122 	return status;
7123 }
7124 
7125 
7126 static status_t
7127 index_dir_close(struct file_descriptor* descriptor)
7128 {
7129 	struct fs_mount* mount = descriptor->u.mount;
7130 
7131 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7132 
7133 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7134 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7135 
7136 	return B_OK;
7137 }
7138 
7139 
7140 static void
7141 index_dir_free_fd(struct file_descriptor* descriptor)
7142 {
7143 	struct fs_mount* mount = descriptor->u.mount;
7144 
7145 	if (mount != NULL) {
7146 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7147 		put_mount(mount);
7148 	}
7149 }
7150 
7151 
7152 static status_t
7153 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7154 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7155 {
7156 	struct fs_mount* mount = descriptor->u.mount;
7157 
7158 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7159 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7160 			bufferSize, _count);
7161 	}
7162 
7163 	return B_UNSUPPORTED;
7164 }
7165 
7166 
7167 static status_t
7168 index_dir_rewind(struct file_descriptor* descriptor)
7169 {
7170 	struct fs_mount* mount = descriptor->u.mount;
7171 
7172 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7173 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7174 
7175 	return B_UNSUPPORTED;
7176 }
7177 
7178 
7179 static status_t
7180 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7181 	bool kernel)
7182 {
7183 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7184 		mountID, name, kernel));
7185 
7186 	struct fs_mount* mount;
7187 	status_t status = get_mount(mountID, &mount);
7188 	if (status != B_OK)
7189 		return status;
7190 
7191 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7192 		status = B_READ_ONLY_DEVICE;
7193 		goto out;
7194 	}
7195 
7196 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7197 
7198 out:
7199 	put_mount(mount);
7200 	return status;
7201 }
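
/*	Hypothetical userland sketch: creating a string index on a volume via
	fs_index.h, which arrives at index_create() above. The "MAIL:subject"
	index name is just an example.
*/
#if 0
#include <errno.h>
#include <fs_index.h>
#include <TypeConstants.h>

static status_t
add_subject_index(dev_t volume)
{
	if (fs_create_index(volume, "MAIL:subject", B_STRING_TYPE, 0) < 0)
		return errno;
	return B_OK;
}
#endif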
7202 
7203 
7204 #if 0
7205 static status_t
7206 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7207 {
7208 	struct vnode* vnode = descriptor->u.vnode;
7209 
7210 	// ToDo: currently unused!
7211 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7212 	if (!HAS_FS_CALL(vnode, read_index_stat))
7213 		return B_UNSUPPORTED;
7214 
7215 	return B_UNSUPPORTED;
7216 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7217 }
7218 
7219 
7220 static void
7221 index_free_fd(struct file_descriptor* descriptor)
7222 {
7223 	struct vnode* vnode = descriptor->u.vnode;
7224 
7225 	if (vnode != NULL) {
7226 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7227 		put_vnode(vnode);
7228 	}
7229 }
7230 #endif
7231 
7232 
7233 static status_t
7234 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7235 	bool kernel)
7236 {
7237 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7238 		"kernel = %d)\n", mountID, name, kernel));
7239 
7240 	struct fs_mount* mount;
7241 	status_t status = get_mount(mountID, &mount);
7242 	if (status != B_OK)
7243 		return status;
7244 
7245 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7246 		status = B_UNSUPPORTED;
7247 		goto out;
7248 	}
7249 
7250 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7251 
7252 out:
7253 	put_mount(mount);
7254 	return status;
7255 }
7256 
7257 
7258 static status_t
7259 index_remove(dev_t mountID, const char* name, bool kernel)
7260 {
7261 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7262 		mountID, name, kernel));
7263 
7264 	struct fs_mount* mount;
7265 	status_t status = get_mount(mountID, &mount);
7266 	if (status != B_OK)
7267 		return status;
7268 
7269 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7270 		status = B_READ_ONLY_DEVICE;
7271 		goto out;
7272 	}
7273 
7274 	status = FS_MOUNT_CALL(mount, remove_index, name);
7275 
7276 out:
7277 	put_mount(mount);
7278 	return status;
7279 }
7280 
7281 
7282 /*!	TODO: the query FS API is still pretty much the same as in R5.
7283 		It would be nice if queries would get some more kernel
7284 		support; for example, query parsing should be moved into
7285 		the kernel.
7286 */
7287 static int
7288 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7289 	int32 token, bool kernel)
7290 {
7291 	struct fs_mount* mount;
7292 	void* cookie;
7293 
7294 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7295 		device, query, kernel));
7296 
7297 	status_t status = get_mount(device, &mount);
7298 	if (status != B_OK)
7299 		return status;
7300 
7301 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7302 		status = B_UNSUPPORTED;
7303 		goto error;
7304 	}
7305 
7306 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7307 		&cookie);
7308 	if (status != B_OK)
7309 		goto error;
7310 
7311 	// get fd for the query
7312 	int fd;
7313 	fd = get_new_fd(&sQueryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7314 	if (fd >= 0)
7315 		return fd;
7316 
7317 	status = fd;
7318 
7319 	// something went wrong
7320 	FS_MOUNT_CALL(mount, close_query, cookie);
7321 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7322 
7323 error:
7324 	put_mount(mount);
7325 	return status;
7326 }
7327 
7328 
7329 static status_t
7330 query_close(struct file_descriptor* descriptor)
7331 {
7332 	struct fs_mount* mount = descriptor->u.mount;
7333 
7334 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7335 
7336 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7337 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7338 
7339 	return B_OK;
7340 }
7341 
7342 
7343 static void
7344 query_free_fd(struct file_descriptor* descriptor)
7345 {
7346 	struct fs_mount* mount = descriptor->u.mount;
7347 
7348 	if (mount != NULL) {
7349 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7350 		put_mount(mount);
7351 	}
7352 }
7353 
7354 
7355 static status_t
7356 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7357 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7358 {
7359 	struct fs_mount* mount = descriptor->u.mount;
7360 
7361 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7362 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7363 			bufferSize, _count);
7364 	}
7365 
7366 	return B_UNSUPPORTED;
7367 }
7368 
7369 
7370 static status_t
7371 query_rewind(struct file_descriptor* descriptor)
7372 {
7373 	struct fs_mount* mount = descriptor->u.mount;
7374 
7375 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7376 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7377 
7378 	return B_UNSUPPORTED;
7379 }
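
/*	Hypothetical userland sketch: running a one-shot query through fs_query.h,
	which is served by query_open()/query_read() above. A live query would
	additionally need a port and token (see query_open()).
*/
#if 0
#include <fs_query.h>
#include <stdio.h>

static void
list_large_files(dev_t volume)
{
	DIR* query = fs_open_query(volume, "size>1000000", 0);
	if (query == NULL)
		return;

	struct dirent* entry;
	while ((entry = fs_read_query(query)) != NULL)
		printf("%s\n", entry->d_name);

	fs_close_query(query);
}
#endif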
7380 
7381 
7382 //	#pragma mark - General File System functions
7383 
7384 
7385 static dev_t
7386 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7387 	const char* args, bool kernel)
7388 {
7389 	struct ::fs_mount* mount;
7390 	status_t status = B_OK;
7391 	fs_volume* volume = NULL;
7392 	int32 layer = 0;
7393 	Vnode* coveredNode = NULL;
7394 
7395 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7396 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7397 
7398 	// The path is always safe; we just have to make sure that fsName is
7399 	// at least minimally valid - we can't make any assumptions about args,
7400 	// though. A NULL fsName is OK if a device was given and the FS is not
7401 	// virtual; we'll get the name from the DDM later.
7402 	if (fsName == NULL) {
7403 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7404 			return B_BAD_VALUE;
7405 	} else if (fsName[0] == '\0')
7406 		return B_BAD_VALUE;
7407 
7408 	RecursiveLocker mountOpLocker(sMountOpLock);
7409 
7410 	// Helper to delete a newly created file device on failure.
7411 	// Not exactly beautiful, but helps to keep the code below cleaner.
7412 	struct FileDeviceDeleter {
7413 		FileDeviceDeleter() : id(-1) {}
7414 		~FileDeviceDeleter()
7415 		{
7416 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7417 		}
7418 
7419 		partition_id id;
7420 	} fileDeviceDeleter;
7421 
7422 	// If the file system is not a "virtual" one, the device argument should
7423 	// point to a real file/device (if given at all).
7424 	// get the partition
7425 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7426 	KPartition* partition = NULL;
7427 	KPath normalizedDevice;
7428 	bool newlyCreatedFileDevice = false;
7429 
7430 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7431 		// normalize the device path
7432 		status = normalizedDevice.SetTo(device, true);
7433 		if (status != B_OK)
7434 			return status;
7435 
7436 		// get a corresponding partition from the DDM
7437 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7438 		if (partition == NULL) {
7439 			// Partition not found: this either means the user supplied
7440 			// an invalid path, or the path refers to an image file. We try
7441 			// to let the DDM create a file device for the path.
7442 			partition_id deviceID = ddm->CreateFileDevice(
7443 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7444 			if (deviceID >= 0) {
7445 				partition = ddm->RegisterPartition(deviceID);
7446 				if (newlyCreatedFileDevice)
7447 					fileDeviceDeleter.id = deviceID;
7448 			}
7449 		}
7450 
7451 		if (!partition) {
7452 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7453 				normalizedDevice.Path()));
7454 			return B_ENTRY_NOT_FOUND;
7455 		}
7456 
7457 		device = normalizedDevice.Path();
7458 			// correct path to file device
7459 	}
7460 	PartitionRegistrar partitionRegistrar(partition, true);
7461 
7462 	// Write lock the partition's device. For the time being, we keep the lock
7463 	// until we're done mounting -- not nice, but it ensures that no one
7464 	// is interfering.
7465 	// TODO: Just mark the partition busy while mounting!
7466 	KDiskDevice* diskDevice = NULL;
7467 	if (partition) {
7468 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7469 		if (!diskDevice) {
7470 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7471 			return B_ERROR;
7472 		}
7473 	}
7474 
7475 	DeviceWriteLocker writeLocker(diskDevice, true);
7476 		// this takes over the write lock acquired before
7477 
7478 	if (partition != NULL) {
7479 		// make sure that the partition is not busy
7480 		if (partition->IsBusy()) {
7481 			TRACE(("fs_mount(): Partition is busy.\n"));
7482 			return B_BUSY;
7483 		}
7484 
7485 		// if no FS name had been supplied, we get it from the partition
7486 		if (fsName == NULL) {
7487 			KDiskSystem* diskSystem = partition->DiskSystem();
7488 			if (!diskSystem) {
7489 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7490 					"recognize it.\n"));
7491 				return B_BAD_VALUE;
7492 			}
7493 
7494 			if (!diskSystem->IsFileSystem()) {
7495 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7496 					"partitioning system.\n"));
7497 				return B_BAD_VALUE;
7498 			}
7499 
7500 			// The disk system name will not change, and the KDiskSystem
7501 			// object will not go away while the disk device is locked (and
7502 			// the partition has a reference to it), so this is safe.
7503 			fsName = diskSystem->Name();
7504 		}
7505 	}
7506 
7507 	mount = new(std::nothrow) (struct ::fs_mount);
7508 	if (mount == NULL)
7509 		return B_NO_MEMORY;
7510 
7511 	mount->device_name = strdup(device);
7512 		// "device" can be NULL
7513 
7514 	status = mount->entry_cache.Init();
7515 	if (status != B_OK)
7516 		goto err1;
7517 
7518 	// initialize structure
7519 	mount->id = sNextMountID++;
7520 	mount->partition = NULL;
7521 	mount->root_vnode = NULL;
7522 	mount->covers_vnode = NULL;
7523 	mount->unmounting = false;
7524 	mount->owns_file_device = false;
7525 	mount->volume = NULL;
7526 
7527 	// build up the volume(s)
7528 	while (true) {
7529 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7530 		if (layerFSName == NULL) {
7531 			if (layer == 0) {
7532 				status = B_NO_MEMORY;
7533 				goto err1;
7534 			}
7535 
7536 			break;
7537 		}
7538 		MemoryDeleter layerFSNameDeleter(layerFSName);
7539 
7540 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7541 		if (volume == NULL) {
7542 			status = B_NO_MEMORY;
7543 			goto err1;
7544 		}
7545 
7546 		volume->id = mount->id;
7547 		volume->partition = partition != NULL ? partition->ID() : -1;
7548 		volume->layer = layer++;
7549 		volume->private_volume = NULL;
7550 		volume->ops = NULL;
7551 		volume->sub_volume = NULL;
7552 		volume->super_volume = NULL;
7553 		volume->file_system = NULL;
7554 		volume->file_system_name = NULL;
7555 
7556 		volume->file_system_name = get_file_system_name(layerFSName);
7557 		if (volume->file_system_name == NULL) {
7558 			status = B_NO_MEMORY;
7559 			free(volume);
7560 			goto err1;
7561 		}
7562 
7563 		volume->file_system = get_file_system(layerFSName);
7564 		if (volume->file_system == NULL) {
7565 			status = B_DEVICE_NOT_FOUND;
7566 			free(volume->file_system_name);
7567 			free(volume);
7568 			goto err1;
7569 		}
7570 
7571 		if (mount->volume == NULL)
7572 			mount->volume = volume;
7573 		else {
7574 			volume->super_volume = mount->volume;
7575 			mount->volume->sub_volume = volume;
7576 			mount->volume = volume;
7577 		}
7578 	}
7579 
7580 	// insert mount struct into list before we call FS's mount() function
7581 	// so that vnodes can be created for this mount
7582 	rw_lock_write_lock(&sMountLock);
7583 	sMountsTable->Insert(mount);
7584 	rw_lock_write_unlock(&sMountLock);
7585 
7586 	ino_t rootID;
7587 
7588 	if (!sRoot) {
7589 		// we haven't mounted anything yet
7590 		if (strcmp(path, "/") != 0) {
7591 			status = B_ERROR;
7592 			goto err2;
7593 		}
7594 
7595 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7596 			args, &rootID);
7597 		if (status != B_OK || mount->volume->ops == NULL)
7598 			goto err2;
7599 	} else {
7600 		{
7601 			VnodePutter temp;
7602 			status = path_to_vnode(path, true, temp, NULL, kernel);
7603 			coveredNode = temp.Detach();
7604 		}
7605 		if (status != B_OK)
7606 			goto err2;
7607 
7608 		mount->covers_vnode = coveredNode;
7609 
7610 		// make sure coveredNode is a directory
7611 		if (!S_ISDIR(coveredNode->Type())) {
7612 			status = B_NOT_A_DIRECTORY;
7613 			goto err3;
7614 		}
7615 
7616 		if (coveredNode->IsCovered()) {
7617 			// this is already a covered vnode
7618 			status = B_BUSY;
7619 			goto err3;
7620 		}
7621 
7622 		// mount it/them
7623 		fs_volume* volume = mount->volume;
7624 		while (volume) {
7625 			status = volume->file_system->mount(volume, device, flags, args,
7626 				&rootID);
7627 			if (status != B_OK || volume->ops == NULL) {
7628 				if (status == B_OK && volume->ops == NULL)
7629 					panic("fs_mount: mount() succeeded but ops is NULL!");
7630 				if (volume->sub_volume)
7631 					goto err4;
7632 				goto err3;
7633 			}
7634 
7635 			volume = volume->super_volume;
7636 		}
7637 
7638 		volume = mount->volume;
7639 		while (volume) {
7640 			if (volume->ops->all_layers_mounted != NULL)
7641 				volume->ops->all_layers_mounted(volume);
7642 			volume = volume->super_volume;
7643 		}
7644 	}
7645 
7646 	// the root node is supposed to be owned by the file system - it must
7647 	// exist at this point
7648 	rw_lock_write_lock(&sVnodeLock);
7649 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7650 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7651 		panic("fs_mount: file system does not own its root node!\n");
7652 		status = B_ERROR;
7653 		rw_lock_write_unlock(&sVnodeLock);
7654 		goto err4;
7655 	}
7656 
7657 	// set up the links between the root vnode and the vnode it covers
7658 	if (coveredNode != NULL) {
7659 		if (coveredNode->IsCovered()) {
7660 			// the vnode is covered now
7661 			status = B_BUSY;
7662 			rw_lock_write_unlock(&sVnodeLock);
7663 			goto err4;
7664 		}
7665 
7666 		mount->root_vnode->covers = coveredNode;
7667 		mount->root_vnode->SetCovering(true);
7668 
7669 		coveredNode->covered_by = mount->root_vnode;
7670 		coveredNode->SetCovered(true);
7671 	}
7672 	rw_lock_write_unlock(&sVnodeLock);
7673 
7674 	if (!sRoot) {
7675 		sRoot = mount->root_vnode;
7676 		mutex_lock(&sIOContextRootLock);
7677 		get_current_io_context(true)->root = sRoot;
7678 		mutex_unlock(&sIOContextRootLock);
7679 		inc_vnode_ref_count(sRoot);
7680 	}
7681 
7682 	// supply the partition (if any) with the mount cookie and mark it mounted
7683 	if (partition) {
7684 		partition->SetMountCookie(mount->volume->private_volume);
7685 		partition->SetVolumeID(mount->id);
7686 
7687 		// keep a partition reference as long as the partition is mounted
7688 		partitionRegistrar.Detach();
7689 		mount->partition = partition;
7690 		mount->owns_file_device = newlyCreatedFileDevice;
7691 		fileDeviceDeleter.id = -1;
7692 	}
7693 
7694 	notify_mount(mount->id,
7695 		coveredNode != NULL ? coveredNode->device : -1,
7696 		coveredNode ? coveredNode->id : -1);
7697 
7698 	return mount->id;
7699 
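// Error cleanup: each label below undoes one more stage of the setup above.
// err4 unmounts the volume(s) again, err3 puts the covered vnode, err2
// removes the mount from the mounts table, and err1 deletes the mount
// structure itself.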
7700 err4:
7701 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7702 err3:
7703 	if (coveredNode != NULL)
7704 		put_vnode(coveredNode);
7705 err2:
7706 	rw_lock_write_lock(&sMountLock);
7707 	sMountsTable->Remove(mount);
7708 	rw_lock_write_unlock(&sMountLock);
7709 err1:
7710 	delete mount;
7711 
7712 	return status;
7713 }
7714 
7715 
7716 static status_t
7717 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7718 {
7719 	struct fs_mount* mount;
7720 	status_t err;
7721 
7722 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7723 		mountID, kernel));
7724 
7725 	VnodePutter pathVnode;
7726 	if (path != NULL) {
7727 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7728 		if (err != B_OK)
7729 			return B_ENTRY_NOT_FOUND;
7730 	}
7731 
7732 	RecursiveLocker mountOpLocker(sMountOpLock);
7733 	ReadLocker mountLocker(sMountLock);
7734 
7735 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7736 	if (mount == NULL) {
7737 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7738 			pathVnode.Get());
7739 	}
7740 
7741 	mountLocker.Unlock();
7742 
7743 	if (path != NULL) {
7744 		if (mount->root_vnode != pathVnode.Get()) {
7745 			// not mountpoint
7746 			return B_BAD_VALUE;
7747 		}
7748 
7749 		pathVnode.Unset();
7750 	}
7751 
7752 	// if the volume is associated with a partition, lock the device of the
7753 	// partition as long as we are unmounting
7754 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7755 	KPartition* partition = mount->partition;
7756 	KDiskDevice* diskDevice = NULL;
7757 	if (partition != NULL) {
7758 		if (partition->Device() == NULL) {
7759 			dprintf("fs_unmount(): There is no device!\n");
7760 			return B_ERROR;
7761 		}
7762 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7763 		if (!diskDevice) {
7764 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7765 			return B_ERROR;
7766 		}
7767 	}
7768 	DeviceWriteLocker writeLocker(diskDevice, true);
7769 
7770 	// make sure that the partition is not busy
7771 	if (partition != NULL) {
7772 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7773 			dprintf("fs_unmount(): Partition is busy.\n");
7774 			return B_BUSY;
7775 		}
7776 	}
7777 
7778 	// grab the vnode master mutex to keep someone from creating
7779 	// a vnode while we're figuring out if we can continue
7780 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7781 
7782 	bool disconnectedDescriptors = false;
7783 
7784 	while (true) {
7785 		bool busy = false;
7786 
7787 		// cycle through the list of vnodes associated with this mount and
7788 		// make sure none of them is busy or still referenced
7789 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7790 		while (struct vnode* vnode = iterator.Next()) {
7791 			if (vnode->IsBusy()) {
7792 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7793 				busy = true;
7794 				break;
7795 			}
7796 
7797 			// check the vnode's ref count -- subtract additional references for
7798 			// covering
7799 			int32 refCount = vnode->ref_count;
7800 			if (vnode->covers != NULL)
7801 				refCount--;
7802 			if (vnode->covered_by != NULL)
7803 				refCount--;
7804 
7805 			if (refCount != 0) {
7806 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7807 				// there are still vnodes in use on this mount, so we cannot
7808 				// unmount yet
7809 				busy = true;
7810 				break;
7811 			}
7812 		}
7813 
7814 		if (!busy)
7815 			break;
7816 
7817 		if ((flags & B_FORCE_UNMOUNT) == 0)
7818 			return B_BUSY;
7819 
7820 		if (disconnectedDescriptors) {
7821 			// wait a bit until the last access is finished, and then try again
7822 			vnodesWriteLocker.Unlock();
7823 			snooze(100000);
7824 			// TODO: if there is some kind of bug that prevents the ref counts
7825 			// from getting back to zero, this will fall into an endless loop...
7826 			vnodesWriteLocker.Lock();
7827 			continue;
7828 		}
7829 
7830 		// the file system is still busy - but we're forced to unmount it,
7831 		// so let's disconnect all open file descriptors
7832 
7833 		mount->unmounting = true;
7834 			// prevent new vnodes from being created
7835 
7836 		vnodesWriteLocker.Unlock();
7837 
7838 		disconnect_mount_or_vnode_fds(mount, NULL);
7839 		disconnectedDescriptors = true;
7840 
7841 		vnodesWriteLocker.Lock();
7842 	}
7843 
7844 	// We can safely continue. Mark all of the vnodes busy and this mount
7845 	// structure in unmounting state. Also undo the vnode covers/covered_by
7846 	// links.
7847 	mount->unmounting = true;
7848 
7849 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7850 	while (struct vnode* vnode = iterator.Next()) {
7851 		// Remove all covers/covered_by links from other mounts' nodes to this
7852 		// vnode and adjust the node ref count accordingly. We will release the
7853 		// references to the external vnodes below.
7854 		if (Vnode* coveredNode = vnode->covers) {
7855 			if (Vnode* coveringNode = vnode->covered_by) {
7856 				// We have both covered and covering vnodes, so just remove us
7857 				// from the chain.
7858 				coveredNode->covered_by = coveringNode;
7859 				coveringNode->covers = coveredNode;
7860 				vnode->ref_count -= 2;
7861 
7862 				vnode->covered_by = NULL;
7863 				vnode->covers = NULL;
7864 				vnode->SetCovering(false);
7865 				vnode->SetCovered(false);
7866 			} else {
7867 				// We only have a covered vnode. Remove its link to us.
7868 				coveredNode->covered_by = NULL;
7869 				coveredNode->SetCovered(false);
7870 				vnode->ref_count--;
7871 
7872 				// If the other node is an external vnode, we keep its link
7873 				// around so we can put the reference later on. Otherwise
7874 				// we get rid of it right now.
7875 				if (coveredNode->mount == mount) {
7876 					vnode->covers = NULL;
7877 					coveredNode->ref_count--;
7878 				}
7879 			}
7880 		} else if (Vnode* coveringNode = vnode->covered_by) {
7881 			// We only have a covering vnode. Remove its link to us.
7882 			coveringNode->covers = NULL;
7883 			coveringNode->SetCovering(false);
7884 			vnode->ref_count--;
7885 
7886 			// If the other node is an external vnode, we keep its link
7887 			// around so we can put the reference later on. Otherwise
7888 			// we get rid of it right now.
7889 			if (coveringNode->mount == mount) {
7890 				vnode->covered_by = NULL;
7891 				coveringNode->ref_count--;
7892 			}
7893 		}
7894 
7895 		vnode->SetBusy(true);
7896 		vnode_to_be_freed(vnode);
7897 	}
7898 
7899 	vnodesWriteLocker.Unlock();
7900 
7901 	// Free all vnodes associated with this mount.
7902 	// They will be removed from the mount list by free_vnode(), so
7903 	// we don't have to do it ourselves.
7904 	while (struct vnode* vnode = mount->vnodes.Head()) {
7905 		// Put the references to external covered/covering vnodes we kept above.
7906 		if (Vnode* coveredNode = vnode->covers)
7907 			put_vnode(coveredNode);
7908 		if (Vnode* coveringNode = vnode->covered_by)
7909 			put_vnode(coveringNode);
7910 
7911 		free_vnode(vnode, false);
7912 	}
7913 
7914 	// remove the mount structure from the hash table
7915 	rw_lock_write_lock(&sMountLock);
7916 	sMountsTable->Remove(mount);
7917 	rw_lock_write_unlock(&sMountLock);
7918 
7919 	mountOpLocker.Unlock();
7920 
7921 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7922 	notify_unmount(mount->id);
7923 
7924 	// dereference the partition and mark it unmounted
7925 	if (partition) {
7926 		partition->SetVolumeID(-1);
7927 		partition->SetMountCookie(NULL);
7928 
7929 		if (mount->owns_file_device)
7930 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7931 		partition->Unregister();
7932 	}
7933 
7934 	delete mount;
7935 	return B_OK;
7936 }
7937 
7938 
7939 static status_t
7940 fs_sync(dev_t device)
7941 {
7942 	struct fs_mount* mount;
7943 	status_t status = get_mount(device, &mount);
7944 	if (status != B_OK)
7945 		return status;
7946 
7947 	struct vnode marker;
7948 	memset(&marker, 0, sizeof(marker));
7949 	marker.SetBusy(true);
7950 	marker.SetRemoved(true);
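	// The marker is a dummy vnode: it is flagged busy so that other code
	// ignores it, and flagged removed while it is not linked into the
	// mount's vnode list. It remembers our position in that list, so the
	// iteration below can be resumed safely after the locks have been
	// dropped temporarily.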
7951 
7952 	// First, synchronize all file caches
7953 
7954 	while (true) {
7955 		WriteLocker locker(sVnodeLock);
7956 			// Note: that's the easy way, which is probably OK for sync(),
7957 			// since it's a relatively rare call and doesn't need to allow for
7958 			// a lot of concurrency. Using a read lock would be possible, but
7959 			// also more involved, since we would have to lock the individual
7960 			// nodes and take care of the locking order, which we might not
7961 			// want to do while holding fs_mount::lock.
7962 
7963 		// synchronize access to vnode list
7964 		mutex_lock(&mount->lock);
7965 
7966 		struct vnode* vnode;
7967 		if (!marker.IsRemoved()) {
7968 			vnode = mount->vnodes.GetNext(&marker);
7969 			mount->vnodes.Remove(&marker);
7970 			marker.SetRemoved(true);
7971 		} else
7972 			vnode = mount->vnodes.First();
7973 
7974 		while (vnode != NULL && (vnode->cache == NULL
7975 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7976 			// TODO: we could track writes (and writable mapped vnodes)
7977 			//	and have a simple flag that we could test for here
7978 			vnode = mount->vnodes.GetNext(vnode);
7979 		}
7980 
7981 		if (vnode != NULL) {
7982 			// insert marker vnode again
7983 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7984 			marker.SetRemoved(false);
7985 		}
7986 
7987 		mutex_unlock(&mount->lock);
7988 
7989 		if (vnode == NULL)
7990 			break;
7991 
7992 		vnode = lookup_vnode(mount->id, vnode->id);
7993 		if (vnode == NULL || vnode->IsBusy())
7994 			continue;
7995 
7996 		if (vnode->ref_count == 0) {
7997 			// this vnode has been unused before
7998 			vnode_used(vnode);
7999 		}
8000 		inc_vnode_ref_count(vnode);
8001 
8002 		locker.Unlock();
8003 
8004 		if (vnode->cache != NULL && !vnode->IsRemoved())
8005 			vnode->cache->WriteModified();
8006 
8007 		put_vnode(vnode);
8008 	}
8009 
8010 	// Let the file systems do their synchronizing work
8011 	if (HAS_FS_MOUNT_CALL(mount, sync))
8012 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8013 
8014 	// Finally, flush the underlying device's write cache (if possible).
8015 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8016 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8017 
8018 	put_mount(mount);
8019 	return status;
8020 }
8021 
8022 
8023 static status_t
8024 fs_read_info(dev_t device, struct fs_info* info)
8025 {
8026 	struct fs_mount* mount;
8027 	status_t status = get_mount(device, &mount);
8028 	if (status != B_OK)
8029 		return status;
8030 
8031 	memset(info, 0, sizeof(struct fs_info));
8032 
8033 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8034 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8035 
8036 	// fill in info the file system doesn't (have to) know about
8037 	if (status == B_OK) {
8038 		info->dev = mount->id;
8039 		info->root = mount->root_vnode->id;
8040 
8041 		fs_volume* volume = mount->volume;
8042 		while (volume->super_volume != NULL)
8043 			volume = volume->super_volume;
8044 
8045 		strlcpy(info->fsh_name, volume->file_system_name,
8046 			sizeof(info->fsh_name));
8047 		if (mount->device_name != NULL) {
8048 			strlcpy(info->device_name, mount->device_name,
8049 				sizeof(info->device_name));
8050 		}
8051 	}
8052 
8053 	// even if the call is not supported by the file system, we still
8054 	// return the parts that we filled in ourselves
8055 
8056 	put_mount(mount);
8057 	return status;
8058 }
8059 
8060 
8061 static status_t
8062 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8063 {
8064 	struct fs_mount* mount;
8065 	status_t status = get_mount(device, &mount);
8066 	if (status != B_OK)
8067 		return status;
8068 
8069 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8070 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8071 	else
8072 		status = B_READ_ONLY_DEVICE;
8073 
8074 	put_mount(mount);
8075 	return status;
8076 }
8077 
8078 
8079 static dev_t
8080 fs_next_device(int32* _cookie)
8081 {
8082 	struct fs_mount* mount = NULL;
8083 	dev_t device = *_cookie;
8084 
8085 	rw_lock_read_lock(&sMountLock);
8086 
8087 	// Since device IDs are assigned sequentially, this algorithm
8088 	// works well enough. It makes sure that the device list
8089 	// returned is sorted, and that no device is skipped when an
8090 	// already visited device gets unmounted.
8091 
8092 	while (device < sNextMountID) {
8093 		mount = find_mount(device++);
8094 		if (mount != NULL && mount->volume->private_volume != NULL)
8095 			break;
8096 	}
8097 
8098 	*_cookie = device;
8099 
8100 	if (mount != NULL)
8101 		device = mount->id;
8102 	else
8103 		device = B_BAD_VALUE;
8104 
8105 	rw_lock_read_unlock(&sMountLock);
8106 
8107 	return device;
8108 }
8109 
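/*!	Example: enumerating all mounted volumes by repeatedly passing the same
	cookie to fs_next_device(). A minimal sketch (illustrative only):

	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("volume with ID %" B_PRIdDEV "\n", device);
	\endcode
*/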
8110 
8111 ssize_t
8112 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8113 	void *buffer, size_t readBytes)
8114 {
8115 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8116 	if (attrFD < 0)
8117 		return attrFD;
8118 
8119 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8120 
8121 	_kern_close(attrFD);
8122 
8123 	return bytesRead;
8124 }
8125 
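/*!	Example: reading the MIME type attribute of an open file with
	fs_read_attr(). A minimal sketch (illustrative only; the attribute name
	"BEOS:TYPE" and the buffer size are assumptions):

	\code
	char mimeType[B_MIME_SIZE];
	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
		mimeType, sizeof(mimeType));
	if (bytesRead < 0)
		dprintf("attribute could not be read\n");
	\endcode
*/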
8126 
8127 static status_t
8128 get_cwd(char* buffer, size_t size, bool kernel)
8129 {
8130 	// Get current working directory from io context
8131 	struct io_context* context = get_current_io_context(kernel);
8132 	status_t status;
8133 
8134 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8135 
8136 	mutex_lock(&context->io_mutex);
8137 
8138 	struct vnode* vnode = context->cwd;
8139 	if (vnode)
8140 		inc_vnode_ref_count(vnode);
8141 
8142 	mutex_unlock(&context->io_mutex);
8143 
8144 	if (vnode) {
8145 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8146 		put_vnode(vnode);
8147 	} else
8148 		status = B_ERROR;
8149 
8150 	return status;
8151 }
8152 
8153 
8154 static status_t
8155 set_cwd(int fd, char* path, bool kernel)
8156 {
8157 	struct io_context* context;
8158 	struct vnode* oldDirectory;
8159 
8160 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8161 
8162 	// Get vnode for passed path, and bail if it failed
8163 	VnodePutter vnode;
8164 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8165 	if (status < 0)
8166 		return status;
8167 
8168 	if (!S_ISDIR(vnode->Type())) {
8169 		// nope, can't cwd to here
8170 		return B_NOT_A_DIRECTORY;
8171 	}
8172 
8173 	// We need to have the permission to enter the directory, too
8174 	if (HAS_FS_CALL(vnode, access)) {
8175 		status = FS_CALL(vnode.Get(), access, X_OK);
8176 		if (status != B_OK)
8177 			return status;
8178 	}
8179 
8180 	// Get current io context and lock
8181 	context = get_current_io_context(kernel);
8182 	mutex_lock(&context->io_mutex);
8183 
8184 	// save the old current working directory first
8185 	oldDirectory = context->cwd;
8186 	context->cwd = vnode.Detach();
8187 
8188 	mutex_unlock(&context->io_mutex);
8189 
8190 	if (oldDirectory)
8191 		put_vnode(oldDirectory);
8192 
8193 	return B_NO_ERROR;
8194 }
8195 
8196 
8197 static status_t
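/*!	Copies a NUL-terminated name from userland into a kernel buffer of size
	\a length. Returns \c B_OK on success, \c B_NAME_TOO_LONG if the name
	(including its terminating NUL) does not fit into the buffer, or the
	negative error code returned by user_strlcpy() (e.g. \c B_BAD_ADDRESS).
*/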
8198 user_copy_name(char* to, const char* from, size_t length)
8199 {
8200 	ssize_t len = user_strlcpy(to, from, length);
8201 	if (len < 0)
8202 		return len;
8203 	if (len >= (ssize_t)length)
8204 		return B_NAME_TOO_LONG;
8205 	return B_OK;
8206 }
8207 
8208 
8209 //	#pragma mark - kernel mirrored syscalls
8210 
8211 
8212 dev_t
8213 _kern_mount(const char* path, const char* device, const char* fsName,
8214 	uint32 flags, const char* args, size_t argsLength)
8215 {
8216 	KPath pathBuffer(path);
8217 	if (pathBuffer.InitCheck() != B_OK)
8218 		return B_NO_MEMORY;
8219 
8220 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8221 }
8222 
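/*!	Example: mounting a file system image. A minimal sketch (illustrative
	only; the paths and FS name are assumptions). Since the device path does
	not name a real device, fs_mount() will ask the disk device manager to
	create a file device for it (see the partition lookup there):

	\code
	dev_t volume = _kern_mount("/mnt/image", "/boot/home/test.image", "bfs",
		0, NULL, 0);
	if (volume < 0)
		dprintf("mount failed: %s\n", strerror(volume));
	\endcode
*/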
8223 
8224 status_t
8225 _kern_unmount(const char* path, uint32 flags)
8226 {
8227 	KPath pathBuffer(path);
8228 	if (pathBuffer.InitCheck() != B_OK)
8229 		return B_NO_MEMORY;
8230 
8231 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8232 }
8233 
8234 
8235 status_t
8236 _kern_read_fs_info(dev_t device, struct fs_info* info)
8237 {
8238 	if (info == NULL)
8239 		return B_BAD_VALUE;
8240 
8241 	return fs_read_info(device, info);
8242 }
8243 
8244 
8245 status_t
8246 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8247 {
8248 	if (info == NULL)
8249 		return B_BAD_VALUE;
8250 
8251 	return fs_write_info(device, info, mask);
8252 }
8253 
8254 
8255 status_t
8256 _kern_sync(void)
8257 {
8258 	// Note: _kern_sync() is also called from _user_sync()
8259 	int32 cookie = 0;
8260 	dev_t device;
8261 	while ((device = next_dev(&cookie)) >= 0) {
8262 		status_t status = fs_sync(device);
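		// B_BAD_VALUE simply means the device has been unmounted since
		// next_dev() returned it, so it is deliberately not reported below.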
8263 		if (status != B_OK && status != B_BAD_VALUE) {
8264 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8265 				strerror(status));
8266 		}
8267 	}
8268 
8269 	return B_OK;
8270 }
8271 
8272 
8273 dev_t
8274 _kern_next_device(int32* _cookie)
8275 {
8276 	return fs_next_device(_cookie);
8277 }
8278 
8279 
8280 status_t
8281 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8282 	size_t infoSize)
8283 {
8284 	if (infoSize != sizeof(fd_info))
8285 		return B_BAD_VALUE;
8286 
8287 	// get the team
8288 	Team* team = Team::Get(teamID);
8289 	if (team == NULL)
8290 		return B_BAD_TEAM_ID;
8291 	BReference<Team> teamReference(team, true);
8292 
8293 	// now that we have a team reference, its I/O context won't go away
8294 	io_context* context = team->io_context;
8295 	MutexLocker contextLocker(context->io_mutex);
8296 
8297 	uint32 slot = *_cookie;
8298 
8299 	struct file_descriptor* descriptor;
8300 	while (slot < context->table_size
8301 		&& (descriptor = context->fds[slot]) == NULL) {
8302 		slot++;
8303 	}
8304 
8305 	if (slot >= context->table_size)
8306 		return B_ENTRY_NOT_FOUND;
8307 
8308 	info->number = slot;
8309 	info->open_mode = descriptor->open_mode;
8310 
8311 	struct vnode* vnode = fd_vnode(descriptor);
8312 	if (vnode != NULL) {
8313 		info->device = vnode->device;
8314 		info->node = vnode->id;
8315 	} else if (descriptor->u.mount != NULL) {
8316 		info->device = descriptor->u.mount->id;
8317 		info->node = -1;
8318 	}
8319 
8320 	*_cookie = slot + 1;
8321 	return B_OK;
8322 }
8323 
8324 
8325 int
8326 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8327 	int perms)
8328 {
8329 	if ((openMode & O_CREAT) != 0) {
8330 		return file_create_entry_ref(device, inode, name, openMode, perms,
8331 			true);
8332 	}
8333 
8334 	return file_open_entry_ref(device, inode, name, openMode, true);
8335 }
8336 
8337 
8338 /*!	\brief Opens a node specified by a FD + path pair.
8339 
8340 	At least one of \a fd and \a path must be specified.
8341 	If only \a fd is given, the function opens the node identified by this
8342 	FD. If only a path is given, this path is opened. If both are given and
8343 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8344 	of the directory (!) identified by \a fd.
8345 
8346 	\param fd The FD. May be < 0.
8347 	\param path The absolute or relative path. May be \c NULL.
8348 	\param openMode The open mode.
8349 	\return A FD referring to the newly opened node, or an error code,
8350 			if an error occurs.
8351 */
8352 int
8353 _kern_open(int fd, const char* path, int openMode, int perms)
8354 {
8355 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8356 	if (pathBuffer.InitCheck() != B_OK)
8357 		return B_NO_MEMORY;
8358 
8359 	if ((openMode & O_CREAT) != 0)
8360 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8361 
8362 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8363 }
8364 
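/*!	Example: the FD + path pair semantics in practice. A minimal sketch
	(illustrative only; the path names are assumptions):

	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	// relative path: resolved relative to the directory behind dirFD
	int fd = _kern_open(dirFD, "config/settings", O_RDONLY, 0);
	// absolute path: dirFD is ignored
	int other = _kern_open(dirFD, "/boot/system/settings", O_RDONLY, 0);
	\endcode
*/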
8365 
8366 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8367 
8368 	The supplied name may be \c NULL, in which case directory identified
8369 	The supplied name may be \c NULL, in which case the directory identified
8370 	\a inode identify the parent directory of the directory to be opened
8371 	and \a name its entry name.
8372 
8373 	\param device If \a name is specified the ID of the device the parent
8374 		   directory of the directory to be opened resides on, otherwise
8375 		   the device of the directory itself.
8376 	\param inode If \a name is specified the node ID of the parent
8377 		   directory of the directory to be opened, otherwise node ID of the
8378 		   directory itself.
8379 	\param name The entry name of the directory to be opened. If \c NULL,
8380 		   the \a device + \a inode pair identify the node to be opened.
8381 	\return The FD of the newly opened directory or an error code, if
8382 			something went wrong.
8383 */
8384 int
8385 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8386 {
8387 	return dir_open_entry_ref(device, inode, name, true);
8388 }
8389 
8390 
8391 /*!	\brief Opens a directory specified by a FD + path pair.
8392 
8393 	At least one of \a fd and \a path must be specified.
8394 	If only \a fd is given, the function opens the directory identified by this
8395 	FD. If only a path is given, this path is opened. If both are given and
8396 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8397 	of the directory (!) identified by \a fd.
8398 
8399 	\param fd The FD. May be < 0.
8400 	\param path The absolute or relative path. May be \c NULL.
8401 	\return A FD referring to the newly opened directory, or an error code,
8402 			if an error occurs.
8403 */
8404 int
8405 _kern_open_dir(int fd, const char* path)
8406 {
8407 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8408 	if (pathBuffer.InitCheck() != B_OK)
8409 		return B_NO_MEMORY;
8410 
8411 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8412 }
8413 
8414 
8415 status_t
8416 _kern_fcntl(int fd, int op, size_t argument)
8417 {
8418 	return common_fcntl(fd, op, argument, true);
8419 }
8420 
8421 
8422 status_t
8423 _kern_fsync(int fd)
8424 {
8425 	return common_sync(fd, true);
8426 }
8427 
8428 
8429 status_t
8430 _kern_lock_node(int fd)
8431 {
8432 	return common_lock_node(fd, true);
8433 }
8434 
8435 
8436 status_t
8437 _kern_unlock_node(int fd)
8438 {
8439 	return common_unlock_node(fd, true);
8440 }
8441 
8442 
8443 status_t
8444 _kern_preallocate(int fd, off_t offset, off_t length)
8445 {
8446 	return common_preallocate(fd, offset, length, true);
8447 }
8448 
8449 
8450 status_t
8451 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8452 	int perms)
8453 {
8454 	return dir_create_entry_ref(device, inode, name, perms, true);
8455 }
8456 
8457 
8458 /*!	\brief Creates a directory specified by a FD + path pair.
8459 
8460 	\a path must always be specified (it contains at least the name of the
8461 	new directory). If only a path is given, this path identifies the location at
8462 	which the directory shall be created. If both \a fd and \a path are given
8463 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8464 	of the directory (!) identified by \a fd.
8465 
8466 	\param fd The FD. May be < 0.
8467 	\param path The absolute or relative path. Must not be \c NULL.
8468 	\param perms The access permissions the new directory shall have.
8469 	\return \c B_OK, if the directory has been created successfully, another
8470 			error code otherwise.
8471 */
8472 status_t
8473 _kern_create_dir(int fd, const char* path, int perms)
8474 {
8475 	KPath pathBuffer(path, KPath::DEFAULT);
8476 	if (pathBuffer.InitCheck() != B_OK)
8477 		return B_NO_MEMORY;
8478 
8479 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8480 }
8481 
8482 
8483 status_t
8484 _kern_remove_dir(int fd, const char* path)
8485 {
8486 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8487 	if (pathBuffer.InitCheck() != B_OK)
8488 		return B_NO_MEMORY;
8489 
8490 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8491 }
8492 
8493 
8494 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8495 
8496 	At least one of \a fd and \a path must be specified.
8497 	If only \a fd is given, the symlink to be read is the node
8498 	identified by this FD. If only a path is given, this path identifies the
8499 	symlink to be read. If both are given and the path is absolute, \a fd is
8500 	ignored; a relative path is reckoned off of the directory (!) identified
8501 	by \a fd.
8502 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8503 	will still be updated to reflect the required buffer size.
8504 
8505 	\param fd The FD. May be < 0.
8506 	\param path The absolute or relative path. May be \c NULL.
8507 	\param buffer The buffer into which the contents of the symlink shall be
8508 		   written.
8509 	\param _bufferSize A pointer to the size of the supplied buffer.
8510 	\return The length of the link on success or an appropriate error code
8511 */
8512 status_t
8513 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8514 {
8515 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8516 	if (pathBuffer.InitCheck() != B_OK)
8517 		return B_NO_MEMORY;
8518 
8519 	return common_read_link(fd, pathBuffer.LockBuffer(),
8520 		buffer, _bufferSize, true);
8521 }
8522 
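/*!	Example: recovering from an undersized buffer. As documented above, on
	\c B_BUFFER_OVERFLOW the required size is reported back through the size
	pointer. A minimal sketch (illustrative only; the path and buffer size
	are assumptions):

	\code
	char buffer[16];
	size_t size = sizeof(buffer);
	status_t error = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	if (error == B_BUFFER_OVERFLOW)
		dprintf("need %" B_PRIuSIZE " bytes\n", size);
	\endcode
*/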
8523 
8524 /*!	\brief Creates a symlink specified by a FD + path pair.
8525 
8526 	\a path must always be specified (it contains at least the name of the
8527 	new symlink). If only a path is given, this path identifies the location at
8528 	which the symlink shall be created. If both \a fd and \a path are given and
8529 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8530 	of the directory (!) identified by \a fd.
8531 
8532 	\param fd The FD. May be < 0.
8533 	\param path The absolute or relative path. Must not be \c NULL.
8534 	\param toPath The path the symlink shall point to.
	\param mode The access permissions the new symlink shall have.
8535 	\return \c B_OK, if the symlink has been created successfully, another
8536 			error code otherwise.
8537 */
8538 status_t
8539 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8540 {
8541 	KPath pathBuffer(path);
8542 	if (pathBuffer.InitCheck() != B_OK)
8543 		return B_NO_MEMORY;
8544 
8545 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8546 		toPath, mode, true);
8547 }
8548 
8549 
8550 status_t
8551 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8552 	bool traverseLeafLink)
8553 {
8554 	KPath pathBuffer(path);
8555 	KPath toPathBuffer(toPath);
8556 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8557 		return B_NO_MEMORY;
8558 
8559 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8560 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8561 }
8562 
8563 
8564 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8565 
8566 	\a path must always be specified (it contains at least the name of the entry
8567 	to be deleted). If only a path is given, this path identifies the entry
8568 	directly. If both \a fd and \a path are given and the path is absolute,
8569 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8570 	identified by \a fd.
8571 
8572 	\param fd The FD. May be < 0.
8573 	\param path The absolute or relative path. Must not be \c NULL.
8574 	\return \c B_OK, if the entry has been removed successfully, another
8575 			error code otherwise.
8576 */
8577 status_t
8578 _kern_unlink(int fd, const char* path)
8579 {
8580 	KPath pathBuffer(path);
8581 	if (pathBuffer.InitCheck() != B_OK)
8582 		return B_NO_MEMORY;
8583 
8584 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8585 }
8586 
8587 
8588 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8589 		   by another FD + path pair.
8590 
8591 	\a oldPath and \a newPath must always be specified (they contain at least
8592 	the name of the entry). If only a path is given, this path identifies the
8593 	entry directly. If both a FD and a path are given and the path is absolute,
8594 	the FD is ignored; a relative path is reckoned off of the directory (!)
8595 	identified by the respective FD.
8596 
8597 	\param oldFD The FD of the old location. May be < 0.
8598 	\param oldPath The absolute or relative path of the old location. Must not
8599 		   be \c NULL.
8600 	\param newFD The FD of the new location. May be < 0.
8601 	\param newPath The absolute or relative path of the new location. Must not
8602 		   be \c NULL.
8603 	\return \c B_OK, if the entry has been moved successfully, another
8604 			error code otherwise.
8605 */
8606 status_t
8607 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8608 {
8609 	KPath oldPathBuffer(oldPath);
8610 	KPath newPathBuffer(newPath);
8611 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8612 		return B_NO_MEMORY;
8613 
8614 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8615 		newFD, newPathBuffer.LockBuffer(), true);
8616 }
8617 
8618 
8619 status_t
8620 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8621 {
8622 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8623 	if (pathBuffer.InitCheck() != B_OK)
8624 		return B_NO_MEMORY;
8625 
8626 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8627 		true);
8628 }
8629 
8630 
8631 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8632 
8633 	If only \a fd is given, the stat operation associated with the type
8634 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8635 	given, this path identifies the entry for whose node to retrieve the
8636 	stat data. If both \a fd and \a path are given and the path is absolute,
8637 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8638 	identified by \a fd and specifies the entry whose stat data shall be
8639 	retrieved.
8640 
8641 	\param fd The FD. May be < 0.
8642 	\param path The absolute or relative path. Must not be \c NULL.
8643 	\param traverseLeafLink If \a path is given, \c true specifies that the
8644 		   function shall not stick to symlinks, but traverse them.
8645 	\param stat The buffer the stat data shall be written into.
8646 	\param statSize The size of the supplied stat buffer.
8647 	\return \c B_OK, if the stat data have been read successfully, another
8648 			error code otherwise.
8649 */
8650 status_t
8651 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8652 	struct stat* stat, size_t statSize)
8653 {
8654 	struct stat completeStat;
8655 	struct stat* originalStat = NULL;
8656 	status_t status;
8657 
8658 	if (statSize > sizeof(struct stat))
8659 		return B_BAD_VALUE;
8660 
8661 	// this supports different stat extensions
8662 	if (statSize < sizeof(struct stat)) {
8663 		originalStat = stat;
8664 		stat = &completeStat;
8665 	}
8666 
8667 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8668 
8669 	if (status == B_OK && originalStat != NULL)
8670 		memcpy(originalStat, stat, statSize);
8671 
8672 	return status;
8673 }
8674 
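/*!	Example: how the \a statSize mechanism above keeps callers that were
	built against a smaller \c struct stat working: the full stat is read
	into a complete structure, and only the first \a statSize bytes are
	copied back. A minimal sketch (the legacy structure is a hypothetical
	illustration; it must be a prefix of \c struct stat):

	\code
	struct legacy_stat {
		dev_t	st_dev;
		ino_t	st_ino;
	};

	legacy_stat oldStat;
	status_t error = _kern_read_stat(fd, NULL, false,
		(struct stat*)&oldStat, sizeof(oldStat));
	\endcode
*/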
8675 
8676 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8677 
8678 	If only \a fd is given, the stat operation associated with the type
8679 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8680 	given, this path identifies the entry for whose node to write the
8681 	stat data. If both \a fd and \a path are given and the path is absolute,
8682 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8683 	identified by \a fd and specifies the entry whose stat data shall be
8684 	written.
8685 
8686 	\param fd The FD. May be < 0.
8687 	\param path The absolute or relative path. May be \c NULL.
8688 	\param traverseLeafLink If \a path is given, \c true specifies that the
8689 		   function shall not stick to symlinks, but traverse them.
8690 	\param stat The buffer containing the stat data to be written.
8691 	\param statSize The size of the supplied stat buffer.
8692 	\param statMask A mask specifying which parts of the stat data shall be
8693 		   written.
8694 	\return \c B_OK, if the stat data have been written successfully,
8695 			another error code otherwise.
8696 */
8697 status_t
8698 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8699 	const struct stat* stat, size_t statSize, int statMask)
8700 {
8701 	struct stat completeStat;
8702 
8703 	if (statSize > sizeof(struct stat))
8704 		return B_BAD_VALUE;
8705 
8706 	// this supports different stat extensions
8707 	if (statSize < sizeof(struct stat)) {
8708 		memset((uint8*)&completeStat + statSize, 0,
8709 			sizeof(struct stat) - statSize);
8710 		memcpy(&completeStat, stat, statSize);
8711 		stat = &completeStat;
8712 	}
8713 
8714 	status_t status;
8715 
8716 	if (path != NULL) {
8717 		// path given: write the stat of the node referred to by (fd, path)
8718 		KPath pathBuffer(path);
8719 		if (pathBuffer.InitCheck() != B_OK)
8720 			return B_NO_MEMORY;
8721 
8722 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8723 			traverseLeafLink, stat, statMask, true);
8724 	} else {
8725 		// no path given: get the FD and use the FD operation
8726 		FileDescriptorPutter descriptor
8727 			(get_fd(get_current_io_context(true), fd));
8728 		if (!descriptor.IsSet())
8729 			return B_FILE_ERROR;
8730 
8731 		if (descriptor->ops->fd_write_stat)
8732 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8733 		else
8734 			status = B_UNSUPPORTED;
8735 	}
8736 
8737 	return status;
8738 }
8739 
8740 
8741 int
8742 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8743 {
8744 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8745 	if (pathBuffer.InitCheck() != B_OK)
8746 		return B_NO_MEMORY;
8747 
8748 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8749 }
8750 
8751 
8752 int
8753 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8754 	int openMode)
8755 {
8756 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8757 	if (pathBuffer.InitCheck() != B_OK)
8758 		return B_NO_MEMORY;
8759 
8760 	if ((openMode & O_CREAT) != 0) {
8761 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8762 			true);
8763 	}
8764 
8765 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8766 }
8767 
8768 
8769 status_t
8770 _kern_remove_attr(int fd, const char* name)
8771 {
8772 	return attr_remove(fd, name, true);
8773 }
8774 
8775 
8776 status_t
8777 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8778 	const char* toName)
8779 {
8780 	return attr_rename(fromFile, fromName, toFile, toName, true);
8781 }
8782 
8783 
8784 int
8785 _kern_open_index_dir(dev_t device)
8786 {
8787 	return index_dir_open(device, true);
8788 }
8789 
8790 
8791 status_t
8792 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8793 {
8794 	return index_create(device, name, type, flags, true);
8795 }
8796 
8797 
8798 status_t
8799 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8800 {
8801 	return index_name_read_stat(device, name, stat, true);
8802 }
8803 
8804 
8805 status_t
8806 _kern_remove_index(dev_t device, const char* name)
8807 {
8808 	return index_remove(device, name, true);
8809 }
8810 
8811 
8812 status_t
8813 _kern_getcwd(char* buffer, size_t size)
8814 {
8815 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8816 
8817 	// Call vfs to get current working directory
8818 	return get_cwd(buffer, size, true);
8819 }
8820 
8821 
8822 status_t
8823 _kern_setcwd(int fd, const char* path)
8824 {
8825 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8826 	if (pathBuffer.InitCheck() != B_OK)
8827 		return B_NO_MEMORY;
8828 
8829 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8830 }
8831 
8832 
8833 //	#pragma mark - userland syscalls
8834 
8835 
8836 dev_t
8837 _user_mount(const char* userPath, const char* userDevice,
8838 	const char* userFileSystem, uint32 flags, const char* userArgs,
8839 	size_t argsLength)
8840 {
8841 	char fileSystem[B_FILE_NAME_LENGTH];
8842 	KPath path, device;
8843 	char* args = NULL;
8844 	status_t status;
8845 
8846 	if (!IS_USER_ADDRESS(userPath))
8847 		return B_BAD_ADDRESS;
8848 
8849 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8850 		return B_NO_MEMORY;
8851 
8852 	status = user_copy_name(path.LockBuffer(), userPath,
8853 		B_PATH_NAME_LENGTH);
8854 	if (status != B_OK)
8855 		return status;
8856 	path.UnlockBuffer();
8857 
8858 	if (userFileSystem != NULL) {
8859 		if (!IS_USER_ADDRESS(userFileSystem))
8860 			return B_BAD_ADDRESS;
8861 
8862 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8863 		if (status != B_OK)
8864 			return status;
8865 	}
8866 
8867 	if (userDevice != NULL) {
8868 		if (!IS_USER_ADDRESS(userDevice))
8869 			return B_BAD_ADDRESS;
8870 
8871 		status = user_copy_name(device.LockBuffer(), userDevice,
8872 			B_PATH_NAME_LENGTH);
8873 		if (status != B_OK)
8874 			return status;
8875 		device.UnlockBuffer();
8876 	}
8877 
8878 	if (userArgs != NULL && argsLength > 0) {
8879 		if (!IS_USER_ADDRESS(userArgs))
8880 			return B_BAD_ADDRESS;
8881 
8882 		// this is a safety restriction
8883 		if (argsLength >= 65536)
8884 			return B_NAME_TOO_LONG;
8885 
8886 		args = (char*)malloc(argsLength + 1);
8887 		if (args == NULL)
8888 			return B_NO_MEMORY;
8889 
8890 		status = user_copy_name(args, userArgs, argsLength + 1);
8891 		if (status != B_OK) {
8892 			free(args);
8893 			return status;
8894 		}
8895 	}
8896 
8897 	status = fs_mount(path.LockBuffer(),
8898 		userDevice != NULL ? device.Path() : NULL,
8899 		userFileSystem ? fileSystem : NULL, flags, args, false);
8900 
8901 	free(args);
8902 	return status;
8903 }
8904 
8905 
8906 status_t
8907 _user_unmount(const char* userPath, uint32 flags)
8908 {
8909 	if (!IS_USER_ADDRESS(userPath))
8910 		return B_BAD_ADDRESS;
8911 
8912 	KPath pathBuffer;
8913 	if (pathBuffer.InitCheck() != B_OK)
8914 		return B_NO_MEMORY;
8915 
8916 	char* path = pathBuffer.LockBuffer();
8917 
8918 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8919 	if (status != B_OK)
8920 		return status;
8921 
8922 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8923 }
8924 
8925 
8926 status_t
8927 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8928 {
8929 	struct fs_info info;
8930 	status_t status;
8931 
8932 	if (userInfo == NULL)
8933 		return B_BAD_VALUE;
8934 
8935 	if (!IS_USER_ADDRESS(userInfo))
8936 		return B_BAD_ADDRESS;
8937 
8938 	status = fs_read_info(device, &info);
8939 	if (status != B_OK)
8940 		return status;
8941 
8942 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8943 		return B_BAD_ADDRESS;
8944 
8945 	return B_OK;
8946 }
8947 
8948 
8949 status_t
8950 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8951 {
8952 	struct fs_info info;
8953 
8954 	if (userInfo == NULL)
8955 		return B_BAD_VALUE;
8956 
8957 	if (!IS_USER_ADDRESS(userInfo)
8958 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	return fs_write_info(device, &info, mask);
8962 }
8963 
8964 
8965 dev_t
8966 _user_next_device(int32* _userCookie)
8967 {
8968 	int32 cookie;
8969 	dev_t device;
8970 
8971 	if (!IS_USER_ADDRESS(_userCookie)
8972 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8973 		return B_BAD_ADDRESS;
8974 
8975 	device = fs_next_device(&cookie);
8976 
8977 	if (device >= B_OK) {
8978 		// update user cookie
8979 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8980 			return B_BAD_ADDRESS;
8981 	}
8982 
8983 	return device;
8984 }
8985 
8986 
8987 status_t
8988 _user_sync(void)
8989 {
8990 	return _kern_sync();
8991 }
8992 
8993 
8994 status_t
8995 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8996 	size_t infoSize)
8997 {
8998 	struct fd_info info;
8999 	uint32 cookie;
9000 
9001 	// only root can do this
9002 	if (geteuid() != 0)
9003 		return B_NOT_ALLOWED;
9004 
9005 	if (infoSize != sizeof(fd_info))
9006 		return B_BAD_VALUE;
9007 
9008 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9009 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9010 		return B_BAD_ADDRESS;
9011 
9012 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9013 	if (status != B_OK)
9014 		return status;
9015 
9016 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9017 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9018 		return B_BAD_ADDRESS;
9019 
9020 	return status;
9021 }
9022 
9023 
9024 status_t
9025 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9026 	char* userPath, size_t pathLength)
9027 {
9028 	if (!IS_USER_ADDRESS(userPath))
9029 		return B_BAD_ADDRESS;
9030 
9031 	KPath path;
9032 	if (path.InitCheck() != B_OK)
9033 		return B_NO_MEMORY;
9034 
9035 	// copy the leaf name onto the stack
9036 	char stackLeaf[B_FILE_NAME_LENGTH];
9037 	if (leaf != NULL) {
9038 		if (!IS_USER_ADDRESS(leaf))
9039 			return B_BAD_ADDRESS;
9040 
9041 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9042 		if (status != B_OK)
9043 			return status;
9044 
9045 		leaf = stackLeaf;
9046 	}
9047 
9048 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9049 		false, path.LockBuffer(), path.BufferSize());
9050 	if (status != B_OK)
9051 		return status;
9052 
9053 	path.UnlockBuffer();
9054 
9055 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9056 	if (length < 0)
9057 		return length;
9058 	if (length >= (int)pathLength)
9059 		return B_BUFFER_OVERFLOW;
9060 
9061 	return B_OK;
9062 }
9063 
9064 
9065 status_t
9066 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9067 {
9068 	if (userPath == NULL || buffer == NULL)
9069 		return B_BAD_VALUE;
9070 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9071 		return B_BAD_ADDRESS;
9072 
9073 	// copy path from userland
9074 	KPath pathBuffer;
9075 	if (pathBuffer.InitCheck() != B_OK)
9076 		return B_NO_MEMORY;
9077 	char* path = pathBuffer.LockBuffer();
9078 
9079 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9080 	if (status != B_OK)
9081 		return status;
9082 
9083 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9084 		false);
9085 	if (error != B_OK)
9086 		return error;
9087 
9088 	// copy back to userland
9089 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9090 	if (len < 0)
9091 		return len;
9092 	if (len >= B_PATH_NAME_LENGTH)
9093 		return B_BUFFER_OVERFLOW;
9094 
9095 	return B_OK;
9096 }
9097 
9098 
9099 int
9100 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9101 	int openMode, int perms)
9102 {
9103 	char name[B_FILE_NAME_LENGTH];
9104 
9105 	if (userName == NULL || device < 0 || inode < 0)
9106 		return B_BAD_VALUE;
9107 	if (!IS_USER_ADDRESS(userName))
9108 		return B_BAD_ADDRESS;
9109 	status_t status = user_copy_name(name, userName, sizeof(name));
9110 	if (status != B_OK)
9111 		return status;
9112 
9113 	if ((openMode & O_CREAT) != 0) {
9114 		return file_create_entry_ref(device, inode, name, openMode, perms,
9115 			false);
9116 	}
9117 
9118 	return file_open_entry_ref(device, inode, name, openMode, false);
9119 }
9120 
9121 
9122 int
9123 _user_open(int fd, const char* userPath, int openMode, int perms)
9124 {
9125 	KPath path;
9126 	if (path.InitCheck() != B_OK)
9127 		return B_NO_MEMORY;
9128 
9129 	char* buffer = path.LockBuffer();
9130 
9131 	if (!IS_USER_ADDRESS(userPath))
9132 		return B_BAD_ADDRESS;
9133 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9134 	if (status != B_OK)
9135 		return status;
9136 
9137 	if ((openMode & O_CREAT) != 0)
9138 		return file_create(fd, buffer, openMode, perms, false);
9139 
9140 	return file_open(fd, buffer, openMode, false);
9141 }
9142 
9143 
9144 int
9145 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9146 {
9147 	if (userName != NULL) {
9148 		char name[B_FILE_NAME_LENGTH];
9149 
9150 		if (!IS_USER_ADDRESS(userName))
9151 			return B_BAD_ADDRESS;
9152 		status_t status = user_copy_name(name, userName, sizeof(name));
9153 		if (status != B_OK)
9154 			return status;
9155 
9156 		return dir_open_entry_ref(device, inode, name, false);
9157 	}
9158 	return dir_open_entry_ref(device, inode, NULL, false);
9159 }
9160 
9161 
9162 int
9163 _user_open_dir(int fd, const char* userPath)
9164 {
9165 	if (userPath == NULL)
9166 		return dir_open(fd, NULL, false);
9167 
9168 	KPath path;
9169 	if (path.InitCheck() != B_OK)
9170 		return B_NO_MEMORY;
9171 
9172 	char* buffer = path.LockBuffer();
9173 
9174 	if (!IS_USER_ADDRESS(userPath))
9175 		return B_BAD_ADDRESS;
9176 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9177 	if (status != B_OK)
9178 		return status;
9179 
9180 	return dir_open(fd, buffer, false);
9181 }
9182 
9183 
9184 /*!	\brief Opens a directory's parent directory and returns the entry name
9185 		   of the former.
9186 
9187 	Aside from returning the directory's entry name, this method is
9188 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9189 	equivalent, if \a userName is \c NULL.
9190 
9191 	If a name buffer is supplied and the name does not fit the buffer, the
9192 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9193 
9194 	\param fd A FD referring to a directory.
9195 	\param userName Buffer the directory's entry name shall be written into.
9196 		   May be \c NULL.
9197 	\param nameLength Size of the name buffer.
9198 	\return The file descriptor of the opened parent directory, if everything
9199 			went fine, an error code otherwise.
9200 */
9201 int
9202 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9203 {
9204 	bool kernel = false;
9205 
9206 	if (userName && !IS_USER_ADDRESS(userName))
9207 		return B_BAD_ADDRESS;
9208 
9209 	// open the parent dir
9210 	int parentFD = dir_open(fd, (char*)"..", kernel);
9211 	if (parentFD < 0)
9212 		return parentFD;
9213 	FDCloser fdCloser(parentFD, kernel);
9214 
9215 	if (userName) {
9216 		// get the vnodes
9217 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9218 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9219 		VnodePutter parentVNodePutter(parentVNode);
9220 		VnodePutter dirVNodePutter(dirVNode);
9221 		if (!parentVNode || !dirVNode)
9222 			return B_FILE_ERROR;
9223 
9224 		// get the vnode name
9225 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9226 		struct dirent* buffer = (struct dirent*)_buffer;
9227 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9228 			sizeof(_buffer), get_current_io_context(false));
9229 		if (status != B_OK)
9230 			return status;
9231 
9232 		// copy the name to the userland buffer
9233 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9234 		if (len < 0)
9235 			return len;
9236 		if (len >= (int)nameLength)
9237 			return B_BUFFER_OVERFLOW;
9238 	}
9239 
9240 	return fdCloser.Detach();
9241 }
9242 
9243 
9244 status_t
9245 _user_fcntl(int fd, int op, size_t argument)
9246 {
9247 	status_t status = common_fcntl(fd, op, argument, false);
9248 	if (op == F_SETLKW)
9249 		syscall_restart_handle_post(status);
9250 
9251 	return status;
9252 }
9253 
9254 
9255 status_t
9256 _user_fsync(int fd)
9257 {
9258 	return common_sync(fd, false);
9259 }
9260 
9261 
9262 status_t
9263 _user_flock(int fd, int operation)
9264 {
9265 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9266 
9267 	// Check if the operation is valid
9268 	switch (operation & ~LOCK_NB) {
9269 		case LOCK_UN:
9270 		case LOCK_SH:
9271 		case LOCK_EX:
9272 			break;
9273 
9274 		default:
9275 			return B_BAD_VALUE;
9276 	}
9277 
9278 	struct vnode* vnode;
9279 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9280 	if (!descriptor.IsSet())
9281 		return B_FILE_ERROR;
9282 
9283 	if (descriptor->ops != &sFileOps)
9284 		return B_BAD_VALUE;
9285 
9286 	struct flock flock;
9287 	flock.l_start = 0;
9288 	flock.l_len = OFF_MAX;
9289 	flock.l_whence = 0;
9290 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9291 
9292 	status_t status;
9293 	if ((operation & LOCK_UN) != 0) {
9294 		if (HAS_FS_CALL(vnode, release_lock))
9295 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9296 		else
9297 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9298 	} else {
9299 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9300 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9301 				(operation & LOCK_NB) == 0);
9302 		} else {
9303 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9304 				(operation & LOCK_NB) == 0);
9305 		}
9306 	}
9307 
9308 	syscall_restart_handle_post(status);
9309 
9310 	return status;
9311 }
9312 
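/*!	Example: what _user_flock() above turns a typical userland call into.
	A minimal sketch (illustrative only):

	\code
	// flock(fd, LOCK_EX | LOCK_NB) arrives here with operation
	// LOCK_EX | LOCK_NB and is translated into a non-waiting F_WRLCK
	// advisory lock spanning the whole file ([0, OFF_MAX]).
	if (flock(fd, LOCK_EX | LOCK_NB) < 0)
		fprintf(stderr, "file is locked by another process\n");
	\endcode
*/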
9313 
9314 status_t
9315 _user_lock_node(int fd)
9316 {
9317 	return common_lock_node(fd, false);
9318 }
9319 
9320 
9321 status_t
9322 _user_unlock_node(int fd)
9323 {
9324 	return common_unlock_node(fd, false);
9325 }
9326 
9327 
9328 status_t
9329 _user_preallocate(int fd, off_t offset, off_t length)
9330 {
9331 	return common_preallocate(fd, offset, length, false);
9332 }
9333 
9334 
9335 status_t
9336 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9337 	int perms)
9338 {
9339 	char name[B_FILE_NAME_LENGTH];
9340 	status_t status;
9341 
9342 	if (!IS_USER_ADDRESS(userName))
9343 		return B_BAD_ADDRESS;
9344 
9345 	status = user_copy_name(name, userName, sizeof(name));
9346 	if (status != B_OK)
9347 		return status;
9348 
9349 	return dir_create_entry_ref(device, inode, name, perms, false);
9350 }
9351 
9352 
9353 status_t
9354 _user_create_dir(int fd, const char* userPath, int perms)
9355 {
9356 	KPath pathBuffer;
9357 	if (pathBuffer.InitCheck() != B_OK)
9358 		return B_NO_MEMORY;
9359 
9360 	char* path = pathBuffer.LockBuffer();
9361 
9362 	if (!IS_USER_ADDRESS(userPath))
9363 		return B_BAD_ADDRESS;
9364 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9365 	if (status != B_OK)
9366 		return status;
9367 
9368 	return dir_create(fd, path, perms, false);
9369 }
9370 
9371 
9372 status_t
9373 _user_remove_dir(int fd, const char* userPath)
9374 {
9375 	KPath pathBuffer;
9376 	if (pathBuffer.InitCheck() != B_OK)
9377 		return B_NO_MEMORY;
9378 
9379 	char* path = pathBuffer.LockBuffer();
9380 
9381 	if (userPath != NULL) {
9382 		if (!IS_USER_ADDRESS(userPath))
9383 			return B_BAD_ADDRESS;
9384 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9385 		if (status != B_OK)
9386 			return status;
9387 	}
9388 
9389 	return dir_remove(fd, userPath ? path : NULL, false);
9390 }
9391 
9392 
9393 status_t
9394 _user_read_link(int fd, const char* userPath, char* userBuffer,
9395 	size_t* userBufferSize)
9396 {
9397 	KPath pathBuffer, linkBuffer;
9398 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9399 		return B_NO_MEMORY;
9400 
9401 	size_t bufferSize;
9402 
9403 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9404 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9405 		return B_BAD_ADDRESS;
9406 
9407 	char* path = pathBuffer.LockBuffer();
9408 	char* buffer = linkBuffer.LockBuffer();
9409 
	if (userPath) {
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;
		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;
	}

	// The kernel-side link buffer is only B_PATH_NAME_LENGTH bytes large,
	// so clamp the requested size in any case -- not only when a path was
	// given -- to keep the file system from overflowing it.
	if (bufferSize > B_PATH_NAME_LENGTH)
		bufferSize = B_PATH_NAME_LENGTH;
9420 
9421 	size_t newBufferSize = bufferSize;
9422 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9423 		&newBufferSize, false);
9424 
	// Copy the size back even in case of error: on B_BUFFER_OVERFLOW it
	// contains the actual length of the link.
9427 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9428 		return B_BAD_ADDRESS;
9429 
9430 	if (status != B_OK)
9431 		return status;
9432 
9433 	bufferSize = min_c(newBufferSize, bufferSize);
9434 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9435 		return B_BAD_ADDRESS;
9436 
9437 	return B_OK;
9438 }
9439 
9440 
9441 status_t
9442 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9443 	int mode)
9444 {
9445 	KPath pathBuffer;
9446 	KPath toPathBuffer;
9447 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9448 		return B_NO_MEMORY;
9449 
9450 	char* path = pathBuffer.LockBuffer();
9451 	char* toPath = toPathBuffer.LockBuffer();
9452 
9453 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9454 		return B_BAD_ADDRESS;
9455 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9456 	if (status != B_OK)
9457 		return status;
9458 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9459 	if (status != B_OK)
9460 		return status;
9461 
9462 	return common_create_symlink(fd, path, toPath, mode, false);
9463 }
9464 
9465 
9466 status_t
9467 _user_create_link(int pathFD, const char* userPath, int toFD,
9468 	const char* userToPath, bool traverseLeafLink)
9469 {
9470 	KPath pathBuffer;
9471 	KPath toPathBuffer;
9472 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9473 		return B_NO_MEMORY;
9474 
9475 	char* path = pathBuffer.LockBuffer();
9476 	char* toPath = toPathBuffer.LockBuffer();
9477 
9478 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9479 		return B_BAD_ADDRESS;
9480 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9481 	if (status != B_OK)
9482 		return status;
9483 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9484 	if (status != B_OK)
9485 		return status;
9486 
9487 	status = check_path(toPath);
9488 	if (status != B_OK)
9489 		return status;
9490 
9491 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9492 		false);
9493 }
9494 
9495 
9496 status_t
9497 _user_unlink(int fd, const char* userPath)
9498 {
9499 	KPath pathBuffer;
9500 	if (pathBuffer.InitCheck() != B_OK)
9501 		return B_NO_MEMORY;
9502 
9503 	char* path = pathBuffer.LockBuffer();
9504 
9505 	if (!IS_USER_ADDRESS(userPath))
9506 		return B_BAD_ADDRESS;
9507 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9508 	if (status != B_OK)
9509 		return status;
9510 
9511 	return common_unlink(fd, path, false);
9512 }
9513 
9514 
9515 status_t
9516 _user_rename(int oldFD, const char* userOldPath, int newFD,
9517 	const char* userNewPath)
9518 {
9519 	KPath oldPathBuffer;
9520 	KPath newPathBuffer;
9521 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9522 		return B_NO_MEMORY;
9523 
9524 	char* oldPath = oldPathBuffer.LockBuffer();
9525 	char* newPath = newPathBuffer.LockBuffer();
9526 
9527 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9528 		return B_BAD_ADDRESS;
9529 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9530 	if (status != B_OK)
9531 		return status;
9532 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9533 	if (status != B_OK)
9534 		return status;
9535 
9536 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9537 }
9538 
9539 
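/*!	Backend for mkfifo(): creates a FIFO special node at the path named
	by (\a fd, \a userPath). The hosting file system has to implement the
	create_special_node() hook -- otherwise B_UNSUPPORTED is returned.
*/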
9540 status_t
9541 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9542 {
9543 	KPath pathBuffer;
9544 	if (pathBuffer.InitCheck() != B_OK)
9545 		return B_NO_MEMORY;
9546 
9547 	char* path = pathBuffer.LockBuffer();
9548 
9549 	if (!IS_USER_ADDRESS(userPath))
9550 		return B_BAD_ADDRESS;
9551 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9552 	if (status != B_OK)
9553 		return status;
9554 
	// split the path into directory vnode and filename
9556 	char filename[B_FILE_NAME_LENGTH];
9557 	VnodePutter dir;
9558 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9559 	if (status != B_OK)
9560 		return status;
9561 
9562 	// the underlying FS needs to support creating FIFOs
9563 	if (!HAS_FS_CALL(dir, create_special_node))
9564 		return B_UNSUPPORTED;
9565 
9566 	// create the entry	-- the FIFO sub node is set up automatically
9567 	fs_vnode superVnode;
9568 	ino_t nodeID;
9569 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9570 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9571 
9572 	// create_special_node() acquired a reference for us that we don't need.
9573 	if (status == B_OK)
9574 		put_vnode(dir->mount->volume, nodeID);
9575 
9576 	return status;
9577 }
9578 
9579 
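/*!	Creates an anonymous FIFO on the root file system and returns an FD
	for its read end in userFDs[0] and one for its write end in
	userFDs[1]. This backs the POSIX pipe() call; a rough userland sketch
	(assuming the generated syscall wrapper is named _kern_create_pipe()):

		int fds[2];
		if (_kern_create_pipe(fds) == B_OK) {
			// fds[0] is open O_RDONLY, fds[1] is open O_WRONLY
		}
*/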
9580 status_t
9581 _user_create_pipe(int* userFDs)
9582 {
9583 	// rootfs should support creating FIFOs, but let's be sure
9584 	if (!HAS_FS_CALL(sRoot, create_special_node))
9585 		return B_UNSUPPORTED;
9586 
9587 	// create the node	-- the FIFO sub node is set up automatically
9588 	fs_vnode superVnode;
9589 	ino_t nodeID;
9590 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9591 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9592 	if (status != B_OK)
9593 		return status;
9594 
9595 	// We've got one reference to the node and need another one.
9596 	struct vnode* vnode;
9597 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9598 	if (status != B_OK) {
9599 		// that should not happen
		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9602 		return status;
9603 	}
9604 
	// Everything looks good so far. Open two FDs: one for reading, one for
	// writing.
9607 	int fds[2];
9608 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9609 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9610 
9611 	FDCloser closer0(fds[0], false);
9612 	FDCloser closer1(fds[1], false);
9613 
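	// open_vnode() returns a new FD on success and a negative error code
	// on failure; propagate the first failure, if any.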
9614 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9615 
9616 	// copy FDs to userland
9617 	if (status == B_OK) {
9618 		if (!IS_USER_ADDRESS(userFDs)
9619 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9620 			status = B_BAD_ADDRESS;
9621 		}
9622 	}
9623 
9624 	// keep FDs, if everything went fine
9625 	if (status == B_OK) {
9626 		closer0.Detach();
9627 		closer1.Detach();
9628 	}
9629 
9630 	return status;
9631 }
9632 
9633 
9634 status_t
9635 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9636 {
9637 	KPath pathBuffer;
9638 	if (pathBuffer.InitCheck() != B_OK)
9639 		return B_NO_MEMORY;
9640 
9641 	char* path = pathBuffer.LockBuffer();
9642 
9643 	if (!IS_USER_ADDRESS(userPath))
9644 		return B_BAD_ADDRESS;
9645 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9646 	if (status != B_OK)
9647 		return status;
9648 
9649 	return common_access(fd, path, mode, effectiveUserGroup, false);
9650 }
9651 
9652 
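/*!	Returns stat information for the node named by (\a fd, \a userPath),
	or for \a fd itself if \a userPath is NULL. \a statSize enables binary
	compatibility: callers built against an older, smaller struct stat
	pass its size, and only that many bytes are copied back; sizes larger
	than the kernel's struct stat are rejected with B_BAD_VALUE.
*/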
9653 status_t
9654 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9655 	struct stat* userStat, size_t statSize)
9656 {
9657 	struct stat stat = {0};
9658 	status_t status;
9659 
9660 	if (statSize > sizeof(struct stat))
9661 		return B_BAD_VALUE;
9662 
9663 	if (!IS_USER_ADDRESS(userStat))
9664 		return B_BAD_ADDRESS;
9665 
9666 	if (userPath != NULL) {
9667 		// path given: get the stat of the node referred to by (fd, path)
9668 		if (!IS_USER_ADDRESS(userPath))
9669 			return B_BAD_ADDRESS;
9670 
9671 		KPath pathBuffer;
9672 		if (pathBuffer.InitCheck() != B_OK)
9673 			return B_NO_MEMORY;
9674 
9675 		char* path = pathBuffer.LockBuffer();
9676 
9677 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9678 		if (status != B_OK)
9679 			return status;
9680 
9681 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9682 	} else {
9683 		// no path given: get the FD and use the FD operation
9684 		FileDescriptorPutter descriptor
9685 			(get_fd(get_current_io_context(false), fd));
9686 		if (!descriptor.IsSet())
9687 			return B_FILE_ERROR;
9688 
9689 		if (descriptor->ops->fd_read_stat)
9690 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9691 		else
9692 			status = B_UNSUPPORTED;
9693 	}
9694 
9695 	if (status != B_OK)
9696 		return status;
9697 
9698 	return user_memcpy(userStat, &stat, statSize);
9699 }
9700 
9701 
9702 status_t
9703 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9704 	const struct stat* userStat, size_t statSize, int statMask)
9705 {
9706 	if (statSize > sizeof(struct stat))
9707 		return B_BAD_VALUE;
9708 
9709 	struct stat stat;
9710 
9711 	if (!IS_USER_ADDRESS(userStat)
9712 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9713 		return B_BAD_ADDRESS;
9714 
9715 	// clear additional stat fields
9716 	if (statSize < sizeof(struct stat))
9717 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9718 
9719 	status_t status;
9720 
9721 	if (userPath != NULL) {
9722 		// path given: write the stat of the node referred to by (fd, path)
9723 		if (!IS_USER_ADDRESS(userPath))
9724 			return B_BAD_ADDRESS;
9725 
9726 		KPath pathBuffer;
9727 		if (pathBuffer.InitCheck() != B_OK)
9728 			return B_NO_MEMORY;
9729 
9730 		char* path = pathBuffer.LockBuffer();
9731 
9732 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9733 		if (status != B_OK)
9734 			return status;
9735 
9736 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9737 			statMask, false);
9738 	} else {
9739 		// no path given: get the FD and use the FD operation
9740 		FileDescriptorPutter descriptor
9741 			(get_fd(get_current_io_context(false), fd));
9742 		if (!descriptor.IsSet())
9743 			return B_FILE_ERROR;
9744 
9745 		if (descriptor->ops->fd_write_stat) {
9746 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9747 				statMask);
9748 		} else
9749 			status = B_UNSUPPORTED;
9750 	}
9751 
9752 	return status;
9753 }
9754 
9755 
9756 int
9757 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9758 {
9759 	KPath pathBuffer;
9760 	if (pathBuffer.InitCheck() != B_OK)
9761 		return B_NO_MEMORY;
9762 
9763 	char* path = pathBuffer.LockBuffer();
9764 
9765 	if (userPath != NULL) {
9766 		if (!IS_USER_ADDRESS(userPath))
9767 			return B_BAD_ADDRESS;
9768 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9769 		if (status != B_OK)
9770 			return status;
9771 	}
9772 
9773 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9774 }
9775 
9776 
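/*!	Reads up to \a readBytes bytes from attribute \a userAttribute of the
	node \a fd refers to, starting at \a pos. The attribute is opened like
	a regular file and the ordinary read path is reused. This is roughly
	what the userland fs_read_attr() maps to (a sketch; that API's type
	argument is not needed for reading):

		char buffer[B_MIME_TYPE_LENGTH];
		ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE",
			B_MIME_STRING_TYPE, 0, buffer, sizeof(buffer));
*/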
9777 ssize_t
9778 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9779 	size_t readBytes)
9780 {
9781 	char attribute[B_FILE_NAME_LENGTH];
9782 
9783 	if (userAttribute == NULL)
9784 		return B_BAD_VALUE;
9785 	if (!IS_USER_ADDRESS(userAttribute))
9786 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9788 	if (status != B_OK)
9789 		return status;
9790 
9791 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9792 	if (attr < 0)
9793 		return attr;
9794 
9795 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9796 	_user_close(attr);
9797 
9798 	return bytes;
9799 }
9800 
9801 
9802 ssize_t
9803 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9804 	const void* buffer, size_t writeBytes)
9805 {
9806 	char attribute[B_FILE_NAME_LENGTH];
9807 
9808 	if (userAttribute == NULL)
9809 		return B_BAD_VALUE;
9810 	if (!IS_USER_ADDRESS(userAttribute))
9811 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9813 	if (status != B_OK)
9814 		return status;
9815 
	// Mimic the traditional BeOS behavior of truncating the attribute on a
	// write to position 0, while still supporting the position argument.
9818 	int attr = attr_create(fd, NULL, attribute, type,
9819 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9820 	if (attr < 0)
9821 		return attr;
9822 
9823 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9824 	_user_close(attr);
9825 
9826 	return bytes;
9827 }
9828 
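/*!	Retrieves type and size of the attribute \a userAttribute into
	\a userAttrInfo, by opening the attribute and issuing a regular FD
	stat on it.
*/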
9829 
9830 status_t
9831 _user_stat_attr(int fd, const char* userAttribute,
9832 	struct attr_info* userAttrInfo)
9833 {
9834 	char attribute[B_FILE_NAME_LENGTH];
9835 
9836 	if (userAttribute == NULL || userAttrInfo == NULL)
9837 		return B_BAD_VALUE;
9838 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9839 		return B_BAD_ADDRESS;
9840 	status_t status = user_copy_name(attribute, userAttribute,
9841 		sizeof(attribute));
9842 	if (status != B_OK)
9843 		return status;
9844 
9845 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9846 	if (attr < 0)
9847 		return attr;
9848 
9849 	struct file_descriptor* descriptor
9850 		= get_fd(get_current_io_context(false), attr);
9851 	if (descriptor == NULL) {
9852 		_user_close(attr);
9853 		return B_FILE_ERROR;
9854 	}
9855 
9856 	struct stat stat;
9857 	if (descriptor->ops->fd_read_stat)
9858 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9859 	else
9860 		status = B_UNSUPPORTED;
9861 
9862 	put_fd(descriptor);
9863 	_user_close(attr);
9864 
9865 	if (status == B_OK) {
9866 		attr_info info;
9867 		info.type = stat.st_type;
9868 		info.size = stat.st_size;
9869 
9870 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9871 			return B_BAD_ADDRESS;
9872 	}
9873 
9874 	return status;
9875 }
9876 
9877 
9878 int
9879 _user_open_attr(int fd, const char* userPath, const char* userName,
9880 	uint32 type, int openMode)
9881 {
9882 	char name[B_FILE_NAME_LENGTH];
9883 
9884 	if (!IS_USER_ADDRESS(userName))
9885 		return B_BAD_ADDRESS;
9886 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9887 	if (status != B_OK)
9888 		return status;
9889 
9890 	KPath pathBuffer;
9891 	if (pathBuffer.InitCheck() != B_OK)
9892 		return B_NO_MEMORY;
9893 
9894 	char* path = pathBuffer.LockBuffer();
9895 
9896 	if (userPath != NULL) {
9897 		if (!IS_USER_ADDRESS(userPath))
9898 			return B_BAD_ADDRESS;
9899 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9900 		if (status != B_OK)
9901 			return status;
9902 	}
9903 
9904 	if ((openMode & O_CREAT) != 0) {
9905 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9906 			false);
9907 	}
9908 
9909 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9910 }
9911 
9912 
9913 status_t
9914 _user_remove_attr(int fd, const char* userName)
9915 {
9916 	char name[B_FILE_NAME_LENGTH];
9917 
9918 	if (!IS_USER_ADDRESS(userName))
9919 		return B_BAD_ADDRESS;
9920 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9921 	if (status != B_OK)
9922 		return status;
9923 
9924 	return attr_remove(fd, name, false);
9925 }
9926 
9927 
9928 status_t
9929 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9930 	const char* userToName)
9931 {
9932 	if (!IS_USER_ADDRESS(userFromName)
9933 		|| !IS_USER_ADDRESS(userToName))
9934 		return B_BAD_ADDRESS;
9935 
9936 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9937 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9938 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9939 		return B_NO_MEMORY;
9940 
9941 	char* fromName = fromNameBuffer.LockBuffer();
9942 	char* toName = toNameBuffer.LockBuffer();
9943 
9944 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9945 	if (status != B_OK)
9946 		return status;
9947 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9948 	if (status != B_OK)
9949 		return status;
9950 
9951 	return attr_rename(fromFile, fromName, toFile, toName, false);
9952 }
9953 
9954 
9955 int
9956 _user_open_index_dir(dev_t device)
9957 {
9958 	return index_dir_open(device, false);
9959 }
9960 
9961 
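/*!	Creates an index for the attribute \a userName on the given volume,
	so that queries involving that attribute can be answered efficiently.
	This matches the userland fs_create_index() API; for example (a
	sketch):

		fs_create_index(dev_for_path("/boot"), "MAIL:subject",
			B_STRING_TYPE, 0);
*/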
9962 status_t
9963 _user_create_index(dev_t device, const char* userName, uint32 type,
9964 	uint32 flags)
9965 {
9966 	char name[B_FILE_NAME_LENGTH];
9967 
9968 	if (!IS_USER_ADDRESS(userName))
9969 		return B_BAD_ADDRESS;
9970 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9971 	if (status != B_OK)
9972 		return status;
9973 
9974 	return index_create(device, name, type, flags, false);
9975 }
9976 
9977 
9978 status_t
9979 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9980 {
9981 	char name[B_FILE_NAME_LENGTH];
9982 	struct stat stat = {0};
9983 	status_t status;
9984 
9985 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9986 		return B_BAD_ADDRESS;
9987 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9988 	if (status != B_OK)
9989 		return status;
9990 
9991 	status = index_name_read_stat(device, name, &stat, false);
9992 	if (status == B_OK) {
9993 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9994 			return B_BAD_ADDRESS;
9995 	}
9996 
9997 	return status;
9998 }
9999 
10000 
10001 status_t
10002 _user_remove_index(dev_t device, const char* userName)
10003 {
10004 	char name[B_FILE_NAME_LENGTH];
10005 
10006 	if (!IS_USER_ADDRESS(userName))
10007 		return B_BAD_ADDRESS;
10008 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10009 	if (status != B_OK)
10010 		return status;
10011 
10012 	return index_remove(device, name, false);
10013 }
10014 
10015 
10016 status_t
10017 _user_getcwd(char* userBuffer, size_t size)
10018 {
10019 	if (size == 0)
10020 		return B_BAD_VALUE;
10021 	if (!IS_USER_ADDRESS(userBuffer))
10022 		return B_BAD_ADDRESS;
10023 
10024 	if (size > kMaxPathLength)
10025 		size = kMaxPathLength;
10026 
10027 	KPath pathBuffer(size);
10028 	if (pathBuffer.InitCheck() != B_OK)
10029 		return B_NO_MEMORY;
10030 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10032 
10033 	char* path = pathBuffer.LockBuffer();
10034 
10035 	status_t status = get_cwd(path, size, false);
10036 	if (status != B_OK)
10037 		return status;
10038 
10039 	// Copy back the result
10040 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10041 		return B_BAD_ADDRESS;
10042 
10043 	return status;
10044 }
10045 
10046 
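/*!	Sets the current working directory. If \a userPath is NULL, \a fd
	alone names the new directory (fchdir() style); otherwise the path is
	resolved relative to \a fd, or absolutely if it starts with '/'
	(chdir() style).
*/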
10047 status_t
10048 _user_setcwd(int fd, const char* userPath)
10049 {
10050 	TRACE(("user_setcwd: path = %p\n", userPath));
10051 
10052 	KPath pathBuffer;
10053 	if (pathBuffer.InitCheck() != B_OK)
10054 		return B_NO_MEMORY;
10055 
10056 	char* path = pathBuffer.LockBuffer();
10057 
10058 	if (userPath != NULL) {
10059 		if (!IS_USER_ADDRESS(userPath))
10060 			return B_BAD_ADDRESS;
10061 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10062 		if (status != B_OK)
10063 			return status;
10064 	}
10065 
10066 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10067 }
10068 
10069 
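/*!	Implements chroot(): makes the directory at \a userPath the file
	system root of the calling team's io_context. Only callers with an
	effective user ID of 0 are allowed to do this; the reference to the
	previous root vnode is released.
*/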
10070 status_t
10071 _user_change_root(const char* userPath)
10072 {
10073 	// only root is allowed to chroot()
10074 	if (geteuid() != 0)
10075 		return B_NOT_ALLOWED;
10076 
10077 	// alloc path buffer
10078 	KPath pathBuffer;
10079 	if (pathBuffer.InitCheck() != B_OK)
10080 		return B_NO_MEMORY;
10081 
10082 	// copy userland path to kernel
10083 	char* path = pathBuffer.LockBuffer();
10084 	if (userPath != NULL) {
10085 		if (!IS_USER_ADDRESS(userPath))
10086 			return B_BAD_ADDRESS;
10087 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10088 		if (status != B_OK)
10089 			return status;
10090 	}
10091 
10092 	// get the vnode
10093 	VnodePutter vnode;
10094 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10095 	if (status != B_OK)
10096 		return status;
10097 
10098 	// set the new root
10099 	struct io_context* context = get_current_io_context(false);
10100 	mutex_lock(&sIOContextRootLock);
10101 	struct vnode* oldRoot = context->root;
10102 	context->root = vnode.Detach();
10103 	mutex_unlock(&sIOContextRootLock);
10104 
10105 	put_vnode(oldRoot);
10106 
10107 	return B_OK;
10108 }
10109 
10110 
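/*!	Opens a query on the volume given by \a device. \a userQuery is a
	query string in the usual attribute query language, for example (a
	sketch):

		"name==\"*.cpp\""

	If \a flags contains B_LIVE_QUERY, entry updates are delivered to
	\a port using \a token, mirroring the userland fs_open_query() and
	fs_open_live_query() APIs.
*/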
10111 int
10112 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10113 	uint32 flags, port_id port, int32 token)
10114 {
10115 	if (device < 0 || userQuery == NULL || queryLength == 0)
10116 		return B_BAD_VALUE;
10117 
10118 	if (!IS_USER_ADDRESS(userQuery))
10119 		return B_BAD_ADDRESS;
10120 
	// refuse overlong query strings as a safety restriction
10122 	if (queryLength >= 65536)
10123 		return B_NAME_TOO_LONG;
10124 
10125 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10126 	if (!query.IsValid())
10127 		return B_NO_MEMORY;
10128 
10129 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10130 		return B_BAD_ADDRESS;
10131 
10132 	return query_open(device, query, flags, port, token, false);
10133 }
10134 
10135 
10136 #include "vfs_request_io.cpp"
10137