/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
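

// Illustrative example: FS_CALL() dispatches a VFS operation to the file
// system backing a vnode. A call such as
//
//	status_t status = FS_CALL(vnode, read_stat, &stat);
//
// expands (in the non-KDEBUG build) to
//
//	vnode->ops->read_stat(vnode->mount->volume, vnode, &stat);
//
// while the KDEBUG build additionally panics if the hook is NULL.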


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()); it does not depend
	// on PATH_MAX.


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. Thus, as soon as the
	mount is mounted and it is made sure it won't be unmounted (e.g. by holding
	a reference to a vnode of that mount), read access to those fields is
	always safe, even without additional locking. Moreover, while mounted the
	mount holds a reference to the root_vnode->covers vnode, making the access
	path vnode->mount->root_vnode->covers->mount->... safe if a reference to
	vnode is held (note that for the root mount root_vnode->covers is NULL,
	though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};
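
// Illustrative example: because a mounted fs_mount keeps a reference to
// root_vnode->covers (see the note above), a thread holding a reference to
// some vnode may safely walk towards the root without further locking:
//
//	struct vnode* covered = vnode->mount->root_vnode->covers;
//	if (covered != NULL)
//		dev_t coveredDevice = covered->mount->id;
//			// "coveredDevice" is just an illustrative local name
//
// (For the root mount, root_vnode->covers is NULL.)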


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};
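
// Note: "lock" serializes access to the "locks" list above -- it is acquired
// via get_advisory_locking() and released via put_advisory_locking() -- while
// "wait_sem" is what contenders block on; release_advisory_lock() notifies it
// with B_RELEASE_ALL so that all waiters re-check for collisions.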

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields immutable after initialization of the fs_mount structures in
	  sMountsTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000
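	// BUSY_VNODE_RETRIES * BUSY_VNODE_DELAY = 2000 * 5000 µs per snooze(),
	// i.e. 10 seconds of total waiting.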

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};
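
// Illustrative usage: VNodePutter is a RAII helper that releases a vnode
// reference when it goes out of scope, e.g.:
//
//	struct vnode* vnode;
//	if (get_vnode(device, id, &vnode, true, 0) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... use vnode; put_vnode() runs automatically on scope exit,
//		// unless putter.Detach() is called to keep the reference.
//	}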


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
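
// Illustrative note: FDCloser is the same RAII pattern as VNodePutter, but
// for file descriptors -- it closes the FD via _kern_close()/_user_close()
// on scope exit unless Detach() is called, the common idiom when an FD is
// handed out to the caller only on success.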

} // namespace


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note: you must hold sMountLock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
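
// Illustrative example: get_file_system_name("file_systems/bfs/v1") yields
// "bfs" (the "file_systems/" prefix and the trailing "/v1" are stripped),
// while get_file_system_name("bfs") simply returns a copy of "bfs".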


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
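
// Illustrative example: for fsNames = "bfs:encryption", layer 0 yields "bfs"
// and layer 1 yields "encryption"; any higher layer yields NULL.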


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	If the vnode should still be waited for, this function waits for
	BUSY_VNODE_DELAY before returning \c true.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
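
// Illustrative usage (cf. get_vnode() below): callers keep a retry counter
// and loop while the vnode stays busy:
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnodeIsBusy) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;
//		// ... re-look up the vnode and re-check its busy flag ...
//	}
//
// ("vnodeIsBusy" stands in for the actual lookup-and-check logic.)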


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait If \c true, the function is allowed to wait (and retry)
		   if the vnode is currently busy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which the
	caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with it.
	Note: you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success. If the vnode received such an object
	from someone else in the meantime, that one is left in place -- and
	locked -- instead.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
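
// Illustrative example: a flock with l_start = 100 and l_len = 50 describes
// the inclusive byte range [100, 149]; an advisory_lock intersects it exactly
// when lock->start <= 149 and lock->end >= 100.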


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	// Remember the requested lock type before overwriting it with the
	// "no collision" result.
	const int requestedType = flock->l_type;
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (requestedType != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				// The second lock covers the range after the removed region;
				// remember the original end before truncating the first lock.
				const off_t originalEnd = lock->end;
				lock->end = flock->l_start - 1;

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = originalEnd;
				secondLock->shared = lock->shared;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			delete lock;
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return B_UNSUPPORTED;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
1894 
1895 
1896 static void
1897 replace_vnode_if_disconnected(struct fs_mount* mount,
1898 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1899 	struct vnode* fallBack, bool lockRootLock)
1900 {
1901 	struct vnode* givenVnode = vnode;
1902 	bool vnodeReplaced = false;
1903 
1904 	ReadLocker vnodeReadLocker(sVnodeLock);
1905 
1906 	if (lockRootLock)
1907 		mutex_lock(&sIOContextRootLock);
1908 
1909 	while (vnode != NULL && vnode->mount == mount
1910 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1911 		if (vnode->covers != NULL) {
1912 			// redirect the vnode to the covered vnode
1913 			vnode = vnode->covers;
1914 		} else
1915 			vnode = fallBack;
1916 
1917 		vnodeReplaced = true;
1918 	}
1919 
1920 	// If we've replaced the node, grab a reference for the new one.
1921 	if (vnodeReplaced && vnode != NULL)
1922 		inc_vnode_ref_count(vnode);
1923 
1924 	if (lockRootLock)
1925 		mutex_unlock(&sIOContextRootLock);
1926 
1927 	vnodeReadLocker.Unlock();
1928 
1929 	if (vnodeReplaced)
1930 		put_vnode(givenVnode);
1931 }
1932 
1933 
1934 /*!	Disconnects all file descriptors that are associated with the
1935 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1936 	\a mount object.
1937 
1938 	Note that after you've called this function, there might still be ongoing
1939 	accesses - they won't be interrupted if they were already in progress.
1940 	However, any subsequent access will fail.
1941 
1942 	This is not a cheap function and should be used with care and rarely.
1943 	TODO: there is currently no means to stop a blocking read/write!
1944 */
1945 static void
1946 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1947 	struct vnode* vnodeToDisconnect)
1948 {
1949 	// iterate over all teams and peek into their file descriptors
1950 	TeamListIterator teamIterator;
1951 	while (Team* team = teamIterator.Next()) {
1952 		BReference<Team> teamReference(team, true);
1953 		TeamLocker teamLocker(team);
1954 
1955 		// lock the I/O context
1956 		io_context* context = team->io_context;
1957 		if (context == NULL)
1958 			continue;
1959 		MutexLocker contextLocker(context->io_mutex);
1960 
1961 		teamLocker.Unlock();
1962 
1963 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1964 			sRoot, true);
1965 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1966 			sRoot, false);
1967 
1968 		for (uint32 i = 0; i < context->table_size; i++) {
1969 			struct file_descriptor* descriptor = context->fds[i];
1970 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1971 				continue;
1972 
1973 			inc_fd_ref_count(descriptor);
1974 
1975 			// if this descriptor points at this mount, we
1976 			// need to disconnect it to be able to unmount
1977 			struct vnode* vnode = fd_vnode(descriptor);
1978 			if (vnodeToDisconnect != NULL) {
1979 				if (vnode == vnodeToDisconnect)
1980 					disconnect_fd(descriptor);
1981 			} else if ((vnode != NULL && vnode->mount == mount)
1982 				|| (vnode == NULL && descriptor->u.mount == mount))
1983 				disconnect_fd(descriptor);
1984 
1985 			put_fd(descriptor);
1986 		}
1987 	}
1988 }
1989 
1990 
1991 /*!	\brief Gets the root node of the current IO context.
1992 	If \a kernel is \c true, the kernel IO context will be used.
1993 	The caller obtains a reference to the returned node.
1994 */
1995 struct vnode*
1996 get_root_vnode(bool kernel)
1997 {
1998 	if (!kernel) {
1999 		// Get the root from the IO context
2000 		struct io_context* context = get_current_io_context(kernel);
2001 
2002 		mutex_lock(&sIOContextRootLock);
2003 
2004 		struct vnode* root = context->root;
2005 		if (root != NULL)
2006 			inc_vnode_ref_count(root);
2007 
2008 		mutex_unlock(&sIOContextRootLock);
2009 
2010 		if (root != NULL)
2011 			return root;
2012 
2013 		// That should never happen.
2014 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2015 			"have a root\n", team_get_current_team_id());
2016 	}
2017 
2018 	inc_vnode_ref_count(sRoot);
2019 	return sRoot;
2020 }
2021 
2022 
2023 /*!	\brief Gets the directory path and leaf name for a given path.
2024 
2025 	The supplied \a path is transformed to refer to the directory part of
2026 	the entry identified by the original path, and the leaf name of the
2027 	original entry is written into the buffer \a filename.
2028 	Neither the returned path nor the leaf name can be expected to be
2029 	canonical.
2030 
2031 	\param path The path to be analyzed. Must be able to store at least one
2032 		   additional character.
2033 	\param filename The buffer into which the leaf name will be written.
2034 		   Must be of size B_FILE_NAME_LENGTH at least.
2035 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2036 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2037 		   if the given path name is empty.
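
	Examples of the transformation (illustrative only): "foo" yields path
	"." and filename "foo"; "a/b/c" yields path "a/b/." and filename "c";
	"/a/b/" yields path "/a/." and filename "b"; "/" yields filename "."
	with the path left untouched.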
2038 */
2039 static status_t
2040 get_dir_path_and_leaf(char* path, char* filename)
2041 {
2042 	if (*path == '\0')
2043 		return B_ENTRY_NOT_FOUND;
2044 
2045 	char* last = strrchr(path, '/');
2046 		// '/' is not allowed in file names!
2047 
2048 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2049 
2050 	if (last == NULL) {
2051 		// this path is a single segment with no '/' in it
2052 		// ex. "foo"
2053 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2054 			return B_NAME_TOO_LONG;
2055 
2056 		strcpy(path, ".");
2057 	} else {
2058 		last++;
2059 		if (last[0] == '\0') {
2060 			// special case: the path ends in one or more '/' - remove them
2061 			while (*--last == '/' && last != path);
2062 			last[1] = '\0';
2063 
2064 			if (last == path && last[0] == '/') {
2065 				// This path points to the root of the file system
2066 				strcpy(filename, ".");
2067 				return B_OK;
2068 			}
2069 			for (; last != path && *(last - 1) != '/'; last--);
2070 				// rewind to the start of the leaf before the '/'
2071 		}
2072 
2073 		// normal leaf: replace the leaf portion of the path with a '.'
2074 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2075 			return B_NAME_TOO_LONG;
2076 
2077 		last[0] = '.';
2078 		last[1] = '\0';
2079 	}
2080 	return B_OK;
2081 }
2082 
2083 
2084 static status_t
2085 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2086 	bool traverse, bool kernel, struct vnode** _vnode)
2087 {
2088 	char clonedName[B_FILE_NAME_LENGTH + 1];
2089 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2090 		return B_NAME_TOO_LONG;
2091 
2092 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2093 	struct vnode* directory;
2094 
2095 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2096 	if (status < 0)
2097 		return status;
2098 
2099 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2100 		_vnode, NULL);
2101 }
2102 
2103 
2104 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2105 	and returns the respective vnode.
2106 	On success a reference to the vnode is acquired for the caller.
2107 */
2108 static status_t
2109 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2110 {
2111 	ino_t id;
2112 	bool missing;
2113 
2114 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2115 		return missing ? B_ENTRY_NOT_FOUND
2116 			: get_vnode(dir->device, id, _vnode, true, false);
2117 	}
2118 
2119 	status_t status = FS_CALL(dir, lookup, name, &id);
2120 	if (status != B_OK)
2121 		return status;
2122 
2123 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2124 	// have a reference and just need to look the node up.
2125 	rw_lock_read_lock(&sVnodeLock);
2126 	*_vnode = lookup_vnode(dir->device, id);
2127 	rw_lock_read_unlock(&sVnodeLock);
2128 
2129 	if (*_vnode == NULL) {
2130 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2131 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2132 		return B_ENTRY_NOT_FOUND;
2133 	}
2134 
2135 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2136 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2137 //		(*_vnode)->mount->id, (*_vnode)->id);
2138 
2139 	return B_OK;
2140 }
2141 
2142 
2143 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2144 	\a path must not be NULL.
2145 	If it returns successfully, \a path contains the name of the last path
2146 	component. This function clobbers the buffer pointed to by \a path only
2147 	if it contains more than one component.
2148 	Note that this consumes a reference to the starting \a vnode, whether it
2149 	is successful or not!
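
	A minimal sketch of that reference contract (illustrative only; the
	extra reference is needed whenever the caller wants to keep its own
	reference to the starting vnode):

		inc_vnode_ref_count(dir);
			// consumed by the call below, whether it succeeds or not
		status = vnode_path_to_vnode(dir, path, true, 0, kernel, &vnode,
			NULL);
		if (status == B_OK)
			put_vnode(vnode);
				// release the reference acquired for the caller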
2150 */
2151 static status_t
2152 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2153 	int count, struct io_context* ioContext, struct vnode** _vnode,
2154 	ino_t* _parentID)
2155 {
2156 	status_t status = B_OK;
2157 	ino_t lastParentID = vnode->id;
2158 
2159 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2160 
2161 	if (path == NULL) {
2162 		put_vnode(vnode);
2163 		return B_BAD_VALUE;
2164 	}
2165 
2166 	if (*path == '\0') {
2167 		put_vnode(vnode);
2168 		return B_ENTRY_NOT_FOUND;
2169 	}
2170 
2171 	while (true) {
2172 		struct vnode* nextVnode;
2173 		char* nextPath;
2174 
2175 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2176 			path));
2177 
2178 		// done?
2179 		if (path[0] == '\0')
2180 			break;
2181 
2182 		// walk to find the next path component ("path" will point to a single
2183 		// path component), and filter out multiple slashes
2184 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2185 				nextPath++);
2186 
2187 		bool directoryFound = false;
2188 		if (*nextPath == '/') {
2189 			directoryFound = true;
2190 			*nextPath = '\0';
2191 			do
2192 				nextPath++;
2193 			while (*nextPath == '/');
2194 		}
2195 
2196 		// If the '..' is at a covering vnode, move to the covered
2197 		// vnode, so we pass the '..' path to the underlying filesystem.
2198 		// Also prevent escaping the root of the IO context.
2199 		if (strcmp("..", path) == 0) {
2200 			if (vnode == ioContext->root) {
2201 				// Attempted prison break! Keep it contained.
2202 				path = nextPath;
2203 				continue;
2204 			}
2205 
2206 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2207 				nextVnode = coveredVnode;
2208 				put_vnode(vnode);
2209 				vnode = nextVnode;
2210 			}
2211 		}
2212 
2213 		// check if vnode is really a directory
2214 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2215 			status = B_NOT_A_DIRECTORY;
2216 
2217 		// Check if we have the right to search the current directory vnode.
2218 		// If a file system doesn't have the access() function, we assume that
2219 		// searching a directory is always allowed
2220 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2221 			status = FS_CALL(vnode, access, X_OK);
2222 
2223 		// Tell the filesystem to get the vnode of this path component (if we
2224 		// got the permission from the call above)
2225 		if (status == B_OK)
2226 			status = lookup_dir_entry(vnode, path, &nextVnode);
2227 
2228 		if (status != B_OK) {
2229 			put_vnode(vnode);
2230 			return status;
2231 		}
2232 
2233 		// If the new node is a symbolic link, resolve it (if we've been told
2234 		// to do it)
2235 		if (S_ISLNK(nextVnode->Type())
2236 			&& (traverseLeafLink || directoryFound)) {
2237 			size_t bufferSize;
2238 			char* buffer;
2239 
2240 			TRACE(("traverse link\n"));
2241 
2242 			// it's not exactly nice style using goto in this way, but hey,
2243 			// it works :-/
2244 			if (count + 1 > B_MAX_SYMLINKS) {
2245 				status = B_LINK_LIMIT;
2246 				goto resolve_link_error;
2247 			}
2248 
2249 			bufferSize = B_PATH_NAME_LENGTH;
2250 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2251 			if (buffer == NULL) {
2252 				status = B_NO_MEMORY;
2253 				goto resolve_link_error;
2254 			}
2255 
2256 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2257 				bufferSize--;
2258 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2259 				// null-terminate
2260 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2261 					buffer[bufferSize] = '\0';
2262 			} else
2263 				status = B_BAD_VALUE;
2264 
2265 			if (status != B_OK) {
2266 				free(buffer);
2267 
2268 		resolve_link_error:
2269 				put_vnode(vnode);
2270 				put_vnode(nextVnode);
2271 
2272 				return status;
2273 			}
2274 			put_vnode(nextVnode);
2275 
2276 			// Check if we start from the root directory or the current
2277 			// directory ("vnode" still points to that one).
2278 			// Cut off all leading slashes if it's the root directory
2279 			path = buffer;
2280 			bool absoluteSymlink = false;
2281 			if (path[0] == '/') {
2282 				// we don't need the old directory anymore
2283 				put_vnode(vnode);
2284 
2285 				while (*++path == '/')
2286 					;
2287 
2288 				mutex_lock(&sIOContextRootLock);
2289 				vnode = ioContext->root;
2290 				inc_vnode_ref_count(vnode);
2291 				mutex_unlock(&sIOContextRootLock);
2292 
2293 				absoluteSymlink = true;
2294 			}
2295 
2296 			inc_vnode_ref_count(vnode);
2297 				// balance the next recursion - we will decrement the
2298 				// ref_count of the vnode, no matter if we succeeded or not
2299 
2300 			if (absoluteSymlink && *path == '\0') {
2301 				// symlink was just "/"
2302 				nextVnode = vnode;
2303 			} else {
2304 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2305 					ioContext, &nextVnode, &lastParentID);
2306 			}
2307 
2308 			object_cache_free(sPathNameCache, buffer, 0);
2309 
2310 			if (status != B_OK) {
2311 				put_vnode(vnode);
2312 				return status;
2313 			}
2314 		} else
2315 			lastParentID = vnode->id;
2316 
2317 		// decrease the ref count on the old dir we just looked up in
2318 		put_vnode(vnode);
2319 
2320 		path = nextPath;
2321 		vnode = nextVnode;
2322 
2323 		// see if we hit a covered node
2324 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2325 			put_vnode(vnode);
2326 			vnode = coveringNode;
2327 		}
2328 	}
2329 
2330 	*_vnode = vnode;
2331 	if (_parentID)
2332 		*_parentID = lastParentID;
2333 
2334 	return B_OK;
2335 }
2336 
2337 
2338 static status_t
2339 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2340 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2341 {
2342 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2343 		get_current_io_context(kernel), _vnode, _parentID);
2344 }
2345 
2346 
2347 static status_t
2348 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2349 	ino_t* _parentID, bool kernel)
2350 {
2351 	struct vnode* start = NULL;
2352 
2353 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2354 
2355 	if (!path)
2356 		return B_BAD_VALUE;
2357 
2358 	if (*path == '\0')
2359 		return B_ENTRY_NOT_FOUND;
2360 
2361 	// figure out if we need to start at root or at cwd
2362 	if (*path == '/') {
2363 		if (sRoot == NULL) {
2364 			// we're a bit early, aren't we?
2365 			return B_ERROR;
2366 		}
2367 
2368 		while (*++path == '/')
2369 			;
2370 		start = get_root_vnode(kernel);
2371 
2372 		if (*path == '\0') {
2373 			*_vnode = start;
2374 			return B_OK;
2375 		}
2376 
2377 	} else {
2378 		struct io_context* context = get_current_io_context(kernel);
2379 
2380 		mutex_lock(&context->io_mutex);
2381 		start = context->cwd;
2382 		if (start != NULL)
2383 			inc_vnode_ref_count(start);
2384 		mutex_unlock(&context->io_mutex);
2385 
2386 		if (start == NULL)
2387 			return B_ERROR;
2388 	}
2389 
2390 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2391 		_parentID);
2392 }
2393 
2394 
2395 /*! Returns the vnode in the next to last segment of the path, and returns
2396 	the last portion in filename.
2397 	The path buffer must be able to store at least one additional character.
2398 */
2399 static status_t
2400 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2401 	bool kernel)
2402 {
2403 	status_t status = get_dir_path_and_leaf(path, filename);
2404 	if (status != B_OK)
2405 		return status;
2406 
2407 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2408 }
2409 
2410 
2411 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2412 		   to by a FD + path pair.
2413 
2414 	\a path must be given in either case. \a fd might be omitted, in which
2415 	case \a path is either an absolute path or one relative to the current
2416 	directory. If both are supplied and \a path is relative, it is reckoned off
2417 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2418 	ignored.
2419 
2420 	The caller has the responsibility to call put_vnode() on the returned
2421 	directory vnode.
2422 
2423 	\param fd The FD. May be < 0.
2424 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2425 	       is modified by this function. It must have at least room for a
2426 	       string one character longer than the path it contains.
2427 	\param _vnode A pointer to a variable the directory vnode shall be written
2428 		   into.
2429 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2430 		   the leaf name of the specified entry will be written.
2431 	\param kernel \c true, if invoked from inside the kernel, \c false if
2432 		   invoked from userland.
2433 	\return \c B_OK, if everything went fine, another error code otherwise.
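
	Example (illustrative only): with \a fd referring to the directory
	"/boot/home" and \a path "mail/inbox", \a _vnode is set to the vnode
	of "/boot/home/mail", \a filename to "inbox", and the \a path buffer
	itself ends up as "mail/.".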
2434 */
2435 static status_t
2436 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2437 	char* filename, bool kernel)
2438 {
2439 	if (!path)
2440 		return B_BAD_VALUE;
2441 	if (*path == '\0')
2442 		return B_ENTRY_NOT_FOUND;
2443 	if (fd < 0)
2444 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2445 
2446 	status_t status = get_dir_path_and_leaf(path, filename);
2447 	if (status != B_OK)
2448 		return status;
2449 
2450 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2451 }
2452 
2453 
2454 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2455 		   to by a vnode + path pair.
2456 
2457 	\a path must be given in either case. \a vnode might be omitted, in which
2458 	case \a path is either an absolute path or one relative to the current
2459 	directory. If both are supplied and \a path is relative, it is reckoned off
2460 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2461 	ignored.
2462 
2463 	The caller has the responsibility to call put_vnode() on the returned
2464 	directory vnode.
2465 
2466 	\param vnode The vnode. May be \c NULL.
2467 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2468 	       is modified by this function. It must have at least room for a
2469 	       string one character longer than the path it contains.
2470 	\param _vnode A pointer to a variable the directory vnode shall be written
2471 		   into.
2472 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2473 		   the leaf name of the specified entry will be written.
2474 	\param kernel \c true, if invoked from inside the kernel, \c false if
2475 		   invoked from userland.
2476 	\return \c B_OK, if everything went fine, another error code otherwise.
2477 */
2478 static status_t
2479 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2480 	struct vnode** _vnode, char* filename, bool kernel)
2481 {
2482 	if (!path)
2483 		return B_BAD_VALUE;
2484 	if (*path == '\0')
2485 		return B_ENTRY_NOT_FOUND;
2486 	if (vnode == NULL || path[0] == '/')
2487 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2488 
2489 	status_t status = get_dir_path_and_leaf(path, filename);
2490 	if (status != B_OK)
2491 		return status;
2492 
2493 	inc_vnode_ref_count(vnode);
2494 		// vnode_path_to_vnode() always decrements the ref count
2495 
2496 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2497 }
2498 
2499 
2500 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2501 */
2502 static status_t
2503 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2504 	size_t bufferSize, struct io_context* ioContext)
2505 {
2506 	if (bufferSize < sizeof(struct dirent))
2507 		return B_BAD_VALUE;
2508 
2509 	// See if the vnode is covering another vnode and move to the covered
2510 	// vnode so we get the underlying file system
2511 	VNodePutter vnodePutter;
2512 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2513 		vnode = coveredVnode;
2514 		vnodePutter.SetTo(vnode);
2515 	}
2516 
2517 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2518 		// The FS supports getting the name of a vnode.
2519 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2520 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2521 			return B_OK;
2522 	}
2523 
2524 	// The FS doesn't support getting the name of a vnode. So we search the
2525 	// parent directory for the vnode, if the caller let us.
2526 
2527 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2528 		return B_UNSUPPORTED;
2529 
2530 	void* cookie;
2531 
2532 	status_t status = FS_CALL(parent, open_dir, &cookie);
2533 	if (status >= B_OK) {
2534 		while (true) {
2535 			uint32 num = 1;
2536 			// We use the FS hook directly instead of dir_read(), since we don't
2537 			// want the entries to be fixed up. We have already resolved vnode to
2538 			// the covered node.
2539 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2540 				&num);
2541 			if (status != B_OK)
2542 				break;
2543 			if (num == 0) {
2544 				status = B_ENTRY_NOT_FOUND;
2545 				break;
2546 			}
2547 
2548 			if (vnode->id == buffer->d_ino) {
2549 				// found correct entry!
2550 				break;
2551 			}
2552 		}
2553 
2554 		FS_CALL(parent, close_dir, cookie);
2555 		FS_CALL(parent, free_dir_cookie, cookie);
2556 	}
2557 	return status;
2558 }
2559 
2560 
2561 static status_t
2562 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2563 	size_t nameSize, bool kernel)
2564 {
2565 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2566 	struct dirent* dirent = (struct dirent*)buffer;
2567 
2568 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2569 		get_current_io_context(kernel));
2570 	if (status != B_OK)
2571 		return status;
2572 
2573 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2574 		return B_BUFFER_OVERFLOW;
2575 
2576 	return B_OK;
2577 }
2578 
2579 
2580 /*!	Gets the full path to a given directory vnode.
2581 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2582 	file system doesn't support this call, it will fall back to iterating
2583 	through the parent directory to get the name of the child.
2584 
2585 	To protect against circular loops, it supports a maximum tree depth
2586 	of 256 levels.
2587 
2588 	Note that the path may no longer be correct by the time this function
2589 	returns! It doesn't use any locking to ensure the returned path is correct,
2590 	as paths aren't stable anyway: the path to a file can change at any time.
2591 
2592 	It might be a good idea, though, for the caller to check whether the
2593 	returned path exists (it's not done here for efficiency reasons).
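
	Illustrative example: for a vnode of "/boot/home" (with the IO context
	root at "/"), the buffer is filled from right to left - first "home",
	then a '/', then "boot", then the leading '/' - before the result is
	moved to the start of the buffer.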
2594 */
2595 static status_t
2596 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2597 	bool kernel)
2598 {
2599 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2600 
2601 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2602 		return B_BAD_VALUE;
2603 
2604 	if (!S_ISDIR(vnode->Type()))
2605 		return B_NOT_A_DIRECTORY;
2606 
2607 	char* path = buffer;
2608 	int32 insert = bufferSize;
2609 	int32 maxLevel = 256;
2610 	int32 length;
2611 	status_t status = B_OK;
2612 	struct io_context* ioContext = get_current_io_context(kernel);
2613 
2614 	// we don't use get_vnode() here because this call is more
2615 	// efficient and does all we need from get_vnode()
2616 	inc_vnode_ref_count(vnode);
2617 
2618 	path[--insert] = '\0';
2619 		// the path is filled right to left
2620 
2621 	while (true) {
2622 		// If the node is the context's root, bail out. Otherwise resolve mount
2623 		// points.
2624 		if (vnode == ioContext->root)
2625 			break;
2626 
2627 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2628 			put_vnode(vnode);
2629 			vnode = coveredVnode;
2630 		}
2631 
2632 		// lookup the parent vnode
2633 		struct vnode* parentVnode;
2634 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2635 		if (status != B_OK)
2636 			goto out;
2637 
2638 		if (parentVnode == vnode) {
2639 			// The caller apparently got their hands on a node outside of their
2640 			// context's root. Now we've hit the global root.
2641 			put_vnode(parentVnode);
2642 			break;
2643 		}
2644 
2645 		// get the node's name
2646 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2647 			// also used for fs_read_dir()
2648 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2649 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2650 			sizeof(nameBuffer), ioContext);
2651 
2652 		// release the current vnode, we only need its parent from now on
2653 		put_vnode(vnode);
2654 		vnode = parentVnode;
2655 
2656 		if (status != B_OK)
2657 			goto out;
2658 
2659 		// TODO: add an explicit check for loops in about 10 levels to do
2660 		// real loop detection
2661 
2662 		// don't go deeper than 'maxLevel' to prevent circular loops
2663 		if (maxLevel-- < 0) {
2664 			status = B_LINK_LIMIT;
2665 			goto out;
2666 		}
2667 
2668 		// add the name in front of the current path
2669 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2670 		length = strlen(name);
2671 		insert -= length;
2672 		if (insert <= 0) {
2673 			status = B_RESULT_NOT_REPRESENTABLE;
2674 			goto out;
2675 		}
2676 		memcpy(path + insert, name, length);
2677 		path[--insert] = '/';
2678 	}
2679 
2680 	// the root dir will result in an empty path: fix it
2681 	if (path[insert] == '\0')
2682 		path[--insert] = '/';
2683 
2684 	TRACE(("  path is: %s\n", path + insert));
2685 
2686 	// move the path to the start of the buffer
2687 	length = bufferSize - insert;
2688 	memmove(buffer, path + insert, length);
2689 
2690 out:
2691 	put_vnode(vnode);
2692 	return status;
2693 }
2694 
2695 
2696 /*!	Checks the length of every path component, and adds a '.'
2697 	if the path ends in a slash.
2698 	The given path buffer must be able to store at least one
2699 	additional character.
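
	Examples (illustrative only): "foo/bar/" becomes "foo/bar/.", an empty
	path yields B_ENTRY_NOT_FOUND, and any single component longer than
	B_FILE_NAME_LENGTH yields B_NAME_TOO_LONG.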
2700 */
2701 static status_t
2702 check_path(char* to)
2703 {
2704 	int32 length = 0;
2705 
2706 	// check length of every path component
2707 
2708 	while (*to) {
2709 		char* begin;
2710 		if (*to == '/')
2711 			to++, length++;
2712 
2713 		begin = to;
2714 		while (*to != '/' && *to)
2715 			to++, length++;
2716 
2717 		if (to - begin > B_FILE_NAME_LENGTH)
2718 			return B_NAME_TOO_LONG;
2719 	}
2720 
2721 	if (length == 0)
2722 		return B_ENTRY_NOT_FOUND;
2723 
2724 	// complete path if there is a slash at the end
2725 
2726 	if (*(to - 1) == '/') {
2727 		if (length > B_PATH_NAME_LENGTH - 2)
2728 			return B_NAME_TOO_LONG;
2729 
2730 		to[0] = '.';
2731 		to[1] = '\0';
2732 	}
2733 
2734 	return B_OK;
2735 }
2736 
2737 
2738 static struct file_descriptor*
2739 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2740 {
2741 	struct file_descriptor* descriptor
2742 		= get_fd(get_current_io_context(kernel), fd);
2743 	if (descriptor == NULL)
2744 		return NULL;
2745 
2746 	struct vnode* vnode = fd_vnode(descriptor);
2747 	if (vnode == NULL) {
2748 		put_fd(descriptor);
2749 		return NULL;
2750 	}
2751 
2752 	// ToDo: when we can close a file descriptor at any point, investigate
2753 	//	if this is still valid to do (accessing the vnode without ref_count
2754 	//	or locking)
2755 	*_vnode = vnode;
2756 	return descriptor;
2757 }
2758 
2759 
2760 static struct vnode*
2761 get_vnode_from_fd(int fd, bool kernel)
2762 {
2763 	struct file_descriptor* descriptor;
2764 	struct vnode* vnode;
2765 
2766 	descriptor = get_fd(get_current_io_context(kernel), fd);
2767 	if (descriptor == NULL)
2768 		return NULL;
2769 
2770 	vnode = fd_vnode(descriptor);
2771 	if (vnode != NULL)
2772 		inc_vnode_ref_count(vnode);
2773 
2774 	put_fd(descriptor);
2775 	return vnode;
2776 }
2777 
2778 
2779 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2780 	only the path will be considered. In this case, the \a path must not be
2781 	NULL.
2782 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2783 	and should be NULL for files.
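
	The combinations resolve as follows (mirroring the code below): no FD
	or an absolute path goes through path_to_vnode(); an FD plus a relative
	path resolves the path relative to the FD's vnode; an FD without a path
	returns the FD's vnode itself, with \a _parentID set to -1.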
2784 */
2785 static status_t
2786 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2787 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2788 {
2789 	if (fd < 0 && !path)
2790 		return B_BAD_VALUE;
2791 
2792 	if (path != NULL && *path == '\0')
2793 		return B_ENTRY_NOT_FOUND;
2794 
2795 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2796 		// no FD or absolute path
2797 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2798 	}
2799 
2800 	// FD only, or FD + relative path
2801 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2802 	if (vnode == NULL)
2803 		return B_FILE_ERROR;
2804 
2805 	if (path != NULL) {
2806 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2807 			_vnode, _parentID);
2808 	}
2809 
2810 	// there is no relative path to take into account
2811 
2812 	*_vnode = vnode;
2813 	if (_parentID)
2814 		*_parentID = -1;
2815 
2816 	return B_OK;
2817 }
2818 
2819 
2820 static int
2821 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2822 	void* cookie, int openMode, bool kernel)
2823 {
2824 	struct file_descriptor* descriptor;
2825 	int fd;
2826 
2827 	// If the vnode is locked, we don't allow creating a new file/directory
2828 	// file_descriptor for it
2829 	if (vnode && vnode->mandatory_locked_by != NULL
2830 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2831 		return B_BUSY;
2832 
2833 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2834 		return B_BAD_VALUE;
2835 
2836 	descriptor = alloc_fd();
2837 	if (!descriptor)
2838 		return B_NO_MEMORY;
2839 
2840 	if (vnode)
2841 		descriptor->u.vnode = vnode;
2842 	else
2843 		descriptor->u.mount = mount;
2844 	descriptor->cookie = cookie;
2845 
2846 	switch (type) {
2847 		// vnode types
2848 		case FDTYPE_FILE:
2849 			descriptor->ops = &sFileOps;
2850 			break;
2851 		case FDTYPE_DIR:
2852 			descriptor->ops = &sDirectoryOps;
2853 			break;
2854 		case FDTYPE_ATTR:
2855 			descriptor->ops = &sAttributeOps;
2856 			break;
2857 		case FDTYPE_ATTR_DIR:
2858 			descriptor->ops = &sAttributeDirectoryOps;
2859 			break;
2860 
2861 		// mount types
2862 		case FDTYPE_INDEX_DIR:
2863 			descriptor->ops = &sIndexDirectoryOps;
2864 			break;
2865 		case FDTYPE_QUERY:
2866 			descriptor->ops = &sQueryOps;
2867 			break;
2868 
2869 		default:
2870 			panic("get_new_fd() called with unknown type %d\n", type);
2871 			break;
2872 	}
2873 	descriptor->type = type;
2874 	descriptor->open_mode = openMode;
2875 
2876 	io_context* context = get_current_io_context(kernel);
2877 	fd = new_fd(context, descriptor);
2878 	if (fd < 0) {
2879 		descriptor->ops = NULL;
2880 		put_fd(descriptor);
2881 		return B_NO_MORE_FDS;
2882 	}
2883 
2884 	mutex_lock(&context->io_mutex);
2885 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2886 	mutex_unlock(&context->io_mutex);
2887 
2888 	return fd;
2889 }
2890 
2891 
2892 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2893 	vfs_normalize_path(). See there for more documentation.
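
	Illustrative example (assuming the directories exist): a \a path of
	"/boot/home//Desktop/../Desktop/" is rewritten in place to
	"/boot/home/Desktop", with symlinks resolved along the way if
	\a traverseLink is \c true.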
2894 */
2895 static status_t
2896 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2897 {
2898 	VNodePutter dirPutter;
2899 	struct vnode* dir = NULL;
2900 	status_t error;
2901 
2902 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2903 		// get dir vnode + leaf name
2904 		struct vnode* nextDir;
2905 		char leaf[B_FILE_NAME_LENGTH];
2906 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2907 		if (error != B_OK)
2908 			return error;
2909 
2910 		dir = nextDir;
2911 		strcpy(path, leaf);
2912 		dirPutter.SetTo(dir);
2913 
2914 		// get file vnode, if we shall resolve links
2915 		bool fileExists = false;
2916 		struct vnode* fileVnode;
2917 		VNodePutter fileVnodePutter;
2918 		if (traverseLink) {
2919 			inc_vnode_ref_count(dir);
2920 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2921 					NULL) == B_OK) {
2922 				fileVnodePutter.SetTo(fileVnode);
2923 				fileExists = true;
2924 			}
2925 		}
2926 
2927 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2928 			// we're done -- construct the path
2929 			bool hasLeaf = true;
2930 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2931 				// special cases "." and ".." -- get the dir, forget the leaf
2932 				inc_vnode_ref_count(dir);
2933 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2934 					&nextDir, NULL);
2935 				if (error != B_OK)
2936 					return error;
2937 				dir = nextDir;
2938 				dirPutter.SetTo(dir);
2939 				hasLeaf = false;
2940 			}
2941 
2942 			// get the directory path
2943 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2944 			if (error != B_OK)
2945 				return error;
2946 
2947 			// append the leaf name
2948 			if (hasLeaf) {
2949 				// insert a directory separator if this is not the file system
2950 				// root
2951 				if ((strcmp(path, "/") != 0
2952 					&& strlcat(path, "/", pathSize) >= pathSize)
2953 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2954 					return B_NAME_TOO_LONG;
2955 				}
2956 			}
2957 
2958 			return B_OK;
2959 		}
2960 
2961 		// read link
2962 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2963 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2964 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2965 			if (error != B_OK)
2966 				return error;
2967 			if (bufferSize < B_PATH_NAME_LENGTH)
2968 				path[bufferSize] = '\0';
2969 		} else
2970 			return B_BAD_VALUE;
2971 	}
2972 
2973 	return B_LINK_LIMIT;
2974 }
2975 
2976 
2977 static status_t
2978 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2979 	struct io_context* ioContext)
2980 {
2981 	// Make sure the IO context root is not bypassed.
2982 	if (parent == ioContext->root) {
2983 		*_device = parent->device;
2984 		*_node = parent->id;
2985 		return B_OK;
2986 	}
2987 
2988 	inc_vnode_ref_count(parent);
2989 		// vnode_path_to_vnode() puts the node
2990 
2991 	// ".." is guaranteed not to be clobbered by this call
2992 	struct vnode* vnode;
2993 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2994 		ioContext, &vnode, NULL);
2995 	if (status == B_OK) {
2996 		*_device = vnode->device;
2997 		*_node = vnode->id;
2998 		put_vnode(vnode);
2999 	}
3000 
3001 	return status;
3002 }
3003 
3004 
3005 #ifdef ADD_DEBUGGER_COMMANDS
3006 
3007 
3008 static void
3009 _dump_advisory_locking(advisory_locking* locking)
3010 {
3011 	if (locking == NULL)
3012 		return;
3013 
3014 	kprintf("   lock:        %" B_PRId32, locking->lock);
3015 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3016 
3017 	int32 index = 0;
3018 	LockList::Iterator iterator = locking->locks.GetIterator();
3019 	while (iterator.HasNext()) {
3020 		struct advisory_lock* lock = iterator.Next();
3021 
3022 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3023 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3024 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3025 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3026 	}
3027 }
3028 
3029 
3030 static void
3031 _dump_mount(struct fs_mount* mount)
3032 {
3033 	kprintf("MOUNT: %p\n", mount);
3034 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3035 	kprintf(" device_name:   %s\n", mount->device_name);
3036 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3037 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3038 	kprintf(" partition:     %p\n", mount->partition);
3039 	kprintf(" lock:          %p\n", &mount->lock);
3040 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3041 		mount->owns_file_device ? " owns_file_device" : "");
3042 
3043 	fs_volume* volume = mount->volume;
3044 	while (volume != NULL) {
3045 		kprintf(" volume %p:\n", volume);
3046 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3047 		kprintf("  private_volume:   %p\n", volume->private_volume);
3048 		kprintf("  ops:              %p\n", volume->ops);
3049 		kprintf("  file_system:      %p\n", volume->file_system);
3050 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3051 		volume = volume->super_volume;
3052 	}
3053 
3054 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3055 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3056 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3057 	set_debug_variable("_partition", (addr_t)mount->partition);
3058 }
3059 
3060 
3061 static bool
3062 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3063 	const char* name)
3064 {
3065 	bool insertSlash = buffer[bufferSize] != '\0';
3066 	size_t nameLength = strlen(name);
3067 
3068 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3069 		return false;
3070 
3071 	if (insertSlash)
3072 		buffer[--bufferSize] = '/';
3073 
3074 	bufferSize -= nameLength;
3075 	memcpy(buffer + bufferSize, name, nameLength);
3076 
3077 	return true;
3078 }
3079 
3080 
3081 static bool
3082 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3083 	ino_t nodeID)
3084 {
3085 	if (bufferSize == 0)
3086 		return false;
3087 
3088 	bool insertSlash = buffer[bufferSize] != '\0';
3089 	if (insertSlash)
3090 		buffer[--bufferSize] = '/';
3091 
3092 	size_t size = snprintf(buffer, bufferSize,
3093 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3094 	if (size > bufferSize) {
3095 		if (insertSlash)
3096 			bufferSize++;
3097 		return false;
3098 	}
3099 
3100 	if (size < bufferSize)
3101 		memmove(buffer + bufferSize - size, buffer, size);
3102 
3103 	bufferSize -= size;
3104 	return true;
3105 }
3106 
3107 
3108 static char*
3109 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3110 	bool& _truncated)
3111 {
3112 	// null-terminate the path
3113 	buffer[--bufferSize] = '\0';
3114 
3115 	while (true) {
3116 		while (vnode->covers != NULL)
3117 			vnode = vnode->covers;
3118 
3119 		if (vnode == sRoot) {
3120 			_truncated = bufferSize == 0;
3121 			if (!_truncated)
3122 				buffer[--bufferSize] = '/';
3123 			return buffer + bufferSize;
3124 		}
3125 
3126 		// resolve the name
3127 		ino_t dirID;
3128 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3129 			vnode->id, dirID);
3130 		if (name == NULL) {
3131 			// Failed to resolve the name -- prepend "<dev,node>/".
3132 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3133 				vnode->mount->id, vnode->id);
3134 			return buffer + bufferSize;
3135 		}
3136 
3137 		// prepend the name
3138 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3139 			_truncated = true;
3140 			return buffer + bufferSize;
3141 		}
3142 
3143 		// resolve the directory node
3144 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3145 		if (nextVnode == NULL) {
3146 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3147 				vnode->mount->id, dirID);
3148 			return buffer + bufferSize;
3149 		}
3150 
3151 		vnode = nextVnode;
3152 	}
3153 }
3154 
3155 
3156 static void
3157 _dump_vnode(struct vnode* vnode, bool printPath)
3158 {
3159 	kprintf("VNODE: %p\n", vnode);
3160 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3161 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3162 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3163 	kprintf(" private_node:  %p\n", vnode->private_node);
3164 	kprintf(" mount:         %p\n", vnode->mount);
3165 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3166 	kprintf(" covers:        %p\n", vnode->covers);
3167 	kprintf(" cache:         %p\n", vnode->cache);
3168 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3169 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3170 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3171 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3172 
3173 	_dump_advisory_locking(vnode->advisory_locking);
3174 
3175 	if (printPath) {
3176 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3177 		if (buffer != NULL) {
3178 			bool truncated;
3179 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3180 				B_PATH_NAME_LENGTH, truncated);
3181 			if (path != NULL) {
3182 				kprintf(" path:          ");
3183 				if (truncated)
3184 					kputs("<truncated>/");
3185 				kputs(path);
3186 				kputs("\n");
3187 			} else
3188 				kprintf("Failed to resolve vnode path.\n");
3189 
3190 			debug_free(buffer);
3191 		} else
3192 			kprintf("Failed to allocate memory for constructing the path.\n");
3193 	}
3194 
3195 	set_debug_variable("_node", (addr_t)vnode->private_node);
3196 	set_debug_variable("_mount", (addr_t)vnode->mount);
3197 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3198 	set_debug_variable("_covers", (addr_t)vnode->covers);
3199 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3200 }
3201 
3202 
3203 static int
3204 dump_mount(int argc, char** argv)
3205 {
3206 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3207 		kprintf("usage: %s [id|address]\n", argv[0]);
3208 		return 0;
3209 	}
3210 
3211 	ulong val = parse_expression(argv[1]);
3212 	uint32 id = val;
3213 
3214 	struct fs_mount* mount = sMountsTable->Lookup(id);
3215 	if (mount == NULL) {
3216 		if (IS_USER_ADDRESS(id)) {
3217 			kprintf("fs_mount not found\n");
3218 			return 0;
3219 		}
3220 		mount = (fs_mount*)val;
3221 	}
3222 
3223 	_dump_mount(mount);
3224 	return 0;
3225 }
3226 
3227 
3228 static int
3229 dump_mounts(int argc, char** argv)
3230 {
3231 	if (argc != 1) {
3232 		kprintf("usage: %s\n", argv[0]);
3233 		return 0;
3234 	}
3235 
3236 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3237 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3238 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3239 
3240 	struct fs_mount* mount;
3241 
3242 	MountTable::Iterator iterator(sMountsTable);
3243 	while (iterator.HasNext()) {
3244 		mount = iterator.Next();
3245 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3246 			mount->root_vnode->covers, mount->volume->private_volume,
3247 			mount->volume->file_system_name);
3248 
3249 		fs_volume* volume = mount->volume;
3250 		while (volume->super_volume != NULL) {
3251 			volume = volume->super_volume;
3252 			kprintf("                                     %p %s\n",
3253 				volume->private_volume, volume->file_system_name);
3254 		}
3255 	}
3256 
3257 	return 0;
3258 }
3259 
3260 
3261 static int
3262 dump_vnode(int argc, char** argv)
3263 {
3264 	bool printPath = false;
3265 	int argi = 1;
3266 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3267 		printPath = true;
3268 		argi++;
3269 	}
3270 
3271 	if (argi >= argc || argi + 2 < argc) {
3272 		print_debugger_command_usage(argv[0]);
3273 		return 0;
3274 	}
3275 
3276 	struct vnode* vnode = NULL;
3277 
3278 	if (argi + 1 == argc) {
3279 		vnode = (struct vnode*)parse_expression(argv[argi]);
3280 		if (IS_USER_ADDRESS(vnode)) {
3281 			kprintf("invalid vnode address\n");
3282 			return 0;
3283 		}
3284 		_dump_vnode(vnode, printPath);
3285 		return 0;
3286 	}
3287 
3288 	dev_t device = parse_expression(argv[argi]);
3289 	ino_t id = parse_expression(argv[argi + 1]);
3290 
3291 	VnodeTable::Iterator iterator(sVnodeTable);
3292 	while (iterator.HasNext()) {
3293 		vnode = iterator.Next();
3294 		if (vnode->id != id || vnode->device != device)
3295 			continue;
3296 
3297 		_dump_vnode(vnode, printPath);
3298 	}
3299 
3300 	return 0;
3301 }
3302 
3303 
3304 static int
3305 dump_vnodes(int argc, char** argv)
3306 {
3307 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3308 		kprintf("usage: %s [device]\n", argv[0]);
3309 		return 0;
3310 	}
3311 
3312 	// restrict dumped nodes to a certain device if requested
3313 	dev_t device = parse_expression(argv[1]);
3314 
3315 	struct vnode* vnode;
3316 
3317 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3318 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3319 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3320 
3321 	VnodeTable::Iterator iterator(sVnodeTable);
3322 	while (iterator.HasNext()) {
3323 		vnode = iterator.Next();
3324 		if (vnode->device != device)
3325 			continue;
3326 
3327 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3328 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3329 			vnode->private_node, vnode->advisory_locking,
3330 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3331 			vnode->IsUnpublished() ? "u" : "-");
3332 	}
3333 
3334 	return 0;
3335 }
3336 
3337 
3338 static int
3339 dump_vnode_caches(int argc, char** argv)
3340 {
3341 	struct vnode* vnode;
3342 
3343 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3344 		kprintf("usage: %s [device]\n", argv[0]);
3345 		return 0;
3346 	}
3347 
3348 	// restrict dumped nodes to a certain device if requested
3349 	dev_t device = -1;
3350 	if (argc > 1)
3351 		device = parse_expression(argv[1]);
3352 
3353 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3354 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3355 
3356 	VnodeTable::Iterator iterator(sVnodeTable);
3357 	while (iterator.HasNext()) {
3358 		vnode = iterator.Next();
3359 		if (vnode->cache == NULL)
3360 			continue;
3361 		if (device != -1 && vnode->device != device)
3362 			continue;
3363 
3364 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3365 			vnode, vnode->device, vnode->id, vnode->cache,
3366 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3367 			vnode->cache->page_count);
3368 	}
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 int
3375 dump_io_context(int argc, char** argv)
3376 {
3377 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3378 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3379 		return 0;
3380 	}
3381 
3382 	struct io_context* context = NULL;
3383 
3384 	if (argc > 1) {
3385 		ulong num = parse_expression(argv[1]);
3386 		if (IS_KERNEL_ADDRESS(num))
3387 			context = (struct io_context*)num;
3388 		else {
3389 			Team* team = team_get_team_struct_locked(num);
3390 			if (team == NULL) {
3391 				kprintf("could not find team with ID %lu\n", num);
3392 				return 0;
3393 			}
3394 			context = (struct io_context*)team->io_context;
3395 		}
3396 	} else
3397 		context = get_current_io_context(true);
3398 
3399 	kprintf("I/O CONTEXT: %p\n", context);
3400 	kprintf(" root vnode:\t%p\n", context->root);
3401 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3402 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3403 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3404 
3405 	if (context->num_used_fds) {
3406 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3407 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3408 	}
3409 
3410 	for (uint32 i = 0; i < context->table_size; i++) {
3411 		struct file_descriptor* fd = context->fds[i];
3412 		if (fd == NULL)
3413 			continue;
3414 
3415 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3416 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3417 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3418 			fd->pos, fd->cookie,
3419 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3420 				? "mount" : "vnode",
3421 			fd->u.vnode);
3422 	}
3423 
3424 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3425 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3426 
3427 	set_debug_variable("_cwd", (addr_t)context->cwd);
3428 
3429 	return 0;
3430 }
3431 
3432 
3433 int
3434 dump_vnode_usage(int argc, char** argv)
3435 {
3436 	if (argc != 1) {
3437 		kprintf("usage: %s\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3442 		sUnusedVnodes, kMaxUnusedVnodes);
3443 
3444 	uint32 count = sVnodeTable->CountElements();
3445 
3446 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3447 		count - sUnusedVnodes);
3448 	return 0;
3449 }
3450 
3451 #endif	// ADD_DEBUGGER_COMMANDS
3452 
3453 
3454 /*!	Clears memory specified by an iovec array.
3455 */
3456 static void
3457 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3458 {
3459 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3460 		size_t length = std::min(vecs[i].iov_len, bytes);
3461 		memset(vecs[i].iov_base, 0, length);
3462 		bytes -= length;
3463 	}
3464 }
3465 
3466 
3467 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3468 	and calls the file system hooks to read/write the request to disk.
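
	Illustrative example: with fileVecs { {offset 4096, length 8192},
	{offset -1, length 4096} } and a 12288 byte read request, the first
	8192 bytes are read from disk offset 4096 via the read_pages() hook,
	while the remaining 4096 bytes belong to a sparse extent (offset -1)
	and are simply zero-filled. A write to a sparse extent panics.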
3469 */
3470 static status_t
3471 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3472 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3473 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3474 	bool doWrite)
3475 {
3476 	if (fileVecCount == 0) {
3477 		// There are no file vecs at this offset, so we're obviously trying
3478 		// to access the file outside of its bounds
3479 		return B_BAD_VALUE;
3480 	}
3481 
3482 	size_t numBytes = *_numBytes;
3483 	uint32 fileVecIndex;
3484 	size_t vecOffset = *_vecOffset;
3485 	uint32 vecIndex = *_vecIndex;
3486 	status_t status;
3487 	size_t size;
3488 
3489 	if (!doWrite && vecOffset == 0) {
3490 		// now directly read the data from the device
3491 		// the first file_io_vec can be read directly
3492 
3493 		if (fileVecs[0].length < (off_t)numBytes)
3494 			size = fileVecs[0].length;
3495 		else
3496 			size = numBytes;
3497 
3498 		if (fileVecs[0].offset >= 0) {
3499 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3500 				&vecs[vecIndex], vecCount - vecIndex, &size);
3501 		} else {
3502 			// sparse read
3503 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3504 			status = B_OK;
3505 		}
3506 		if (status != B_OK)
3507 			return status;
3508 
3509 		// TODO: this is a work-around for buggy device drivers!
3510 		//	When our own drivers honour the length, we can:
3511 		//	a) also use this direct I/O for writes (otherwise, it would
3512 		//	   overwrite precious data)
3513 		//	b) panic if the term below is true (at least for writes)
3514 		if ((off_t)size > fileVecs[0].length) {
3515 			//dprintf("warning: device driver %p doesn't respect total length "
3516 			//	"in read_pages() call!\n", ref->device);
3517 			size = fileVecs[0].length;
3518 		}
3519 
3520 		ASSERT((off_t)size <= fileVecs[0].length);
3521 
3522 		// If the file portion was contiguous, we're already done now
3523 		if (size == numBytes)
3524 			return B_OK;
3525 
3526 		// if we reached the end of the file, we can return as well
3527 		if ((off_t)size != fileVecs[0].length) {
3528 			*_numBytes = size;
3529 			return B_OK;
3530 		}
3531 
3532 		fileVecIndex = 1;
3533 
3534 		// first, find out where we have to continue in our iovecs
3535 		for (; vecIndex < vecCount; vecIndex++) {
3536 			if (size < vecs[vecIndex].iov_len)
3537 				break;
3538 
3539 			size -= vecs[vecIndex].iov_len;
3540 		}
3541 
3542 		vecOffset = size;
3543 	} else {
3544 		fileVecIndex = 0;
3545 		size = 0;
3546 	}
3547 
3548 	// Too bad, let's process the rest of the file_io_vecs
3549 
3550 	size_t totalSize = size;
3551 	size_t bytesLeft = numBytes - size;
3552 
3553 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3554 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3555 		off_t fileOffset = fileVec.offset;
3556 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3557 
3558 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3559 			fileLeft));
3560 
3561 		// process the complete fileVec
3562 		while (fileLeft > 0) {
3563 			iovec tempVecs[MAX_TEMP_IO_VECS];
3564 			uint32 tempCount = 0;
3565 
3566 			// size tracks how much of what is left of the current fileVec
3567 			// (fileLeft) has been assigned to tempVecs
3568 			size = 0;
3569 
3570 			// assign what is left of the current fileVec to the tempVecs
3571 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3572 					&& tempCount < MAX_TEMP_IO_VECS;) {
3573 				// try to satisfy one iovec per iteration (or as much as
3574 				// possible)
3575 
3576 				// bytes left of the current iovec
3577 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3578 				if (vecLeft == 0) {
3579 					vecOffset = 0;
3580 					vecIndex++;
3581 					continue;
3582 				}
3583 
3584 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3585 					vecIndex, vecOffset, size));
3586 
3587 				// actually available bytes
3588 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3589 
3590 				tempVecs[tempCount].iov_base
3591 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3592 				tempVecs[tempCount].iov_len = tempVecSize;
3593 				tempCount++;
3594 
3595 				size += tempVecSize;
3596 				vecOffset += tempVecSize;
3597 			}
3598 
3599 			size_t bytes = size;
3600 
3601 			if (fileOffset == -1) {
3602 				if (doWrite) {
3603 					panic("sparse write attempt: vnode %p", vnode);
3604 					status = B_IO_ERROR;
3605 				} else {
3606 					// sparse read
3607 					zero_iovecs(tempVecs, tempCount, bytes);
3608 					status = B_OK;
3609 				}
3610 			} else if (doWrite) {
3611 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3612 					tempVecs, tempCount, &bytes);
3613 			} else {
3614 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3615 					tempVecs, tempCount, &bytes);
3616 			}
3617 			if (status != B_OK)
3618 				return status;
3619 
3620 			totalSize += bytes;
3621 			bytesLeft -= size;
3622 			if (fileOffset >= 0)
3623 				fileOffset += size;
3624 			fileLeft -= size;
3625 			//dprintf("-> file left = %Lu\n", fileLeft);
3626 
3627 			if (size != bytes || vecIndex >= vecCount) {
3628 				// there are no more bytes or iovecs, let's bail out
3629 				*_numBytes = totalSize;
3630 				return B_OK;
3631 			}
3632 		}
3633 	}
3634 
3635 	*_vecIndex = vecIndex;
3636 	*_vecOffset = vecOffset;
3637 	*_numBytes = totalSize;
3638 	return B_OK;
3639 }
3640 
3641 
3642 static bool
3643 is_user_in_group(gid_t gid)
3644 {
3645 	if (gid == getegid())
3646 		return true;
3647 
3648 	gid_t groups[NGROUPS_MAX];
3649 	int groupCount = getgroups(NGROUPS_MAX, groups);
3650 	for (int i = 0; i < groupCount; i++) {
3651 		if (gid == groups[i])
3652 			return true;
3653 	}
3654 
3655 	return false;
3656 }
3657 
3658 
3659 static status_t
3660 free_io_context(io_context* context)
3661 {
3662 	uint32 i;
3663 
3664 	TIOC(FreeIOContext(context));
3665 
3666 	if (context->root)
3667 		put_vnode(context->root);
3668 
3669 	if (context->cwd)
3670 		put_vnode(context->cwd);
3671 
3672 	mutex_lock(&context->io_mutex);
3673 
3674 	for (i = 0; i < context->table_size; i++) {
3675 		if (struct file_descriptor* descriptor = context->fds[i]) {
3676 			close_fd(context, descriptor);
3677 			put_fd(descriptor);
3678 		}
3679 	}
3680 
3681 	mutex_destroy(&context->io_mutex);
3682 
3683 	remove_node_monitors(context);
3684 	free(context->fds);
3685 	free(context);
3686 
3687 	return B_OK;
3688 }
3689 
3690 
3691 static status_t
3692 resize_monitor_table(struct io_context* context, const int newSize)
3693 {
3694 	status_t status = B_OK;
3695 
3696 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3697 		return B_BAD_VALUE;
3698 
3699 	mutex_lock(&context->io_mutex);
3700 
3701 	if ((size_t)newSize < context->num_monitors) {
3702 		status = B_BUSY;
3703 		goto out;
3704 	}
3705 	context->max_monitors = newSize;
3706 
3707 out:
3708 	mutex_unlock(&context->io_mutex);
3709 	return status;
3710 }
3711 
3712 
3713 //	#pragma mark - public API for file systems
3714 
3715 
3716 extern "C" status_t
3717 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3718 	fs_vnode_ops* ops)
3719 {
3720 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3721 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3722 
3723 	if (privateNode == NULL)
3724 		return B_BAD_VALUE;
3725 
3726 	int32 tries = BUSY_VNODE_RETRIES;
3727 restart:
3728 	// create the node
3729 	bool nodeCreated;
3730 	struct vnode* vnode;
3731 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3732 		nodeCreated);
3733 	if (status != B_OK)
3734 		return status;
3735 
3736 	WriteLocker nodeLocker(sVnodeLock, true);
3737 		// create_new_vnode_and_lock() has locked for us
3738 
3739 	if (!nodeCreated && vnode->IsBusy()) {
3740 		nodeLocker.Unlock();
3741 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3742 			return B_BUSY;
3743 		goto restart;
3744 	}
3745 
3746 	// file system integrity check:
3747 	// test if the vnode already exists and bail out if this is the case!
3748 	if (!nodeCreated) {
3749 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3750 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3751 			vnode->private_node);
3752 		return B_ERROR;
3753 	}
3754 
3755 	vnode->private_node = privateNode;
3756 	vnode->ops = ops;
3757 	vnode->SetUnpublished(true);
3758 
3759 	TRACE(("returns: %s\n", strerror(status)));
3760 
3761 	return status;
3762 }
3763 
3764 
3765 extern "C" status_t
3766 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3767 	fs_vnode_ops* ops, int type, uint32 flags)
3768 {
3769 	FUNCTION(("publish_vnode()\n"));
3770 
3771 	int32 tries = BUSY_VNODE_RETRIES;
3772 restart:
3773 	WriteLocker locker(sVnodeLock);
3774 
3775 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3776 
3777 	bool nodeCreated = false;
3778 	if (vnode == NULL) {
3779 		if (privateNode == NULL)
3780 			return B_BAD_VALUE;
3781 
3782 		// create the node
3783 		locker.Unlock();
3784 			// create_new_vnode_and_lock() will re-lock for us on success
3785 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3786 			nodeCreated);
3787 		if (status != B_OK)
3788 			return status;
3789 
3790 		locker.SetTo(sVnodeLock, true);
3791 	}
3792 
3793 	if (nodeCreated) {
3794 		vnode->private_node = privateNode;
3795 		vnode->ops = ops;
3796 		vnode->SetUnpublished(true);
3797 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3798 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3799 		// already known, but not published
3800 	} else if (vnode->IsBusy()) {
3801 		locker.Unlock();
3802 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3803 			return B_BUSY;
3804 		goto restart;
3805 	} else
3806 		return B_BAD_VALUE;
3807 
3808 	bool publishSpecialSubNode = false;
3809 
3810 	vnode->SetType(type);
3811 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3812 	publishSpecialSubNode = is_special_node_type(type)
3813 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3814 
3815 	status_t status = B_OK;
3816 
3817 	// create sub vnodes, if necessary
3818 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3819 		locker.Unlock();
3820 
3821 		fs_volume* subVolume = volume;
3822 		if (volume->sub_volume != NULL) {
3823 			while (status == B_OK && subVolume->sub_volume != NULL) {
3824 				subVolume = subVolume->sub_volume;
3825 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3826 					vnode);
3827 			}
3828 		}
3829 
3830 		if (status == B_OK && publishSpecialSubNode)
3831 			status = create_special_sub_node(vnode, flags);
3832 
3833 		if (status != B_OK) {
3834 			// error -- clean up the created sub vnodes
3835 			while (subVolume->super_volume != volume) {
3836 				subVolume = subVolume->super_volume;
3837 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3838 			}
3839 		}
3840 
3841 		if (status == B_OK) {
3842 			ReadLocker vnodesReadLocker(sVnodeLock);
3843 			AutoLocker<Vnode> nodeLocker(vnode);
3844 			vnode->SetBusy(false);
3845 			vnode->SetUnpublished(false);
3846 		} else {
3847 			locker.Lock();
3848 			sVnodeTable->Remove(vnode);
3849 			remove_vnode_from_mount_list(vnode, vnode->mount);
3850 			object_cache_free(sVnodeCache, vnode, 0);
3851 		}
3852 	} else {
3853 		// we still hold the write lock -- mark the node unbusy and published
3854 		vnode->SetBusy(false);
3855 		vnode->SetUnpublished(false);
3856 	}
3857 
3858 	TRACE(("returns: %s\n", strerror(status)));
3859 
3860 	return status;
3861 }
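
// A minimal sketch of the intended calling sequence from a file system's
// point of view ("my_fs_announce_node" is hypothetical, error handling
// trimmed): a node can be announced in two steps -- new_vnode() while it is
// still being set up, publish_vnode() once it is ready -- or with a single
// publish_vnode() call.
#if 0
static status_t
my_fs_announce_node(fs_volume* volume, ino_t id, void* privateNode,
	fs_vnode_ops* ops)
{
	// step 1: make the vnode known; it stays busy and unpublished
	status_t status = new_vnode(volume, id, privateNode, ops);
	if (status != B_OK)
		return status;

	// ... complete the initialization of the node ...

	// step 2: publish it, so that get_vnode() and path resolution find it
	return publish_vnode(volume, id, privateNode, ops, S_IFREG, 0);
}
#endif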
3862 
3863 
3864 extern "C" status_t
3865 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3866 {
3867 	struct vnode* vnode;
3868 
3869 	if (volume == NULL)
3870 		return B_BAD_VALUE;
3871 
3872 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3873 	if (status != B_OK)
3874 		return status;
3875 
3876 	// If this is a layered FS, we need to get the node cookie for the requested
3877 	// layer.
3878 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3879 		fs_vnode resolvedNode;
3880 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3881 			&resolvedNode);
3882 		if (status != B_OK) {
3883 			panic("get_vnode(): Failed to get super node for vnode %p, "
3884 				"volume: %p", vnode, volume);
3885 			put_vnode(vnode);
3886 			return status;
3887 		}
3888 
3889 		if (_privateNode != NULL)
3890 			*_privateNode = resolvedNode.private_node;
3891 	} else if (_privateNode != NULL)
3892 		*_privateNode = vnode->private_node;
3893 
3894 	return B_OK;
3895 }
3896 
3897 
3898 extern "C" status_t
3899 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3900 {
3901 	ReadLocker nodeLocker(sVnodeLock);
3902 
3903 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3904 	if (vnode == NULL)
3905 		return B_BAD_VALUE;
3906 
3907 	inc_vnode_ref_count(vnode);
3908 	return B_OK;
3909 }
3910 
3911 
3912 extern "C" status_t
3913 put_vnode(fs_volume* volume, ino_t vnodeID)
3914 {
3915 	struct vnode* vnode;
3916 
3917 	rw_lock_read_lock(&sVnodeLock);
3918 	vnode = lookup_vnode(volume->id, vnodeID);
3919 	rw_lock_read_unlock(&sVnodeLock);
3920 
3921 	if (vnode == NULL)
3922 		return B_BAD_VALUE;
3923 
3924 	dec_vnode_ref_count(vnode, false, true);
3925 	return B_OK;
3926 }
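
// Reference counting sketch ("my_fs_work_with_node" is hypothetical): every
// successful get_vnode() has to be balanced by exactly one put_vnode().
#if 0
static void
my_fs_work_with_node(fs_volume* volume, ino_t id)
{
	void* privateNode;
	if (get_vnode(volume, id, &privateNode) != B_OK)
		return;

	// ... use privateNode ...

	put_vnode(volume, id);
}
#endif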
3927 
3928 
3929 extern "C" status_t
3930 remove_vnode(fs_volume* volume, ino_t vnodeID)
3931 {
3932 	ReadLocker locker(sVnodeLock);
3933 
3934 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3935 	if (vnode == NULL)
3936 		return B_ENTRY_NOT_FOUND;
3937 
3938 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3939 		// this vnode is in use
3940 		return B_BUSY;
3941 	}
3942 
3943 	vnode->Lock();
3944 
3945 	vnode->SetRemoved(true);
3946 	bool removeUnpublished = false;
3947 
3948 	if (vnode->IsUnpublished()) {
3949 		// prepare the vnode for deletion
3950 		removeUnpublished = true;
3951 		vnode->SetBusy(true);
3952 	}
3953 
3954 	vnode->Unlock();
3955 	locker.Unlock();
3956 
3957 	if (removeUnpublished) {
3958 		// If the vnode hasn't been published yet, we delete it here
3959 		atomic_add(&vnode->ref_count, -1);
3960 		free_vnode(vnode, true);
3961 	}
3962 
3963 	return B_OK;
3964 }
3965 
3966 
3967 extern "C" status_t
3968 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3969 {
3970 	struct vnode* vnode;
3971 
3972 	rw_lock_read_lock(&sVnodeLock);
3973 
3974 	vnode = lookup_vnode(volume->id, vnodeID);
3975 	if (vnode) {
3976 		AutoLocker<Vnode> nodeLocker(vnode);
3977 		vnode->SetRemoved(false);
3978 	}
3979 
3980 	rw_lock_read_unlock(&sVnodeLock);
3981 	return B_OK;
3982 }
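
// Typical use (sketch; "my_fs_unlink" and "my_fs_remove_entry" are
// hypothetical): a file system calls remove_vnode() when the last entry
// referring to a node goes away, and unremove_vnode() if the operation has
// to be rolled back.
#if 0
static status_t
my_fs_unlink(fs_volume* volume, void* dir, const char* name)
{
	ino_t id;
	// ... resolve "name" in "dir" to the node ID "id" ...

	status_t status = remove_vnode(volume, id);
	if (status != B_OK)
		return status;

	status = my_fs_remove_entry(dir, name);
	if (status != B_OK) {
		// roll back: the node is no longer marked for deletion
		unremove_vnode(volume, id);
		return status;
	}

	return B_OK;
}
#endif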
3983 
3984 
3985 extern "C" status_t
3986 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3987 {
3988 	ReadLocker _(sVnodeLock);
3989 
3990 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3991 		if (_removed != NULL)
3992 			*_removed = vnode->IsRemoved();
3993 		return B_OK;
3994 	}
3995 
3996 	return B_BAD_VALUE;
3997 }
3998 
3999 
4000 extern "C" fs_volume*
4001 volume_for_vnode(fs_vnode* _vnode)
4002 {
4003 	if (_vnode == NULL)
4004 		return NULL;
4005 
4006 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4007 	return vnode->mount->volume;
4008 }
4009 
4010 
4011 extern "C" status_t
4012 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4013 	uid_t nodeUserID)
4014 {
4015 	// get node permissions
4016 	int userPermissions = (mode & S_IRWXU) >> 6;
4017 	int groupPermissions = (mode & S_IRWXG) >> 3;
4018 	int otherPermissions = mode & S_IRWXO;
4019 
4020 	// get the node permissions for this uid/gid
4021 	int permissions = 0;
4022 	uid_t uid = geteuid();
4023 
4024 	if (uid == 0) {
4025 		// user is root
4026 		// root always has read/write permission, but at least one of the
4027 		// X bits must be set for execute permission
4028 		permissions = userPermissions | groupPermissions | otherPermissions
4029 			| S_IROTH | S_IWOTH;
4030 		if (S_ISDIR(mode))
4031 			permissions |= S_IXOTH;
4032 	} else if (uid == nodeUserID) {
4033 		// user is node owner
4034 		permissions = userPermissions;
4035 	} else if (is_user_in_group(nodeGroupID)) {
4036 		// user is in owning group
4037 		permissions = groupPermissions;
4038 	} else {
4039 		// user is one of the others
4040 		permissions = otherPermissions;
4041 	}
4042 
4043 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4044 }
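
// Worked example: for a node with mode 0644 (rw-r--r--) owned by another
// user, and with none of the caller's groups matching, "permissions"
// resolves to otherPermissions == 4. An accessMode of R_OK (4) then yields
// B_OK, while W_OK (2) yields B_PERMISSION_DENIED, since (2 & ~4) != 0.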
4045 
4046 
4047 #if 0
4048 extern "C" status_t
4049 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4050 	size_t* _numBytes)
4051 {
4052 	struct file_descriptor* descriptor;
4053 	struct vnode* vnode;
4054 
4055 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4056 	if (descriptor == NULL)
4057 		return B_FILE_ERROR;
4058 
4059 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4060 		count, 0, _numBytes);
4061 
4062 	put_fd(descriptor);
4063 	return status;
4064 }
4065 
4066 
4067 extern "C" status_t
4068 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4069 	size_t* _numBytes)
4070 {
4071 	struct file_descriptor* descriptor;
4072 	struct vnode* vnode;
4073 
4074 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4075 	if (descriptor == NULL)
4076 		return B_FILE_ERROR;
4077 
4078 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4079 		count, 0, _numBytes);
4080 
4081 	put_fd(descriptor);
4082 	return status;
4083 }
4084 #endif
4085 
4086 
4087 extern "C" status_t
4088 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4089 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4090 	size_t* _bytes)
4091 {
4092 	struct file_descriptor* descriptor;
4093 	struct vnode* vnode;
4094 
4095 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4096 	if (descriptor == NULL)
4097 		return B_FILE_ERROR;
4098 
4099 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4100 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4101 		false);
4102 
4103 	put_fd(descriptor);
4104 	return status;
4105 }
4106 
4107 
4108 extern "C" status_t
4109 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4110 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4111 	size_t* _bytes)
4112 {
4113 	struct file_descriptor* descriptor;
4114 	struct vnode* vnode;
4115 
4116 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4117 	if (descriptor == NULL)
4118 		return B_FILE_ERROR;
4119 
4120 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4121 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4122 		true);
4123 
4124 	put_fd(descriptor);
4125 	return status;
4126 }
4127 
4128 
4129 extern "C" status_t
4130 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4131 {
4132 	// look up the mount -- the caller is required to make sure that the
4133 	// mount won't go away
4134 	ReadLocker locker(sMountLock);
4135 	struct fs_mount* mount = find_mount(mountID);
4136 	if (mount == NULL)
4137 		return B_BAD_VALUE;
4138 	locker.Unlock();
4139 
4140 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4141 }
4142 
4143 
4144 extern "C" status_t
4145 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4146 {
4147 	// look up the mount -- the caller is required to make sure that the
4148 	// mount won't go away
4149 	ReadLocker locker(sMountLock);
4150 	struct fs_mount* mount = find_mount(mountID);
4151 	if (mount == NULL)
4152 		return B_BAD_VALUE;
4153 	locker.Unlock();
4154 
4155 	return mount->entry_cache.Add(dirID, name, -1, true);
4156 }
4157 
4158 
4159 extern "C" status_t
4160 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4161 {
4162 	// look up the mount -- the caller is required to make sure that the
4163 	// mount won't go away
4164 	ReadLocker locker(sMountLock);
4165 	struct fs_mount* mount = find_mount(mountID);
4166 	if (mount == NULL)
4167 		return B_BAD_VALUE;
4168 	locker.Unlock();
4169 
4170 	return mount->entry_cache.Remove(dirID, name);
4171 }
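
// Sketch of how a file system keeps the entry cache coherent (the
// surrounding hook implementations are hypothetical): positive entries are
// added after successful lookups or creations, negative entries after failed
// lookups, and entries are removed again on unlink/rename.
#if 0
// after resolving "name" in directory dirID to node nodeID:
entry_cache_add(volume->id, dirID, name, nodeID);

// after a lookup of "name" failed with B_ENTRY_NOT_FOUND:
entry_cache_add_missing(volume->id, dirID, name);

// when "name" is unlinked or renamed away:
entry_cache_remove(volume->id, dirID, name);
#endif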
4172 
4173 
4174 //	#pragma mark - private VFS API
4175 //	Functions the VFS exports for other parts of the kernel
4176 
4177 
4178 /*! Acquires another reference to the vnode that has to be released
4179 	by calling vfs_put_vnode().
4180 */
4181 void
4182 vfs_acquire_vnode(struct vnode* vnode)
4183 {
4184 	inc_vnode_ref_count(vnode);
4185 }
4186 
4187 
4188 /*! This is currently called from file_cache_create() only.
4189 	It's probably a temporary solution as long as devfs requires that
4190 	fs_read_pages()/fs_write_pages() are called with the standard
4191 	open cookie and not with a device cookie.
4192 	If that's done differently, remove this call; it has no other
4193 	purpose.
4194 */
4195 extern "C" status_t
4196 vfs_get_cookie_from_fd(int fd, void** _cookie)
4197 {
4198 	struct file_descriptor* descriptor;
4199 
4200 	descriptor = get_fd(get_current_io_context(true), fd);
4201 	if (descriptor == NULL)
4202 		return B_FILE_ERROR;
4203 
4204 	*_cookie = descriptor->cookie;
4205 	return B_OK;
4206 }
4207 
4208 
4209 extern "C" status_t
4210 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4211 {
4212 	*vnode = get_vnode_from_fd(fd, kernel);
4213 
4214 	if (*vnode == NULL)
4215 		return B_FILE_ERROR;
4216 
4217 	return B_OK;
4218 }
4219 
4220 
4221 extern "C" status_t
4222 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4223 {
4224 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4225 		path, kernel));
4226 
4227 	KPath pathBuffer;
4228 	if (pathBuffer.InitCheck() != B_OK)
4229 		return B_NO_MEMORY;
4230 
4231 	char* buffer = pathBuffer.LockBuffer();
4232 	strlcpy(buffer, path, pathBuffer.BufferSize());
4233 
4234 	struct vnode* vnode;
4235 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4236 	if (status != B_OK)
4237 		return status;
4238 
4239 	*_vnode = vnode;
4240 	return B_OK;
4241 }
4242 
4243 
4244 extern "C" status_t
4245 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4246 {
4247 	struct vnode* vnode = NULL;
4248 
4249 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4250 	if (status != B_OK)
4251 		return status;
4252 
4253 	*_vnode = vnode;
4254 	return B_OK;
4255 }
4256 
4257 
4258 extern "C" status_t
4259 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4260 	const char* name, struct vnode** _vnode)
4261 {
4262 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4263 }
4264 
4265 
4266 extern "C" void
4267 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4268 {
4269 	*_mountID = vnode->device;
4270 	*_vnodeID = vnode->id;
4271 }
4272 
4273 
4274 /*!
4275 	Helper function abstracting the process of "converting" a given
4276 	vnode-pointer to a fs_vnode-pointer.
4277 	Currently only used in bindfs.
4278 */
4279 extern "C" fs_vnode*
4280 vfs_fsnode_for_vnode(struct vnode* vnode)
4281 {
4282 	return vnode;
4283 }
4284 
4285 
4286 /*!
4287 	Calls fs_open() on the given vnode and returns a new
4288 	file descriptor for it
4289 */
4290 int
4291 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4292 {
4293 	return open_vnode(vnode, openMode, kernel);
4294 }
4295 
4296 
4297 /*!	Looks up a vnode with the given mount and vnode ID.
4298 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4299 	to the node.
4300 	It's currently only used by file_cache_create().
4301 */
4302 extern "C" status_t
4303 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4304 {
4305 	rw_lock_read_lock(&sVnodeLock);
4306 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4307 	rw_lock_read_unlock(&sVnodeLock);
4308 
4309 	if (vnode == NULL)
4310 		return B_ERROR;
4311 
4312 	*_vnode = vnode;
4313 	return B_OK;
4314 }
4315 
4316 
4317 extern "C" status_t
4318 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4319 	bool traverseLeafLink, bool kernel, void** _node)
4320 {
4321 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4322 		volume, path, kernel));
4323 
4324 	KPath pathBuffer;
4325 	if (pathBuffer.InitCheck() != B_OK)
4326 		return B_NO_MEMORY;
4327 
4328 	fs_mount* mount;
4329 	status_t status = get_mount(volume->id, &mount);
4330 	if (status != B_OK)
4331 		return status;
4332 
4333 	char* buffer = pathBuffer.LockBuffer();
4334 	strlcpy(buffer, path, pathBuffer.BufferSize());
4335 
4336 	struct vnode* vnode = mount->root_vnode;
4337 
4338 	if (buffer[0] == '/')
4339 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4340 	else {
4341 		inc_vnode_ref_count(vnode);
4342 			// vnode_path_to_vnode() releases a reference to the starting vnode
4343 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4344 			kernel, &vnode, NULL);
4345 	}
4346 
4347 	put_mount(mount);
4348 
4349 	if (status != B_OK)
4350 		return status;
4351 
4352 	if (vnode->device != volume->id) {
4353 		// wrong mount ID - must not gain access on foreign file system nodes
4354 		put_vnode(vnode);
4355 		return B_BAD_VALUE;
4356 	}
4357 
4358 	// Use get_vnode() to resolve the cookie for the right layer.
4359 	status = get_vnode(volume, vnode->id, _node);
4360 	put_vnode(vnode);
4361 
4362 	return status;
4363 }
4364 
4365 
4366 status_t
4367 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4368 	struct stat* stat, bool kernel)
4369 {
4370 	status_t status;
4371 
4372 	if (path != NULL) {
4373 		// path given: get the stat of the node referred to by (fd, path)
4374 		KPath pathBuffer(path);
4375 		if (pathBuffer.InitCheck() != B_OK)
4376 			return B_NO_MEMORY;
4377 
4378 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4379 			traverseLeafLink, stat, kernel);
4380 	} else {
4381 		// no path given: get the FD and use the FD operation
4382 		struct file_descriptor* descriptor
4383 			= get_fd(get_current_io_context(kernel), fd);
4384 		if (descriptor == NULL)
4385 			return B_FILE_ERROR;
4386 
4387 		if (descriptor->ops->fd_read_stat)
4388 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4389 		else
4390 			status = B_UNSUPPORTED;
4391 
4392 		put_fd(descriptor);
4393 	}
4394 
4395 	return status;
4396 }
4397 
4398 
4399 /*!	Finds the full path to the file that contains the module \a moduleName,
4400 	puts it into \a pathBuffer, and returns B_OK for success.
4401 	If \a pathBuffer is too small, it returns \c B_BUFFER_OVERFLOW, and
4402 	\c B_ENTRY_NOT_FOUND if no file could be found.
4403 	\a pathBuffer is clobbered in any case and must not be relied on if this
4404 	function returns unsuccessfully.
4405 	\a basePath and \a pathBuffer must not point to the same space.
4406 */
4407 status_t
4408 vfs_get_module_path(const char* basePath, const char* moduleName,
4409 	char* pathBuffer, size_t bufferSize)
4410 {
4411 	struct vnode* dir;
4412 	struct vnode* file;
4413 	status_t status;
4414 	size_t length;
4415 	char* path;
4416 
4417 	if (bufferSize == 0
4418 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4419 		return B_BUFFER_OVERFLOW;
4420 
4421 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4422 	if (status != B_OK)
4423 		return status;
4424 
4425 	// the path buffer had been clobbered by the above call
4426 	length = strlcpy(pathBuffer, basePath, bufferSize);
4427 	if (pathBuffer[length - 1] != '/')
4428 		pathBuffer[length++] = '/';
4429 
4430 	path = pathBuffer + length;
4431 	bufferSize -= length;
4432 
4433 	while (moduleName) {
4434 		char* nextPath = strchr(moduleName, '/');
4435 		if (nextPath == NULL)
4436 			length = strlen(moduleName);
4437 		else {
4438 			length = nextPath - moduleName;
4439 			nextPath++;
4440 		}
4441 
4442 		if (length + 1 >= bufferSize) {
4443 			status = B_BUFFER_OVERFLOW;
4444 			goto err;
4445 		}
4446 
4447 		memcpy(path, moduleName, length);
4448 		path[length] = '\0';
4449 		moduleName = nextPath;
4450 
4451 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4452 		if (status != B_OK) {
4453 			// vnode_path_to_vnode() has already released the reference to dir
4454 			return status;
4455 		}
4456 
4457 		if (S_ISDIR(file->Type())) {
4458 			// go to the next directory
4459 			path[length] = '/';
4460 			path[length + 1] = '\0';
4461 			path += length + 1;
4462 			bufferSize -= length + 1;
4463 
4464 			dir = file;
4465 		} else if (S_ISREG(file->Type())) {
4466 			// it's a file so it should be what we've searched for
4467 			put_vnode(file);
4468 
4469 			return B_OK;
4470 		} else {
4471 			TRACE(("vfs_get_module_path(): something is strange here: "
4472 				"0x%08" B_PRIx32 "...\n", file->Type()));
4473 			status = B_ERROR;
4474 			dir = file;
4475 			goto err;
4476 		}
4477 	}
4478 
4479 	// if we got here, the moduleName just pointed to a directory, not to
4480 	// a real module - what should we do in this case?
4481 	status = B_ENTRY_NOT_FOUND;
4482 
4483 err:
4484 	put_vnode(dir);
4485 	return status;
4486 }
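
// Example (illustrative values): with basePath "/boot/system/add-ons/kernel"
// and moduleName "bus_managers/usb/v3", the loop above descends into
// "bus_managers"; if "usb" then turns out to be a regular file, the function
// returns B_OK with pathBuffer containing
// "/boot/system/add-ons/kernel/bus_managers/usb" -- the trailing "v3" names
// a module inside that file, not a path component.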
4487 
4488 
4489 /*!	\brief Normalizes a given path.
4490 
4491 	The path must refer to an existing or non-existing entry in an existing
4492 	directory, that is chopping off the leaf component the remaining path must
4493 	refer to an existing directory.
4494 
4495 	The returned path will be canonical in that it will be absolute, will
4496 	not contain any "." or ".." components or duplicate occurrences of '/',
4497 	and none of the directory components will be symbolic links.
4498 
4499 	Any two paths referring to the same entry will result in the same
4500 	normalized path (well, that is pretty much the definition of `normalized',
4501 	isn't it :-).
4502 
4503 	\param path The path to be normalized.
4504 	\param buffer The buffer into which the normalized path will be written.
4505 		   May be the same one as \a path.
4506 	\param bufferSize The size of \a buffer.
4507 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4508 	\param kernel \c true, if the IO context of the kernel shall be used,
4509 		   otherwise that of the team this thread belongs to. Only relevant,
4510 		   if the path is relative (to get the CWD).
4511 	\return \c B_OK if everything went fine, another error code otherwise.
4512 */
4513 status_t
4514 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4515 	bool traverseLink, bool kernel)
4516 {
4517 	if (!path || !buffer || bufferSize < 1)
4518 		return B_BAD_VALUE;
4519 
4520 	if (path != buffer) {
4521 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4522 			return B_BUFFER_OVERFLOW;
4523 	}
4524 
4525 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4526 }
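
// Usage sketch (in-place normalization; the path is illustrative):
#if 0
char path[B_PATH_NAME_LENGTH] = "/boot/./system//lib/../bin";
if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
	// "path" now contains "/boot/system/bin" (assuming no symlinks are
	// involved along the way)
}
#endif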
4527 
4528 
4529 /*!	\brief Gets the parent of the passed in node.
4530 
4531 	Gets the parent of the passed in node, and correctly resolves covered
4532 	nodes.
4533 */
4534 extern "C" status_t
4535 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4536 {
4537 	return resolve_covered_parent(parent, device, node,
4538 		get_current_io_context(true));
4539 }
4540 
4541 
4542 /*!	\brief Creates a special node in the file system.
4543 
4544 	The caller gets a reference to the newly created node (which is passed
4545 	back through \a _createdVnode) and is responsible for releasing it.
4546 
4547 	\param path The path at which to create the entry for the node. Can be \c NULL,
4548 		in which case the node is created without an entry in the root FS -- it
4549 		will automatically be deleted when the last reference has been released.
4550 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4551 		the target file system will just create the node with its standard
4552 		operations. Depending on the type of the node a subnode might be created
4553 		automatically, though.
4554 	\param mode The type and permissions for the node to be created.
4555 	\param flags Flags to be passed to the creating FS.
4556 	\param kernel \c true, if called in the kernel context (relevant only if
4557 		\a path is not \c NULL and not absolute).
4558 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4559 		file system creating the node, with the private data pointer and
4560 		operations for the super node. Can be \c NULL.
4561 	\param _createdVnode Pointer to pre-allocated storage in which to store the
4562 		pointer to the newly created node.
4563 	\return \c B_OK, if everything went fine, another error code otherwise.
4564 */
4565 status_t
4566 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4567 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4568 	struct vnode** _createdVnode)
4569 {
4570 	struct vnode* dirNode;
4571 	char _leaf[B_FILE_NAME_LENGTH];
4572 	char* leaf = NULL;
4573 
4574 	if (path) {
4575 		// We've got a path. Get the dir vnode and the leaf name.
4576 		KPath tmpPathBuffer;
4577 		if (tmpPathBuffer.InitCheck() != B_OK)
4578 			return B_NO_MEMORY;
4579 
4580 		char* tmpPath = tmpPathBuffer.LockBuffer();
4581 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4582 			return B_NAME_TOO_LONG;
4583 
4584 		// get the dir vnode and the leaf name
4585 		leaf = _leaf;
4586 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4587 		if (error != B_OK)
4588 			return error;
4589 	} else {
4590 		// No path. Create the node in the root FS.
4591 		dirNode = sRoot;
4592 		inc_vnode_ref_count(dirNode);
4593 	}
4594 
4595 	VNodePutter _(dirNode);
4596 
4597 	// check support for creating special nodes
4598 	if (!HAS_FS_CALL(dirNode, create_special_node))
4599 		return B_UNSUPPORTED;
4600 
4601 	// create the node
4602 	fs_vnode superVnode;
4603 	ino_t nodeID;
4604 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4605 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4606 	if (status != B_OK)
4607 		return status;
4608 
4609 	// look up the newly created node
4610 	rw_lock_read_lock(&sVnodeLock);
4611 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4612 	rw_lock_read_unlock(&sVnodeLock);
4613 
4614 	if (*_createdVnode == NULL) {
4615 		panic("vfs_create_special_node(): lookup of node failed");
4616 		return B_ERROR;
4617 	}
4618 
4619 	return B_OK;
4620 }
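
// Sketch: creating a published FIFO node (path and mode are illustrative).
// The reference returned through _createdVnode belongs to the caller:
#if 0
struct vnode* createdVnode;
status_t status = vfs_create_special_node("/var/run/my_fifo", NULL,
	S_IFIFO | 0666, 0, true, NULL, &createdVnode);
if (status == B_OK)
	vfs_put_vnode(createdVnode);
#endif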
4621 
4622 
4623 extern "C" void
4624 vfs_put_vnode(struct vnode* vnode)
4625 {
4626 	put_vnode(vnode);
4627 }
4628 
4629 
4630 extern "C" status_t
4631 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4632 {
4633 	// Get current working directory from io context
4634 	struct io_context* context = get_current_io_context(false);
4635 	status_t status = B_OK;
4636 
4637 	mutex_lock(&context->io_mutex);
4638 
4639 	if (context->cwd != NULL) {
4640 		*_mountID = context->cwd->device;
4641 		*_vnodeID = context->cwd->id;
4642 	} else
4643 		status = B_ERROR;
4644 
4645 	mutex_unlock(&context->io_mutex);
4646 	return status;
4647 }
4648 
4649 
4650 status_t
4651 vfs_unmount(dev_t mountID, uint32 flags)
4652 {
4653 	return fs_unmount(NULL, mountID, flags, true);
4654 }
4655 
4656 
4657 extern "C" status_t
4658 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4659 {
4660 	struct vnode* vnode;
4661 
4662 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4663 	if (status != B_OK)
4664 		return status;
4665 
4666 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4667 	put_vnode(vnode);
4668 	return B_OK;
4669 }
4670 
4671 
4672 extern "C" void
4673 vfs_free_unused_vnodes(int32 level)
4674 {
4675 	vnode_low_resource_handler(NULL,
4676 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4677 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4678 		level);
4679 }
4680 
4681 
4682 extern "C" bool
4683 vfs_can_page(struct vnode* vnode, void* cookie)
4684 {
4685 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4686 
4687 	if (HAS_FS_CALL(vnode, can_page))
4688 		return FS_CALL(vnode, can_page, cookie);
4689 	return false;
4690 }
4691 
4692 
4693 extern "C" status_t
4694 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4695 	const generic_io_vec* vecs, size_t count, uint32 flags,
4696 	generic_size_t* _numBytes)
4697 {
4698 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4699 		vecs, pos));
4700 
4701 #if VFS_PAGES_IO_TRACING
4702 	generic_size_t bytesRequested = *_numBytes;
4703 #endif
4704 
4705 	IORequest request;
4706 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4707 	if (status == B_OK) {
4708 		status = vfs_vnode_io(vnode, cookie, &request);
4709 		if (status == B_OK)
4710 			status = request.Wait();
4711 		*_numBytes = request.TransferredBytes();
4712 	}
4713 
4714 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4715 		status, *_numBytes));
4716 
4717 	return status;
4718 }
4719 
4720 
4721 extern "C" status_t
4722 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4723 	const generic_io_vec* vecs, size_t count, uint32 flags,
4724 	generic_size_t* _numBytes)
4725 {
4726 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4727 		vecs, pos));
4728 
4729 #if VFS_PAGES_IO_TRACING
4730 	generic_size_t bytesRequested = *_numBytes;
4731 #endif
4732 
4733 	IORequest request;
4734 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4735 	if (status == B_OK) {
4736 		status = vfs_vnode_io(vnode, cookie, &request);
4737 		if (status == B_OK)
4738 			status = request.Wait();
4739 		*_numBytes = request.TransferredBytes();
4740 	}
4741 
4742 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4743 		status, *_numBytes));
4744 
4745 	return status;
4746 }
4747 
4748 
4749 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4750 	created if \a allocate is \c true.
4751 	In case it's successful, it will also grab a reference to the cache
4752 	it returns.
4753 */
4754 extern "C" status_t
4755 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4756 {
4757 	if (vnode->cache != NULL) {
4758 		vnode->cache->AcquireRef();
4759 		*_cache = vnode->cache;
4760 		return B_OK;
4761 	}
4762 
4763 	rw_lock_read_lock(&sVnodeLock);
4764 	vnode->Lock();
4765 
4766 	status_t status = B_OK;
4767 
4768 	// The cache could have been created in the meantime
4769 	if (vnode->cache == NULL) {
4770 		if (allocate) {
4771 			// TODO: actually the vnode needs to be busy already here, or
4772 			//	else this won't work...
4773 			bool wasBusy = vnode->IsBusy();
4774 			vnode->SetBusy(true);
4775 
4776 			vnode->Unlock();
4777 			rw_lock_read_unlock(&sVnodeLock);
4778 
4779 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4780 
4781 			rw_lock_read_lock(&sVnodeLock);
4782 			vnode->Lock();
4783 			vnode->SetBusy(wasBusy);
4784 		} else
4785 			status = B_BAD_VALUE;
4786 	}
4787 
4788 	vnode->Unlock();
4789 	rw_lock_read_unlock(&sVnodeLock);
4790 
4791 	if (status == B_OK) {
4792 		vnode->cache->AcquireRef();
4793 		*_cache = vnode->cache;
4794 	}
4795 
4796 	return status;
4797 }
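
// Usage sketch: the reference grabbed on behalf of the caller has to be
// released on the returned cache once it is no longer needed:
#if 0
VMCache* cache;
if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
	// ... work with the cache ...
	cache->ReleaseRef();
}
#endif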
4798 
4799 
4800 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4801 	their own.
4802 	In case it's successful, it will also grab a reference to the cache
4803 	it returns.
4804 */
4805 extern "C" status_t
4806 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4807 {
4808 	rw_lock_read_lock(&sVnodeLock);
4809 	vnode->Lock();
4810 
4811 	status_t status = B_OK;
4812 	if (vnode->cache != NULL) {
4813 		status = B_NOT_ALLOWED;
4814 	} else {
4815 		vnode->cache = _cache;
4816 		_cache->AcquireRef();
4817 	}
4818 
4819 	vnode->Unlock();
4820 	rw_lock_read_unlock(&sVnodeLock);
4821 	return status;
4822 }
4823 
4824 
4825 status_t
4826 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4827 	file_io_vec* vecs, size_t* _count)
4828 {
4829 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4830 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4831 
4832 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4833 }
4834 
4835 
4836 status_t
4837 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4838 {
4839 	status_t status = FS_CALL(vnode, read_stat, stat);
4840 
4841 	// fill in the st_dev and st_ino fields
4842 	if (status == B_OK) {
4843 		stat->st_dev = vnode->device;
4844 		stat->st_ino = vnode->id;
4845 		// the rdev field must stay unset for non-special files
4846 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4847 			stat->st_rdev = -1;
4848 	}
4849 
4850 	return status;
4851 }
4852 
4853 
4854 status_t
4855 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4856 {
4857 	struct vnode* vnode;
4858 	status_t status = get_vnode(device, inode, &vnode, true, false);
4859 	if (status != B_OK)
4860 		return status;
4861 
4862 	status = vfs_stat_vnode(vnode, stat);
4863 
4864 	put_vnode(vnode);
4865 	return status;
4866 }
4867 
4868 
4869 status_t
4870 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4871 {
4872 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4873 }
4874 
4875 
4876 status_t
4877 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4878 	bool kernel, char* path, size_t pathLength)
4879 {
4880 	struct vnode* vnode;
4881 	status_t status;
4882 
4883 	// filter invalid leaf names
4884 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4885 		return B_BAD_VALUE;
4886 
4887 	// get the vnode matching the dir's node_ref
4888 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4889 		// special cases "." and "..": we can directly get the vnode of the
4890 		// referenced directory
4891 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4892 		leaf = NULL;
4893 	} else
4894 		status = get_vnode(device, inode, &vnode, true, false);
4895 	if (status != B_OK)
4896 		return status;
4897 
4898 	// get the directory path
4899 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4900 	put_vnode(vnode);
4901 		// we don't need the vnode anymore
4902 	if (status != B_OK)
4903 		return status;
4904 
4905 	// append the leaf name
4906 	if (leaf) {
4907 		// insert a directory separator if this is not the file system root
4908 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4909 				>= pathLength)
4910 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4911 			return B_NAME_TOO_LONG;
4912 		}
4913 	}
4914 
4915 	return B_OK;
4916 }
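
// Example (illustrative values): for the entry ref (dev 3, dir 2593,
// "todo.txt") this first builds the directory's path, say "/boot/home", and
// then appends the leaf, yielding "/boot/home/todo.txt".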
4917 
4918 
4919 /*!	If the given descriptor locked its vnode, that lock will be released. */
4920 void
4921 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4922 {
4923 	struct vnode* vnode = fd_vnode(descriptor);
4924 
4925 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4926 		vnode->mandatory_locked_by = NULL;
4927 }
4928 
4929 
4930 /*!	Releases any POSIX locks on the file descriptor. */
4931 status_t
4932 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4933 {
4934 	struct vnode* vnode = descriptor->u.vnode;
4935 	if (vnode == NULL)
4936 		return B_OK;
4937 
4938 	if (HAS_FS_CALL(vnode, release_lock))
4939 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4940 
4941 	return release_advisory_lock(vnode, context, NULL, NULL);
4942 }
4943 
4944 
4945 /*!	Closes all file descriptors of the specified I/O context that
4946 	have the O_CLOEXEC flag set.
4947 */
4948 void
4949 vfs_exec_io_context(io_context* context)
4950 {
4951 	uint32 i;
4952 
4953 	for (i = 0; i < context->table_size; i++) {
4954 		mutex_lock(&context->io_mutex);
4955 
4956 		struct file_descriptor* descriptor = context->fds[i];
4957 		bool remove = false;
4958 
4959 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4960 			context->fds[i] = NULL;
4961 			context->num_used_fds--;
4962 
4963 			remove = true;
4964 		}
4965 
4966 		mutex_unlock(&context->io_mutex);
4967 
4968 		if (remove) {
4969 			close_fd(context, descriptor);
4970 			put_fd(descriptor);
4971 		}
4972 	}
4973 }
4974 
4975 
4976 /*! Sets up a new io_context structure, and inherits the properties
4977 	of the parent io_context if it is given.
4978 */
4979 io_context*
4980 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4981 {
4982 	io_context* context = (io_context*)malloc(sizeof(io_context));
4983 	if (context == NULL)
4984 		return NULL;
4985 
4986 	TIOC(NewIOContext(context, parentContext));
4987 
4988 	memset(context, 0, sizeof(io_context));
4989 	context->ref_count = 1;
4990 
4991 	MutexLocker parentLocker;
4992 
4993 	size_t tableSize;
4994 	if (parentContext != NULL) {
4995 		parentLocker.SetTo(parentContext->io_mutex, false);
4996 		tableSize = parentContext->table_size;
4997 	} else
4998 		tableSize = DEFAULT_FD_TABLE_SIZE;
4999 
5000 	// allocate space for FDs and their close-on-exec flag
5001 	context->fds = (file_descriptor**)malloc(
5002 		sizeof(struct file_descriptor*) * tableSize
5003 		+ sizeof(struct select_info*) * tableSize
5004 		+ (tableSize + 7) / 8);
5005 	if (context->fds == NULL) {
5006 		free(context);
5007 		return NULL;
5008 	}
5009 
5010 	context->select_infos = (select_info**)(context->fds + tableSize);
5011 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5012 
5013 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5014 		+ sizeof(struct select_info*) * tableSize
5015 		+ (tableSize + 7) / 8);
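
	// The single allocation above is laid out as: tableSize pointers for the
	// FDs, then tableSize pointers for the select infos, then the
	// close-on-exec bitmap. E.g. for tableSize = 128 on a 64-bit system that
	// is 1024 + 1024 + 16 = 2064 bytes.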
5016 
5017 	mutex_init(&context->io_mutex, "I/O context");
5018 
5019 	// Copy all parent file descriptors
5020 
5021 	if (parentContext != NULL) {
5022 		size_t i;
5023 
5024 		mutex_lock(&sIOContextRootLock);
5025 		context->root = parentContext->root;
5026 		if (context->root)
5027 			inc_vnode_ref_count(context->root);
5028 		mutex_unlock(&sIOContextRootLock);
5029 
5030 		context->cwd = parentContext->cwd;
5031 		if (context->cwd)
5032 			inc_vnode_ref_count(context->cwd);
5033 
5034 		if (parentContext->inherit_fds) {
5035 			for (i = 0; i < tableSize; i++) {
5036 				struct file_descriptor* descriptor = parentContext->fds[i];
5037 
5038 				if (descriptor != NULL
5039 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5040 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5041 					if (closeOnExec && purgeCloseOnExec)
5042 						continue;
5043 
5044 					TFD(InheritFD(context, i, descriptor, parentContext));
5045 
5046 					context->fds[i] = descriptor;
5047 					context->num_used_fds++;
5048 					atomic_add(&descriptor->ref_count, 1);
5049 					atomic_add(&descriptor->open_count, 1);
5050 
5051 					if (closeOnExec)
5052 						fd_set_close_on_exec(context, i, true);
5053 				}
5054 			}
5055 		}
5056 
5057 		parentLocker.Unlock();
5058 	} else {
5059 		context->root = sRoot;
5060 		context->cwd = sRoot;
5061 
5062 		if (context->root)
5063 			inc_vnode_ref_count(context->root);
5064 
5065 		if (context->cwd)
5066 			inc_vnode_ref_count(context->cwd);
5067 	}
5068 
5069 	context->table_size = tableSize;
5070 	context->inherit_fds = parentContext != NULL;
5071 
5072 	list_init(&context->node_monitors);
5073 	context->max_monitors = DEFAULT_NODE_MONITORS;
5074 
5075 	return context;
5076 }
5077 
5078 
5079 void
5080 vfs_get_io_context(io_context* context)
5081 {
5082 	atomic_add(&context->ref_count, 1);
5083 }
5084 
5085 
5086 void
5087 vfs_put_io_context(io_context* context)
5088 {
5089 	if (atomic_add(&context->ref_count, -1) == 1)
5090 		free_io_context(context);
5091 }
5092 
5093 
5094 status_t
5095 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5096 {
5097 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5098 		return B_BAD_VALUE;
5099 
5100 	TIOC(ResizeIOContext(context, newSize));
5101 
5102 	MutexLocker _(context->io_mutex);
5103 
5104 	uint32 oldSize = context->table_size;
5105 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5106 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5107 
5108 	// If the tables shrink, make sure none of the fds being dropped are in use.
5109 	if (newSize < oldSize) {
5110 		for (uint32 i = oldSize; i-- > newSize;) {
5111 			if (context->fds[i])
5112 				return B_BUSY;
5113 		}
5114 	}
5115 
5116 	// store pointers to the old tables
5117 	file_descriptor** oldFDs = context->fds;
5118 	select_info** oldSelectInfos = context->select_infos;
5119 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5120 
5121 	// allocate new tables
5122 	file_descriptor** newFDs = (file_descriptor**)malloc(
5123 		sizeof(struct file_descriptor*) * newSize
5124 		+ sizeof(struct select_info*) * newSize
5125 		+ newCloseOnExecBitmapSize);
5126 	if (newFDs == NULL)
5127 		return B_NO_MEMORY;
5128 
5129 	context->fds = newFDs;
5130 	context->select_infos = (select_info**)(context->fds + newSize);
5131 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5132 	context->table_size = newSize;
5133 
5134 	// copy entries from old tables
5135 	uint32 toCopy = min_c(oldSize, newSize);
5136 
5137 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5138 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5139 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5140 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5141 
5142 	// clear additional entries, if the tables grow
5143 	if (newSize > oldSize) {
5144 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5145 		memset(context->select_infos + oldSize, 0,
5146 			sizeof(void*) * (newSize - oldSize));
5147 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5148 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5149 	}
5150 
5151 	free(oldFDs);
5152 
5153 	return B_OK;
5154 }
5155 
5156 
5157 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5158 
5159 	Given an arbitrary vnode (identified by mount and node ID), the function
5160 	checks, whether the vnode is covered by another vnode. If it is, the
5161 	function returns the mount and node ID of the covering vnode. Otherwise
5162 	it simply returns the supplied mount and node ID.
5163 
5164 	In case of error (e.g. the supplied node could not be found) the variables
5165 	for storing the resolved mount and node ID remain untouched and an error
5166 	code is returned.
5167 
5168 	\param mountID The mount ID of the vnode in question.
5169 	\param nodeID The node ID of the vnode in question.
5170 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5171 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5172 	\return
5173 	- \c B_OK, if everything went fine,
5174 	- another error code, if something went wrong.
5175 */
5176 status_t
5177 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5178 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5179 {
5180 	// get the node
5181 	struct vnode* node;
5182 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5183 	if (error != B_OK)
5184 		return error;
5185 
5186 	// resolve the node
5187 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5188 		put_vnode(node);
5189 		node = coveringNode;
5190 	}
5191 
5192 	// set the return values
5193 	*resolvedMountID = node->device;
5194 	*resolvedNodeID = node->id;
5195 
5196 	put_vnode(node);
5197 
5198 	return B_OK;
5199 }
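
// Example: if a volume is mounted at /mnt, the /mnt directory vnode of the
// parent file system is covered by the root vnode of the mounted volume.
// Passing the covered directory's IDs here yields the mounted volume's root
// node IDs, which is what path resolution has to continue with.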
5200 
5201 
5202 status_t
5203 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5204 	ino_t* _mountPointNodeID)
5205 {
5206 	ReadLocker nodeLocker(sVnodeLock);
5207 	ReadLocker mountLocker(sMountLock);
5208 
5209 	struct fs_mount* mount = find_mount(mountID);
5210 	if (mount == NULL)
5211 		return B_BAD_VALUE;
5212 
5213 	Vnode* mountPoint = mount->covers_vnode;
5214 
5215 	*_mountPointMountID = mountPoint->device;
5216 	*_mountPointNodeID = mountPoint->id;
5217 
5218 	return B_OK;
5219 }
5220 
5221 
5222 status_t
5223 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5224 	ino_t coveredNodeID)
5225 {
5226 	// get the vnodes
5227 	Vnode* vnode;
5228 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5229 	if (error != B_OK)
5230 		return B_BAD_VALUE;
5231 	VNodePutter vnodePutter(vnode);
5232 
5233 	Vnode* coveredVnode;
5234 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5235 		false);
5236 	if (error != B_OK)
5237 		return B_BAD_VALUE;
5238 	VNodePutter coveredVnodePutter(coveredVnode);
5239 
5240 	// establish the covered/covering links
5241 	WriteLocker locker(sVnodeLock);
5242 
5243 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5244 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5245 		return B_BUSY;
5246 	}
5247 
5248 	vnode->covers = coveredVnode;
5249 	vnode->SetCovering(true);
5250 
5251 	coveredVnode->covered_by = vnode;
5252 	coveredVnode->SetCovered(true);
5253 
5254 	// the vnodes do now reference each other
5255 	inc_vnode_ref_count(vnode);
5256 	inc_vnode_ref_count(coveredVnode);
5257 
5258 	return B_OK;
5259 }
5260 
5261 
5262 int
5263 vfs_getrlimit(int resource, struct rlimit* rlp)
5264 {
5265 	if (!rlp)
5266 		return B_BAD_ADDRESS;
5267 
5268 	switch (resource) {
5269 		case RLIMIT_NOFILE:
5270 		{
5271 			struct io_context* context = get_current_io_context(false);
5272 			MutexLocker _(context->io_mutex);
5273 
5274 			rlp->rlim_cur = context->table_size;
5275 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5276 			return 0;
5277 		}
5278 
5279 		case RLIMIT_NOVMON:
5280 		{
5281 			struct io_context* context = get_current_io_context(false);
5282 			MutexLocker _(context->io_mutex);
5283 
5284 			rlp->rlim_cur = context->max_monitors;
5285 			rlp->rlim_max = MAX_NODE_MONITORS;
5286 			return 0;
5287 		}
5288 
5289 		default:
5290 			return B_BAD_VALUE;
5291 	}
5292 }
5293 
5294 
5295 int
5296 vfs_setrlimit(int resource, const struct rlimit* rlp)
5297 {
5298 	if (!rlp)
5299 		return B_BAD_ADDRESS;
5300 
5301 	switch (resource) {
5302 		case RLIMIT_NOFILE:
5303 			/* TODO: check getuid() */
5304 			if (rlp->rlim_max != RLIM_SAVED_MAX
5305 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5306 				return B_NOT_ALLOWED;
5307 
5308 			return vfs_resize_fd_table(get_current_io_context(false),
5309 				rlp->rlim_cur);
5310 
5311 		case RLIMIT_NOVMON:
5312 			/* TODO: check getuid() */
5313 			if (rlp->rlim_max != RLIM_SAVED_MAX
5314 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5315 				return B_NOT_ALLOWED;
5316 
5317 			return resize_monitor_table(get_current_io_context(false),
5318 				rlp->rlim_cur);
5319 
5320 		default:
5321 			return B_BAD_VALUE;
5322 	}
5323 }
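
// Usage sketch (kernel-side caller; the new size is illustrative): growing
// the current team's FD table to 1024 entries.
#if 0
struct rlimit rl;
rl.rlim_cur = 1024;
rl.rlim_max = MAX_FD_TABLE_SIZE;
vfs_setrlimit(RLIMIT_NOFILE, &rl);
#endif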
5324 
5325 
5326 status_t
5327 vfs_init(kernel_args* args)
5328 {
5329 	vnode::StaticInit();
5330 
5331 	sVnodeTable = new(std::nothrow) VnodeTable();
5332 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5333 		panic("vfs_init: error creating vnode hash table\n");
5334 
5335 	struct vnode dummy_vnode;
5336 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5337 
5338 	struct fs_mount dummyMount;
5339 	sMountsTable = new(std::nothrow) MountTable();
5340 	if (sMountsTable == NULL
5341 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5342 		panic("vfs_init: error creating mounts hash table\n");
5343 
5344 	sPathNameCache = create_object_cache("vfs path names",
5345 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5346 	if (sPathNameCache == NULL)
5347 		panic("vfs_init: error creating path name object_cache\n");
5348 
5349 	sVnodeCache = create_object_cache("vfs vnodes",
5350 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5351 	if (sVnodeCache == NULL)
5352 		panic("vfs_init: error creating vnode object_cache\n");
5353 
5354 	sFileDescriptorCache = create_object_cache("vfs fds",
5355 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5356 	if (sFileDescriptorCache == NULL)
5357 		panic("vfs_init: error creating file descriptor object_cache\n");
5358 
5359 	node_monitor_init();
5360 
5361 	sRoot = NULL;
5362 
5363 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5364 
5365 	if (block_cache_init() != B_OK)
5366 		return B_ERROR;
5367 
5368 #ifdef ADD_DEBUGGER_COMMANDS
5369 	// add some debugger commands
5370 	add_debugger_command_etc("vnode", &dump_vnode,
5371 		"Print info about the specified vnode",
5372 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5373 		"Prints information about the vnode specified by address <vnode> or\n"
5374 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5375 		"constructed and printed. It might not be possible to construct a\n"
5376 		"complete path, though.\n",
5377 		0);
5378 	add_debugger_command("vnodes", &dump_vnodes,
5379 		"list all vnodes (from the specified device)");
5380 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5381 		"list all vnode caches");
5382 	add_debugger_command("mount", &dump_mount,
5383 		"info about the specified fs_mount");
5384 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5385 	add_debugger_command("io_context", &dump_io_context,
5386 		"info about the I/O context");
5387 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5388 		"info about vnode usage");
5389 #endif
5390 
5391 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5392 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5393 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5394 		0);
5395 
5396 	fifo_init();
5397 	file_map_init();
5398 
5399 	return file_cache_init();
5400 }
5401 
5402 
5403 //	#pragma mark - fd_ops implementations
5404 
5405 
5406 /*!
5407 	Calls fs_open() on the given vnode and returns a new
5408 	file descriptor for it
5409 */
5410 static int
5411 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5412 {
5413 	void* cookie;
5414 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5415 	if (status != B_OK)
5416 		return status;
5417 
5418 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5419 	if (fd < 0) {
5420 		FS_CALL(vnode, close, cookie);
5421 		FS_CALL(vnode, free_cookie, cookie);
5422 	}
5423 	return fd;
5424 }
5425 
5426 
5427 /*!
5428 	Creates the entry \a name in \a directory (or, unless \c O_EXCL is set,
5429 	opens the already existing node) and returns a new file descriptor for it
5430 */
5431 static int
5432 create_vnode(struct vnode* directory, const char* name, int openMode,
5433 	int perms, bool kernel)
5434 {
5435 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5436 	status_t status = B_ERROR;
5437 	struct vnode* vnode;
5438 	void* cookie;
5439 	ino_t newID;
5440 
5441 	// This is somewhat tricky: If the entry already exists, the FS responsible
5442 	// for the directory might not necessarily also be the one responsible for
5443 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5444 	// we can actually never call the create() hook without O_EXCL. Instead we
5445 	// try to look the entry up first. If it already exists, we just open the
5446 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5447 	// introduces a race condition, since someone else might have created the
5448 	// entry in the meantime. If the FS returns the expected error code in
5449 	// that case, we simply retry the whole procedure (up to 3 times).
5450 
5451 	for (int i = 0; i < 3 && status != B_OK; i++) {
5452 		// look the node up
5453 		status = lookup_dir_entry(directory, name, &vnode);
5454 		if (status == B_OK) {
5455 			VNodePutter putter(vnode);
5456 
5457 			if ((openMode & O_EXCL) != 0)
5458 				return B_FILE_EXISTS;
5459 
5460 			// If the node is a symlink, we have to follow it, unless
5461 			// O_NOTRAVERSE is set.
5462 			if (S_ISLNK(vnode->Type()) && traverse) {
5463 				putter.Put();
5464 				char clonedName[B_FILE_NAME_LENGTH + 1];
5465 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5466 						>= B_FILE_NAME_LENGTH) {
5467 					return B_NAME_TOO_LONG;
5468 				}
5469 
5470 				inc_vnode_ref_count(directory);
5471 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5472 					kernel, &vnode, NULL);
5473 				if (status != B_OK)
5474 					return status;
5475 
5476 				putter.SetTo(vnode);
5477 			}
5478 
5479 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5480 				return B_LINK_LIMIT;
5481 
5482 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5483 			// on success keep the vnode reference for the FD
5484 			if (fd >= 0)
5485 				putter.Detach();
5486 
5487 			return fd;
5488 		}
5489 
5490 		// it doesn't exist yet -- try to create it
5491 
5492 		if (!HAS_FS_CALL(directory, create))
5493 			return B_READ_ONLY_DEVICE;
5494 
5495 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5496 			&cookie, &newID);
5497 		if (status != B_OK
5498 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5499 			return status;
5500 		}
5501 	}
5502 
5503 	if (status != B_OK)
5504 		return status;
5505 
5506 	// the node has been created successfully
5507 
5508 	rw_lock_read_lock(&sVnodeLock);
5509 	vnode = lookup_vnode(directory->device, newID);
5510 	rw_lock_read_unlock(&sVnodeLock);
5511 
5512 	if (vnode == NULL) {
5513 		panic("vfs: fs_create() returned success but there is no vnode, "
5514 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5515 		return B_BAD_VALUE;
5516 	}
5517 
5518 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5519 	if (fd >= 0)
5520 		return fd;
5521 
5522 	status = fd;
5523 
5524 	// something went wrong, clean up
5525 
5526 	FS_CALL(vnode, close, cookie);
5527 	FS_CALL(vnode, free_cookie, cookie);
5528 	put_vnode(vnode);
5529 
5530 	FS_CALL(directory, unlink, name);
5531 
5532 	return status;
5533 }
5534 
5535 
5536 /*! Calls fs open_dir() on the given vnode and returns a new
5537 	file descriptor for it
5538 */
5539 static int
5540 open_dir_vnode(struct vnode* vnode, bool kernel)
5541 {
5542 	if (!HAS_FS_CALL(vnode, open_dir))
5543 		return B_UNSUPPORTED;
5544 
5545 	void* cookie;
5546 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5547 	if (status != B_OK)
5548 		return status;
5549 
5550 	// directory is opened, create a fd
5551 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5552 	if (status >= 0)
5553 		return status;
5554 
5555 	FS_CALL(vnode, close_dir, cookie);
5556 	FS_CALL(vnode, free_dir_cookie, cookie);
5557 
5558 	return status;
5559 }
5560 
5561 
5562 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5563 	file descriptor for it.
5564 	Used by attr_dir_open(), and attr_dir_open_fd().
5565 */
5566 static int
5567 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5568 {
5569 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5570 		return B_UNSUPPORTED;
5571 
5572 	void* cookie;
5573 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5574 	if (status != B_OK)
5575 		return status;
5576 
5577 	// directory is opened, create a fd
5578 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5579 		kernel);
5580 	if (status >= 0)
5581 		return status;
5582 
5583 	FS_CALL(vnode, close_attr_dir, cookie);
5584 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5585 
5586 	return status;
5587 }
5588 
5589 
5590 static int
5591 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5592 	int openMode, int perms, bool kernel)
5593 {
5594 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5595 		"kernel %d\n", name, openMode, perms, kernel));
5596 
5597 	// get directory to put the new file in
5598 	struct vnode* directory;
5599 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5600 	if (status != B_OK)
5601 		return status;
5602 
5603 	status = create_vnode(directory, name, openMode, perms, kernel);
5604 	put_vnode(directory);
5605 
5606 	return status;
5607 }
5608 
5609 
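/*! Creates and opens the file at \a fd + \a path with the given open mode
	and permissions.
	Returns a new file descriptor on success, an error code otherwise.
*/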
5610 static int
5611 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5612 {
5613 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5614 		openMode, perms, kernel));
5615 
5616 	// get directory to put the new file in
5617 	char name[B_FILE_NAME_LENGTH];
5618 	struct vnode* directory;
5619 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5620 		kernel);
5621 	if (status < 0)
5622 		return status;
5623 
5624 	status = create_vnode(directory, name, openMode, perms, kernel);
5625 
5626 	put_vnode(directory);
5627 	return status;
5628 }
5629 
5630 
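/*! Opens the file specified by the (\a mountID, \a directoryID, \a name)
	entry ref. With O_NOFOLLOW, a symlink leaf is refused with B_LINK_LIMIT.
	On success, the vnode reference is transferred to the new descriptor.
*/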
5631 static int
5632 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5633 	int openMode, bool kernel)
5634 {
5635 	if (name == NULL || *name == '\0')
5636 		return B_BAD_VALUE;
5637 
5638 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5639 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5640 
5641 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5642 
5643 	// get the vnode matching the entry_ref
5644 	struct vnode* vnode;
5645 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5646 		kernel, &vnode);
5647 	if (status != B_OK)
5648 		return status;
5649 
5650 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5651 		put_vnode(vnode);
5652 		return B_LINK_LIMIT;
5653 	}
5654 
5655 	int newFD = open_vnode(vnode, openMode, kernel);
5656 	if (newFD >= 0) {
5657 		// The vnode reference has been transferred to the FD
5658 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5659 			directoryID, vnode->id, name);
5660 	} else
5661 		put_vnode(vnode);
5662 
5663 	return newFD;
5664 }
5665 
5666 
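/*! Opens the file at \a fd + \a path, traversing leaf symlinks unless
	O_NOTRAVERSE or O_NOFOLLOW is given.
	On success, the vnode reference is transferred to the new descriptor.
*/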
5667 static int
5668 file_open(int fd, char* path, int openMode, bool kernel)
5669 {
5670 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5671 
5672 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5673 		fd, path, openMode, kernel));
5674 
5675 	// get the vnode matching the vnode + path combination
5676 	struct vnode* vnode;
5677 	ino_t parentID;
5678 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5679 		&parentID, kernel);
5680 	if (status != B_OK)
5681 		return status;
5682 
5683 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5684 		put_vnode(vnode);
5685 		return B_LINK_LIMIT;
5686 	}
5687 
5688 	// open the vnode
5689 	int newFD = open_vnode(vnode, openMode, kernel);
5690 	if (newFD >= 0) {
5691 		// The vnode reference has been transferred to the FD
5692 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5693 			vnode->device, parentID, vnode->id, NULL);
5694 	} else
5695 		put_vnode(vnode);
5696 
5697 	return newFD;
5698 }
5699 
5700 
5701 static status_t
5702 file_close(struct file_descriptor* descriptor)
5703 {
5704 	struct vnode* vnode = descriptor->u.vnode;
5705 	status_t status = B_OK;
5706 
5707 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5708 
5709 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5710 		vnode->id);
5711 	if (HAS_FS_CALL(vnode, close)) {
5712 		status = FS_CALL(vnode, close, descriptor->cookie);
5713 	}
5714 
5715 	if (status == B_OK) {
5716 		// remove all outstanding locks for this team
5717 		if (HAS_FS_CALL(vnode, release_lock))
5718 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5719 		else
5720 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5721 	}
5722 	return status;
5723 }
5724 
5725 
5726 static void
5727 file_free_fd(struct file_descriptor* descriptor)
5728 {
5729 	struct vnode* vnode = descriptor->u.vnode;
5730 
5731 	if (vnode != NULL) {
5732 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5733 		put_vnode(vnode);
5734 	}
5735 }
5736 
5737 
5738 static status_t
5739 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5740 	size_t* length)
5741 {
5742 	struct vnode* vnode = descriptor->u.vnode;
5743 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5744 		pos, length, *length));
5745 
5746 	if (S_ISDIR(vnode->Type()))
5747 		return B_IS_A_DIRECTORY;
5748 
5749 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5750 }
5751 
5752 
5753 static status_t
5754 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5755 	size_t* length)
5756 {
5757 	struct vnode* vnode = descriptor->u.vnode;
5758 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5759 		length));
5760 
5761 	if (S_ISDIR(vnode->Type()))
5762 		return B_IS_A_DIRECTORY;
5763 	if (!HAS_FS_CALL(vnode, write))
5764 		return B_READ_ONLY_DEVICE;
5765 
5766 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5767 }
5768 
5769 
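/*! Implements lseek() for regular file descriptors. On success the new
	position is stored in the descriptor and returned; pipes and sockets
	are refused with ESPIPE.
*/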
5770 static off_t
5771 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5772 {
5773 	struct vnode* vnode = descriptor->u.vnode;
5774 	off_t offset;
5775 	bool isDevice = false;
5776 
5777 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5778 		seekType));
5779 
5780 	// some kinds of files are not seekable
5781 	switch (vnode->Type() & S_IFMT) {
5782 		case S_IFIFO:
5783 		case S_IFSOCK:
5784 			return ESPIPE;
5785 
5786 		// drivers publish block devices as character devices, so check both
5787 		case S_IFBLK:
5788 		case S_IFCHR:
5789 			isDevice = true;
5790 			break;
5791 		// The Open Group Base Specs single out only pipes, FIFOs, and sockets
5792 		// as non-seekable, so we allow seeking all other file types.
5793 		case S_IFREG:
5794 		case S_IFDIR:
5795 		case S_IFLNK:
5796 			break;
5797 	}
5798 
5799 	switch (seekType) {
5800 		case SEEK_SET:
5801 			offset = 0;
5802 			break;
5803 		case SEEK_CUR:
5804 			offset = descriptor->pos;
5805 			break;
5806 		case SEEK_END:
5807 		{
5808 			// stat() the node
5809 			if (!HAS_FS_CALL(vnode, read_stat))
5810 				return B_UNSUPPORTED;
5811 
5812 			struct stat stat;
5813 			status_t status = FS_CALL(vnode, read_stat, &stat);
5814 			if (status != B_OK)
5815 				return status;
5816 
5817 			offset = stat.st_size;
5818 
5819 			if (offset == 0 && isDevice) {
5820 				// stat() on regular drivers doesn't report size
5821 				device_geometry geometry;
5822 
5823 				if (HAS_FS_CALL(vnode, ioctl)) {
5824 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5825 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5826 					if (status == B_OK)
5827 						offset = (off_t)geometry.bytes_per_sector
5828 							* geometry.sectors_per_track
5829 							* geometry.cylinder_count
5830 							* geometry.head_count;
5831 				}
5832 			}
5833 
5834 			break;
5835 		}
5836 		case SEEK_DATA:
5837 		case SEEK_HOLE:
5838 		{
5839 			status_t status = B_BAD_VALUE;
5840 			if (HAS_FS_CALL(vnode, ioctl)) {
5841 				offset = pos;
5842 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5843 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5844 					&offset, sizeof(offset));
5845 				if (status == B_OK) {
5846 					if (offset >= pos)
5847 						offset -= pos;
5848 					break;
5849 				}
5850 			}
5851 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5852 				return status;
5853 
5854 			// fall back to a basic implementation using stat() on the node
5855 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5856 				return B_BAD_VALUE;
5857 
5858 			struct stat stat;
5859 			status = FS_CALL(vnode, read_stat, &stat);
5860 			if (status != B_OK)
5861 				return status;
5862 
5863 			off_t end = stat.st_size;
5864 			if (pos >= end)
5865 				return ENXIO;
5866 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5867 			break;
5868 		}
5869 		default:
5870 			return B_BAD_VALUE;
5871 	}
5872 
5873 	// assumes off_t is 64 bits wide
5874 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5875 		return B_BUFFER_OVERFLOW;
5876 
5877 	pos += offset;
5878 	if (pos < 0)
5879 		return B_BAD_VALUE;
5880 
5881 	return descriptor->pos = pos;
5882 }
5883 
5884 
5885 static status_t
5886 file_select(struct file_descriptor* descriptor, uint8 event,
5887 	struct selectsync* sync)
5888 {
5889 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5890 
5891 	struct vnode* vnode = descriptor->u.vnode;
5892 
5893 	// If the FS has no select() hook, notify select() now.
5894 	if (!HAS_FS_CALL(vnode, select)) {
5895 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5896 			return notify_select_event(sync, event);
5897 		else
5898 			return B_OK;
5899 	}
5900 
5901 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5902 }
5903 
5904 
5905 static status_t
5906 file_deselect(struct file_descriptor* descriptor, uint8 event,
5907 	struct selectsync* sync)
5908 {
5909 	struct vnode* vnode = descriptor->u.vnode;
5910 
5911 	if (!HAS_FS_CALL(vnode, deselect))
5912 		return B_OK;
5913 
5914 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5915 }
5916 
5917 
5918 static status_t
5919 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5920 	bool kernel)
5921 {
5922 	struct vnode* vnode;
5923 	status_t status;
5924 
5925 	if (name == NULL || *name == '\0')
5926 		return B_BAD_VALUE;
5927 
5928 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5929 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5930 
5931 	status = get_vnode(mountID, parentID, &vnode, true, false);
5932 	if (status != B_OK)
5933 		return status;
5934 
5935 	if (HAS_FS_CALL(vnode, create_dir))
5936 		status = FS_CALL(vnode, create_dir, name, perms);
5937 	else
5938 		status = B_READ_ONLY_DEVICE;
5939 
5940 	put_vnode(vnode);
5941 	return status;
5942 }
5943 
5944 
5945 static status_t
5946 dir_create(int fd, char* path, int perms, bool kernel)
5947 {
5948 	char filename[B_FILE_NAME_LENGTH];
5949 	struct vnode* vnode;
5950 	status_t status;
5951 
5952 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5953 		kernel));
5954 
5955 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5956 	if (status < 0)
5957 		return status;
5958 
5959 	if (HAS_FS_CALL(vnode, create_dir)) {
5960 		status = FS_CALL(vnode, create_dir, filename, perms);
5961 	} else
5962 		status = B_READ_ONLY_DEVICE;
5963 
5964 	put_vnode(vnode);
5965 	return status;
5966 }
5967 
5968 
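/*! Opens the directory specified by the (\a mountID, \a parentID, \a name)
	entry ref. If \a name is NULL, the node given by (\a mountID,
	\a parentID) itself is opened.
*/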
5969 static int
5970 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5971 {
5972 	FUNCTION(("dir_open_entry_ref()\n"));
5973 
5974 	if (name && name[0] == '\0')
5975 		return B_BAD_VALUE;
5976 
5977 	// get the vnode matching the entry_ref/node_ref
5978 	struct vnode* vnode;
5979 	status_t status;
5980 	if (name) {
5981 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5982 			&vnode);
5983 	} else
5984 		status = get_vnode(mountID, parentID, &vnode, true, false);
5985 	if (status != B_OK)
5986 		return status;
5987 
5988 	int newFD = open_dir_vnode(vnode, kernel);
5989 	if (newFD >= 0) {
5990 		// The vnode reference has been transferred to the FD
5991 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5992 			vnode->id, name);
5993 	} else
5994 		put_vnode(vnode);
5995 
5996 	return newFD;
5997 }
5998 
5999 
6000 static int
6001 dir_open(int fd, char* path, bool kernel)
6002 {
6003 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6004 		kernel));
6005 
6006 	// get the vnode matching the vnode + path combination
6007 	struct vnode* vnode = NULL;
6008 	ino_t parentID;
6009 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6010 		kernel);
6011 	if (status != B_OK)
6012 		return status;
6013 
6014 	// open the dir
6015 	int newFD = open_dir_vnode(vnode, kernel);
6016 	if (newFD >= 0) {
6017 		// The vnode reference has been transferred to the FD
6018 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6019 			parentID, vnode->id, NULL);
6020 	} else
6021 		put_vnode(vnode);
6022 
6023 	return newFD;
6024 }
6025 
6026 
6027 static status_t
6028 dir_close(struct file_descriptor* descriptor)
6029 {
6030 	struct vnode* vnode = descriptor->u.vnode;
6031 
6032 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6033 
6034 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6035 		vnode->id);
6036 	if (HAS_FS_CALL(vnode, close_dir))
6037 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6038 
6039 	return B_OK;
6040 }
6041 
6042 
6043 static void
6044 dir_free_fd(struct file_descriptor* descriptor)
6045 {
6046 	struct vnode* vnode = descriptor->u.vnode;
6047 
6048 	if (vnode != NULL) {
6049 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6050 		put_vnode(vnode);
6051 	}
6052 }
6053 
6054 
6055 static status_t
6056 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6057 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6058 {
6059 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6060 		bufferSize, _count);
6061 }
6062 
6063 
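/*! Fixes up a dirent as returned by the file system: sets d_pdev/d_pino
	to the parent's IDs and, for covered vnodes (or the ".." entry of a
	covering directory), replaces d_dev/d_ino with the IDs of the topmost
	covering vnode, so callers see the mounted view of the hierarchy.
*/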
6064 static status_t
6065 fix_dirent(struct vnode* parent, struct dirent* entry,
6066 	struct io_context* ioContext)
6067 {
6068 	// set d_pdev and d_pino
6069 	entry->d_pdev = parent->device;
6070 	entry->d_pino = parent->id;
6071 
6072 	// If this is the ".." entry and the directory is covering another
6073 	// vnode, we need to replace d_dev and d_ino with the actual values.
6074 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6075 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6076 			ioContext);
6077 	}
6078 
6079 	// resolve covered vnodes
6080 	ReadLocker _(&sVnodeLock);
6081 
6082 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6083 	if (vnode != NULL && vnode->covered_by != NULL) {
6084 		do {
6085 			vnode = vnode->covered_by;
6086 		} while (vnode->covered_by != NULL);
6087 
6088 		entry->d_dev = vnode->device;
6089 		entry->d_ino = vnode->id;
6090 	}
6091 
6092 	return B_OK;
6093 }
6094 
6095 
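/*! Reads the next entries of the given directory into \a buffer and
	passes each of them through fix_dirent() before returning.
*/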
6096 static status_t
6097 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6098 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6099 {
6100 	if (!HAS_FS_CALL(vnode, read_dir))
6101 		return B_UNSUPPORTED;
6102 
6103 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6104 		_count);
6105 	if (error != B_OK)
6106 		return error;
6107 
6108 	// we need to adjust the dirents we just read
6109 	uint32 count = *_count;
6110 	for (uint32 i = 0; i < count; i++) {
6111 		error = fix_dirent(vnode, buffer, ioContext);
6112 		if (error != B_OK)
6113 			return error;
6114 
6115 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6116 	}
6117 
6118 	return error;
6119 }
6120 
6121 
6122 static status_t
6123 dir_rewind(struct file_descriptor* descriptor)
6124 {
6125 	struct vnode* vnode = descriptor->u.vnode;
6126 
6127 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6128 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6129 	}
6130 
6131 	return B_UNSUPPORTED;
6132 }
6133 
6134 
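/*! Removes the directory at \a fd + \a path. Trailing slashes and "/."
	components are stripped first; paths ending in ".." as well as plain
	"." and ".." are refused with B_NOT_ALLOWED.
*/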
6135 static status_t
6136 dir_remove(int fd, char* path, bool kernel)
6137 {
6138 	char name[B_FILE_NAME_LENGTH];
6139 	struct vnode* directory;
6140 	status_t status;
6141 
6142 	if (path != NULL) {
6143 		// we need to make sure our path name doesn't end in "/", ".",
6144 		// or ".."
6145 		char* lastSlash;
6146 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6147 			char* leaf = lastSlash + 1;
6148 			if (!strcmp(leaf, ".."))
6149 				return B_NOT_ALLOWED;
6150 
6151 			// omit multiple slashes
6152 			while (lastSlash > path && lastSlash[-1] == '/')
6153 				lastSlash--;
6154 
6155 			if (leaf[0] != '\0'
6156 				&& strcmp(leaf, ".") != 0) {
6157 				break;
6158 			}
6159 			// "name/" -> "name", or "name/." -> "name"
6160 			lastSlash[0] = '\0';
6161 		}
6162 
6163 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6164 			return B_NOT_ALLOWED;
6165 	}
6166 
6167 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6168 	if (status != B_OK)
6169 		return status;
6170 
6171 	if (HAS_FS_CALL(directory, remove_dir))
6172 		status = FS_CALL(directory, remove_dir, name);
6173 	else
6174 		status = B_READ_ONLY_DEVICE;
6175 
6176 	put_vnode(directory);
6177 	return status;
6178 }
6179 
6180 
6181 static status_t
6182 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6183 	size_t length)
6184 {
6185 	struct vnode* vnode = descriptor->u.vnode;
6186 
6187 	if (HAS_FS_CALL(vnode, ioctl))
6188 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6189 
6190 	return B_DEV_INVALID_IOCTL;
6191 }
6192 
6193 
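/*! Implements fcntl(): FD flags (F_SETFD/F_GETFD), open mode changes
	(F_SETFL/F_GETFL), descriptor duplication (F_DUPFD/F_DUPFD_CLOEXEC),
	and advisory locking (F_GETLK/F_SETLK/F_SETLKW).
*/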
6194 static status_t
6195 common_fcntl(int fd, int op, size_t argument, bool kernel)
6196 {
6197 	struct flock flock;
6198 
6199 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6200 		fd, op, argument, kernel ? "kernel" : "user"));
6201 
6202 	struct io_context* context = get_current_io_context(kernel);
6203 
6204 	struct file_descriptor* descriptor = get_fd(context, fd);
6205 	if (descriptor == NULL)
6206 		return B_FILE_ERROR;
6207 
6208 	struct vnode* vnode = fd_vnode(descriptor);
6209 
6210 	status_t status = B_OK;
6211 
6212 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6213 		if (descriptor->type != FDTYPE_FILE)
6214 			status = B_BAD_VALUE;
6215 		else if (kernel)
6216 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6217 		else if (user_memcpy(&flock, (struct flock*)argument,
6218 				sizeof(struct flock)) != B_OK)
6219 			status = B_BAD_ADDRESS;
6220 		if (status != B_OK) {
6221 			put_fd(descriptor);
6222 			return status;
6223 		}
6224 	}
6225 
6226 	switch (op) {
6227 		case F_SETFD:
6228 		{
6229 			// Set file descriptor flags
6230 
6231 			// O_CLOEXEC is the only flag available at this time
6232 			mutex_lock(&context->io_mutex);
6233 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6234 			mutex_unlock(&context->io_mutex);
6235 
6236 			status = B_OK;
6237 			break;
6238 		}
6239 
6240 		case F_GETFD:
6241 		{
6242 			// Get file descriptor flags
6243 			mutex_lock(&context->io_mutex);
6244 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6245 			mutex_unlock(&context->io_mutex);
6246 			break;
6247 		}
6248 
6249 		case F_SETFL:
6250 			// Set file descriptor open mode
6251 
6252 			// we only accept changes to O_APPEND and O_NONBLOCK
6253 			argument &= O_APPEND | O_NONBLOCK;
6254 			if (descriptor->ops->fd_set_flags != NULL) {
6255 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6256 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6257 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6258 					(int)argument);
6259 			} else
6260 				status = B_UNSUPPORTED;
6261 
6262 			if (status == B_OK) {
6263 				// update this descriptor's open_mode field
6264 				descriptor->open_mode = (descriptor->open_mode
6265 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6266 			}
6267 
6268 			break;
6269 
6270 		case F_GETFL:
6271 			// Get file descriptor open mode
6272 			status = descriptor->open_mode;
6273 			break;
6274 
6275 		case F_DUPFD:
6276 		case F_DUPFD_CLOEXEC:
6277 		{
6278 			status = new_fd_etc(context, descriptor, (int)argument);
6279 			if (status >= 0) {
6280 				mutex_lock(&context->io_mutex);
6281 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6282 				mutex_unlock(&context->io_mutex);
6283 
6284 				atomic_add(&descriptor->ref_count, 1);
6285 			}
6286 			break;
6287 		}
6288 
6289 		case F_GETLK:
6290 			if (vnode != NULL) {
6291 				struct flock normalizedLock;
6292 
6293 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6294 				status = normalize_flock(descriptor, &normalizedLock);
6295 				if (status != B_OK)
6296 					break;
6297 
6298 				if (HAS_FS_CALL(vnode, test_lock)) {
6299 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6300 						&normalizedLock);
6301 				} else
6302 					status = test_advisory_lock(vnode, &normalizedLock);
6303 				if (status == B_OK) {
6304 					if (normalizedLock.l_type == F_UNLCK) {
6305 						// no conflicting lock found, copy back the same struct
6306 						// we were given except change type to F_UNLCK
6307 						flock.l_type = F_UNLCK;
6308 						if (kernel) {
6309 							memcpy((struct flock*)argument, &flock,
6310 								sizeof(struct flock));
6311 						} else {
6312 							status = user_memcpy((struct flock*)argument,
6313 								&flock, sizeof(struct flock));
6314 						}
6315 					} else {
6316 						// a conflicting lock was found, copy back its range and
6317 						// type
6318 						if (normalizedLock.l_len == OFF_MAX)
6319 							normalizedLock.l_len = 0;
6320 
6321 						if (kernel) {
6322 							memcpy((struct flock*)argument,
6323 								&normalizedLock, sizeof(struct flock));
6324 						} else {
6325 							status = user_memcpy((struct flock*)argument,
6326 								&normalizedLock, sizeof(struct flock));
6327 						}
6328 					}
6329 				}
6330 			} else
6331 				status = B_BAD_VALUE;
6332 			break;
6333 
6334 		case F_SETLK:
6335 		case F_SETLKW:
6336 			status = normalize_flock(descriptor, &flock);
6337 			if (status != B_OK)
6338 				break;
6339 
6340 			if (vnode == NULL) {
6341 				status = B_BAD_VALUE;
6342 			} else if (flock.l_type == F_UNLCK) {
6343 				if (HAS_FS_CALL(vnode, release_lock)) {
6344 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6345 						&flock);
6346 				} else {
6347 					status = release_advisory_lock(vnode, context, NULL,
6348 						&flock);
6349 				}
6350 			} else {
6351 				// the open mode must match the lock type
6352 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6353 						&& flock.l_type == F_WRLCK)
6354 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6355 						&& flock.l_type == F_RDLCK))
6356 					status = B_FILE_ERROR;
6357 				else {
6358 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6359 						status = FS_CALL(vnode, acquire_lock,
6360 							descriptor->cookie, &flock, op == F_SETLKW);
6361 					} else {
6362 						status = acquire_advisory_lock(vnode, context, NULL,
6363 							&flock, op == F_SETLKW);
6364 					}
6365 				}
6366 			}
6367 			break;
6368 
6369 		// ToDo: add support for more ops?
6370 
6371 		default:
6372 			status = B_BAD_VALUE;
6373 	}
6374 
6375 	put_fd(descriptor);
6376 	return status;
6377 }
6378 
6379 
6380 static status_t
6381 common_sync(int fd, bool kernel)
6382 {
6383 	struct file_descriptor* descriptor;
6384 	struct vnode* vnode;
6385 	status_t status;
6386 
6387 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6388 
6389 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6390 	if (descriptor == NULL)
6391 		return B_FILE_ERROR;
6392 
6393 	if (HAS_FS_CALL(vnode, fsync))
6394 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6395 	else
6396 		status = B_UNSUPPORTED;
6397 
6398 	put_fd(descriptor);
6399 	return status;
6400 }
6401 
6402 
6403 static status_t
6404 common_lock_node(int fd, bool kernel)
6405 {
6406 	struct file_descriptor* descriptor;
6407 	struct vnode* vnode;
6408 
6409 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6410 	if (descriptor == NULL)
6411 		return B_FILE_ERROR;
6412 
6413 	status_t status = B_OK;
6414 
6415 	// We need to set the lock atomically - someone else
6416 	// might set one at the same time
6417 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6418 			(file_descriptor*)NULL) != NULL)
6419 		status = B_BUSY;
6420 
6421 	put_fd(descriptor);
6422 	return status;
6423 }
6424 
6425 
6426 static status_t
6427 common_unlock_node(int fd, bool kernel)
6428 {
6429 	struct file_descriptor* descriptor;
6430 	struct vnode* vnode;
6431 
6432 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6433 	if (descriptor == NULL)
6434 		return B_FILE_ERROR;
6435 
6436 	status_t status = B_OK;
6437 
6438 	// We need to clear the lock atomically - someone else
6439 	// might set one at the same time
6440 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6441 			(file_descriptor*)NULL, descriptor) != descriptor)
6442 		status = B_BAD_VALUE;
6443 
6444 	put_fd(descriptor);
6445 	return status;
6446 }
6447 
6448 
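/*! Preallocates \a length bytes at \a offset for the file referred to by
	\a fd. Pipes and sockets are refused with ESPIPE, devices, directories,
	and symlinks with B_DEVICE_NOT_FOUND. The descriptor reference obtained
	here is released via put_fd() on all return paths.
*/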
6449 static status_t
6450 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6451 {
6452 	struct file_descriptor* descriptor;
6453 	struct vnode* vnode;
6454 
6455 	if (offset < 0 || length == 0)
6456 		return B_BAD_VALUE;
6457 	if (offset > OFF_MAX - length)
6458 		return B_FILE_TOO_LARGE;
6459 
6460 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6461 	if (descriptor == NULL)
6462 		return B_FILE_ERROR;
6463 	status_t status = B_OK;
6464 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
6465 		status = B_FILE_ERROR;
6466 	else switch (vnode->Type() & S_IFMT) {
6467 		case S_IFIFO:
6468 		case S_IFSOCK:
6469 			status = ESPIPE;
6470 			break;
6471 		case S_IFBLK:
6472 		case S_IFCHR:
6473 		case S_IFDIR:
6474 		case S_IFLNK:
6475 			status = B_DEVICE_NOT_FOUND;
6476 			break;
6477 		default:
6478 			if (HAS_FS_CALL(vnode, preallocate))
6479 				status = FS_CALL(vnode, preallocate, offset, length);
6480 			else {
6481 				status = HAS_FS_CALL(vnode, write)
6482 					? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6483 			}
6484 			break;
6485 	}
6486 	put_fd(descriptor);
6487 	return status;
6488 }
6489 
6490 
6491 static status_t
6492 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6493 	bool kernel)
6494 {
6495 	struct vnode* vnode;
6496 	status_t status;
6497 
6498 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6499 	if (status != B_OK)
6500 		return status;
6501 
6502 	if (HAS_FS_CALL(vnode, read_symlink)) {
6503 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6504 	} else
6505 		status = B_BAD_VALUE;
6506 
6507 	put_vnode(vnode);
6508 	return status;
6509 }
6510 
6511 
6512 static status_t
6513 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6514 	bool kernel)
6515 {
6516 	// path validity checks have to be in the calling function!
6517 	char name[B_FILE_NAME_LENGTH];
6518 	struct vnode* vnode;
6519 	status_t status;
6520 
6521 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6522 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6523 
6524 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6525 	if (status != B_OK)
6526 		return status;
6527 
6528 	if (HAS_FS_CALL(vnode, create_symlink))
6529 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6530 	else {
6531 		status = HAS_FS_CALL(vnode, write)
6532 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6533 	}
6534 
6535 	put_vnode(vnode);
6536 
6537 	return status;
6538 }
6539 
6540 
6541 static status_t
6542 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6543 	bool traverseLeafLink, bool kernel)
6544 {
6545 	// path validity checks have to be in the calling function!
6546 
6547 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6548 		toPath, kernel));
6549 
6550 	char name[B_FILE_NAME_LENGTH];
6551 	struct vnode* directory;
6552 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6553 		kernel);
6554 	if (status != B_OK)
6555 		return status;
6556 
6557 	struct vnode* vnode;
6558 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6559 		kernel);
6560 	if (status != B_OK)
6561 		goto err;
6562 
6563 	if (directory->mount != vnode->mount) {
6564 		status = B_CROSS_DEVICE_LINK;
6565 		goto err1;
6566 	}
6567 
6568 	if (HAS_FS_CALL(directory, link))
6569 		status = FS_CALL(directory, link, name, vnode);
6570 	else
6571 		status = B_READ_ONLY_DEVICE;
6572 
6573 err1:
6574 	put_vnode(vnode);
6575 err:
6576 	put_vnode(directory);
6577 
6578 	return status;
6579 }
6580 
6581 
6582 static status_t
6583 common_unlink(int fd, char* path, bool kernel)
6584 {
6585 	char filename[B_FILE_NAME_LENGTH];
6586 	struct vnode* vnode;
6587 	status_t status;
6588 
6589 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6590 		kernel));
6591 
6592 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6593 	if (status < 0)
6594 		return status;
6595 
6596 	if (HAS_FS_CALL(vnode, unlink))
6597 		status = FS_CALL(vnode, unlink, filename);
6598 	else
6599 		status = B_READ_ONLY_DEVICE;
6600 
6601 	put_vnode(vnode);
6602 
6603 	return status;
6604 }
6605 
6606 
6607 static status_t
6608 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6609 {
6610 	struct vnode* vnode;
6611 	status_t status;
6612 
6613 	// TODO: honor effectiveUserGroup argument
6614 
6615 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6616 	if (status != B_OK)
6617 		return status;
6618 
6619 	if (HAS_FS_CALL(vnode, access))
6620 		status = FS_CALL(vnode, access, mode);
6621 	else
6622 		status = B_OK;
6623 
6624 	put_vnode(vnode);
6625 
6626 	return status;
6627 }
6628 
6629 
6630 static status_t
6631 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6632 {
6633 	struct vnode* fromVnode;
6634 	struct vnode* toVnode;
6635 	char fromName[B_FILE_NAME_LENGTH];
6636 	char toName[B_FILE_NAME_LENGTH];
6637 	status_t status;
6638 
6639 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6640 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6641 
6642 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6643 	if (status != B_OK)
6644 		return status;
6645 
6646 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6647 	if (status != B_OK)
6648 		goto err1;
6649 
6650 	if (fromVnode->device != toVnode->device) {
6651 		status = B_CROSS_DEVICE_LINK;
6652 		goto err2;
6653 	}
6654 
6655 	if (fromName[0] == '\0' || toName[0] == '\0'
6656 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6657 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6658 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6659 		status = B_BAD_VALUE;
6660 		goto err2;
6661 	}
6662 
6663 	if (HAS_FS_CALL(fromVnode, rename))
6664 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6665 	else
6666 		status = B_READ_ONLY_DEVICE;
6667 
6668 err2:
6669 	put_vnode(toVnode);
6670 err1:
6671 	put_vnode(fromVnode);
6672 
6673 	return status;
6674 }
6675 
6676 
6677 static status_t
6678 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6679 {
6680 	struct vnode* vnode = descriptor->u.vnode;
6681 
6682 	FUNCTION(("common_read_stat: stat %p\n", stat));
6683 
6684 	// TODO: remove this once all file systems properly set them!
6685 	stat->st_crtim.tv_nsec = 0;
6686 	stat->st_ctim.tv_nsec = 0;
6687 	stat->st_mtim.tv_nsec = 0;
6688 	stat->st_atim.tv_nsec = 0;
6689 
6690 	return vfs_stat_vnode(vnode, stat);
6691 }
6692 
6693 
6694 static status_t
6695 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6696 	int statMask)
6697 {
6698 	struct vnode* vnode = descriptor->u.vnode;
6699 
6700 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6701 		vnode, stat, statMask));
6702 
6703 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6704 		&& (statMask & B_STAT_SIZE) != 0) {
6705 		return B_BAD_VALUE;
6706 	}
6707 
6708 	if (!HAS_FS_CALL(vnode, write_stat))
6709 		return B_READ_ONLY_DEVICE;
6710 
6711 	return FS_CALL(vnode, write_stat, stat, statMask);
6712 }
6713 
6714 
6715 static status_t
6716 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6717 	struct stat* stat, bool kernel)
6718 {
6719 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6720 		stat));
6721 
6722 	struct vnode* vnode;
6723 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6724 		NULL, kernel);
6725 	if (status != B_OK)
6726 		return status;
6727 
6728 	status = vfs_stat_vnode(vnode, stat);
6729 
6730 	put_vnode(vnode);
6731 	return status;
6732 }
6733 
6734 
6735 static status_t
6736 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6737 	const struct stat* stat, int statMask, bool kernel)
6738 {
6739 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6740 		"kernel %d\n", fd, path, stat, statMask, kernel));
6741 
6742 	struct vnode* vnode;
6743 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6744 		NULL, kernel);
6745 	if (status != B_OK)
6746 		return status;
6747 
6748 	if (HAS_FS_CALL(vnode, write_stat))
6749 		status = FS_CALL(vnode, write_stat, stat, statMask);
6750 	else
6751 		status = B_READ_ONLY_DEVICE;
6752 
6753 	put_vnode(vnode);
6754 
6755 	return status;
6756 }
6757 
6758 
6759 static int
6760 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6761 {
6762 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6763 		kernel));
6764 
6765 	struct vnode* vnode;
6766 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6767 		NULL, kernel);
6768 	if (status != B_OK)
6769 		return status;
6770 
6771 	status = open_attr_dir_vnode(vnode, kernel);
6772 	if (status < 0)
6773 		put_vnode(vnode);
6774 
6775 	return status;
6776 }
6777 
6778 
6779 static status_t
6780 attr_dir_close(struct file_descriptor* descriptor)
6781 {
6782 	struct vnode* vnode = descriptor->u.vnode;
6783 
6784 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6785 
6786 	if (HAS_FS_CALL(vnode, close_attr_dir))
6787 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6788 
6789 	return B_OK;
6790 }
6791 
6792 
6793 static void
6794 attr_dir_free_fd(struct file_descriptor* descriptor)
6795 {
6796 	struct vnode* vnode = descriptor->u.vnode;
6797 
6798 	if (vnode != NULL) {
6799 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6800 		put_vnode(vnode);
6801 	}
6802 }
6803 
6804 
6805 static status_t
6806 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6807 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6808 {
6809 	struct vnode* vnode = descriptor->u.vnode;
6810 
6811 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6812 
6813 	if (HAS_FS_CALL(vnode, read_attr_dir))
6814 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6815 			bufferSize, _count);
6816 
6817 	return B_UNSUPPORTED;
6818 }
6819 
6820 
6821 static status_t
6822 attr_dir_rewind(struct file_descriptor* descriptor)
6823 {
6824 	struct vnode* vnode = descriptor->u.vnode;
6825 
6826 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6827 
6828 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6829 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6830 
6831 	return B_UNSUPPORTED;
6832 }
6833 
6834 
6835 static int
6836 attr_create(int fd, char* path, const char* name, uint32 type,
6837 	int openMode, bool kernel)
6838 {
6839 	if (name == NULL || *name == '\0')
6840 		return B_BAD_VALUE;
6841 
6842 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6843 	struct vnode* vnode;
6844 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6845 		kernel);
6846 	if (status != B_OK)
6847 		return status;
6848 
6849 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6850 		status = B_LINK_LIMIT;
6851 		goto err;
6852 	}
6853 
6854 	if (!HAS_FS_CALL(vnode, create_attr)) {
6855 		status = B_READ_ONLY_DEVICE;
6856 		goto err;
6857 	}
6858 
6859 	void* cookie;
6860 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6861 	if (status != B_OK)
6862 		goto err;
6863 
6864 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6865 	if (fd >= 0)
6866 		return fd;
6867 
6868 	status = fd;
6869 
6870 	FS_CALL(vnode, close_attr, cookie);
6871 	FS_CALL(vnode, free_attr_cookie, cookie);
6872 
6873 	FS_CALL(vnode, remove_attr, name);
6874 
6875 err:
6876 	put_vnode(vnode);
6877 
6878 	return status;
6879 }
6880 
6881 
6882 static int
6883 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6884 {
6885 	if (name == NULL || *name == '\0')
6886 		return B_BAD_VALUE;
6887 
6888 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6889 	struct vnode* vnode;
6890 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6891 		kernel);
6892 	if (status != B_OK)
6893 		return status;
6894 
6895 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6896 		status = B_LINK_LIMIT;
6897 		goto err;
6898 	}
6899 
6900 	if (!HAS_FS_CALL(vnode, open_attr)) {
6901 		status = B_UNSUPPORTED;
6902 		goto err;
6903 	}
6904 
6905 	void* cookie;
6906 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6907 	if (status != B_OK)
6908 		goto err;
6909 
6910 	// now we only need a file descriptor for this attribute and we're done
6911 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6912 	if (fd >= 0)
6913 		return fd;
6914 
6915 	status = fd;
6916 
6917 	FS_CALL(vnode, close_attr, cookie);
6918 	FS_CALL(vnode, free_attr_cookie, cookie);
6919 
6920 err:
6921 	put_vnode(vnode);
6922 
6923 	return status;
6924 }
6925 
6926 
6927 static status_t
6928 attr_close(struct file_descriptor* descriptor)
6929 {
6930 	struct vnode* vnode = descriptor->u.vnode;
6931 
6932 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6933 
6934 	if (HAS_FS_CALL(vnode, close_attr))
6935 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6936 
6937 	return B_OK;
6938 }
6939 
6940 
6941 static void
6942 attr_free_fd(struct file_descriptor* descriptor)
6943 {
6944 	struct vnode* vnode = descriptor->u.vnode;
6945 
6946 	if (vnode != NULL) {
6947 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6948 		put_vnode(vnode);
6949 	}
6950 }
6951 
6952 
6953 static status_t
6954 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6955 	size_t* length)
6956 {
6957 	struct vnode* vnode = descriptor->u.vnode;
6958 
6959 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6960 		pos, length, *length));
6961 
6962 	if (!HAS_FS_CALL(vnode, read_attr))
6963 		return B_UNSUPPORTED;
6964 
6965 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6966 }
6967 
6968 
6969 static status_t
6970 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6971 	size_t* length)
6972 {
6973 	struct vnode* vnode = descriptor->u.vnode;
6974 
6975 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6976 		length));
6977 
6978 	if (!HAS_FS_CALL(vnode, write_attr))
6979 		return B_UNSUPPORTED;
6980 
6981 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6982 }
6983 
6984 
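/*! Implements lseek() for attribute file descriptors. SEEK_END is resolved
	via the attribute's stat size, so the FS must support read_attr_stat().
*/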
6985 static off_t
6986 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6987 {
6988 	off_t offset;
6989 
6990 	switch (seekType) {
6991 		case SEEK_SET:
6992 			offset = 0;
6993 			break;
6994 		case SEEK_CUR:
6995 			offset = descriptor->pos;
6996 			break;
6997 		case SEEK_END:
6998 		{
6999 			struct vnode* vnode = descriptor->u.vnode;
7000 			if (!HAS_FS_CALL(vnode, read_attr_stat))
7001 				return B_UNSUPPORTED;
7002 
7003 			struct stat stat;
7004 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
7005 				&stat);
7006 			if (status != B_OK)
7007 				return status;
7008 
7009 			offset = stat.st_size;
7010 			break;
7011 		}
7012 		default:
7013 			return B_BAD_VALUE;
7014 	}
7015 
7016 	// assumes off_t is 64 bits wide
7017 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7018 		return B_BUFFER_OVERFLOW;
7019 
7020 	pos += offset;
7021 	if (pos < 0)
7022 		return B_BAD_VALUE;
7023 
7024 	return descriptor->pos = pos;
7025 }
7026 
7027 
7028 static status_t
7029 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7030 {
7031 	struct vnode* vnode = descriptor->u.vnode;
7032 
7033 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7034 
7035 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7036 		return B_UNSUPPORTED;
7037 
7038 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7039 }
7040 
7041 
7042 static status_t
7043 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7044 	int statMask)
7045 {
7046 	struct vnode* vnode = descriptor->u.vnode;
7047 
7048 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7049 
7050 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7051 		return B_READ_ONLY_DEVICE;
7052 
7053 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7054 }
7055 
7056 
7057 static status_t
7058 attr_remove(int fd, const char* name, bool kernel)
7059 {
7060 	struct file_descriptor* descriptor;
7061 	struct vnode* vnode;
7062 	status_t status;
7063 
7064 	if (name == NULL || *name == '\0')
7065 		return B_BAD_VALUE;
7066 
7067 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7068 		kernel));
7069 
7070 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7071 	if (descriptor == NULL)
7072 		return B_FILE_ERROR;
7073 
7074 	if (HAS_FS_CALL(vnode, remove_attr))
7075 		status = FS_CALL(vnode, remove_attr, name);
7076 	else
7077 		status = B_READ_ONLY_DEVICE;
7078 
7079 	put_fd(descriptor);
7080 
7081 	return status;
7082 }
7083 
7084 
7085 static status_t
7086 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7087 	bool kernel)
7088 {
7089 	struct file_descriptor* fromDescriptor;
7090 	struct file_descriptor* toDescriptor;
7091 	struct vnode* fromVnode;
7092 	struct vnode* toVnode;
7093 	status_t status;
7094 
7095 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7096 		|| *toName == '\0')
7097 		return B_BAD_VALUE;
7098 
7099 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7100 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7101 
7102 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7103 	if (fromDescriptor == NULL)
7104 		return B_FILE_ERROR;
7105 
7106 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7107 	if (toDescriptor == NULL) {
7108 		status = B_FILE_ERROR;
7109 		goto err;
7110 	}
7111 
7112 	// are the files on the same volume?
7113 	if (fromVnode->device != toVnode->device) {
7114 		status = B_CROSS_DEVICE_LINK;
7115 		goto err1;
7116 	}
7117 
7118 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7119 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7120 	} else
7121 		status = B_READ_ONLY_DEVICE;
7122 
7123 err1:
7124 	put_fd(toDescriptor);
7125 err:
7126 	put_fd(fromDescriptor);
7127 
7128 	return status;
7129 }
7130 
7131 
7132 static int
7133 index_dir_open(dev_t mountID, bool kernel)
7134 {
7135 	struct fs_mount* mount;
7136 	void* cookie;
7137 
7138 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7139 		kernel));
7140 
7141 	status_t status = get_mount(mountID, &mount);
7142 	if (status != B_OK)
7143 		return status;
7144 
7145 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7146 		status = B_UNSUPPORTED;
7147 		goto error;
7148 	}
7149 
7150 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7151 	if (status != B_OK)
7152 		goto error;
7153 
7154 	// get fd for the index directory
7155 	int fd;
7156 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7157 	if (fd >= 0)
7158 		return fd;
7159 
7160 	// something went wrong
7161 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7162 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7163 
7164 	status = fd;
7165 
7166 error:
7167 	put_mount(mount);
7168 	return status;
7169 }
7170 
7171 
7172 static status_t
7173 index_dir_close(struct file_descriptor* descriptor)
7174 {
7175 	struct fs_mount* mount = descriptor->u.mount;
7176 
7177 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7178 
7179 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7180 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7181 
7182 	return B_OK;
7183 }
7184 
7185 
7186 static void
7187 index_dir_free_fd(struct file_descriptor* descriptor)
7188 {
7189 	struct fs_mount* mount = descriptor->u.mount;
7190 
7191 	if (mount != NULL) {
7192 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7193 		put_mount(mount);
7194 	}
7195 }
7196 
7197 
7198 static status_t
7199 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7200 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7201 {
7202 	struct fs_mount* mount = descriptor->u.mount;
7203 
7204 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7205 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7206 			bufferSize, _count);
7207 	}
7208 
7209 	return B_UNSUPPORTED;
7210 }
7211 
7212 
7213 static status_t
7214 index_dir_rewind(struct file_descriptor* descriptor)
7215 {
7216 	struct fs_mount* mount = descriptor->u.mount;
7217 
7218 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7219 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7220 
7221 	return B_UNSUPPORTED;
7222 }
7223 
7224 
7225 static status_t
7226 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7227 	bool kernel)
7228 {
7229 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7230 		mountID, name, kernel));
7231 
7232 	struct fs_mount* mount;
7233 	status_t status = get_mount(mountID, &mount);
7234 	if (status != B_OK)
7235 		return status;
7236 
7237 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7238 		status = B_READ_ONLY_DEVICE;
7239 		goto out;
7240 	}
7241 
7242 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7243 
7244 out:
7245 	put_mount(mount);
7246 	return status;
7247 }
7248 
7249 
7250 #if 0
7251 static status_t
7252 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7253 {
7254 	struct vnode* vnode = descriptor->u.vnode;
7255 
7256 	// ToDo: currently unused!
7257 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7258 	if (!HAS_FS_CALL(vnode, read_index_stat))
7259 		return B_UNSUPPORTED;
7260 
7261 	return B_UNSUPPORTED;
7262 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7263 }
7264 
7265 
7266 static void
7267 index_free_fd(struct file_descriptor* descriptor)
7268 {
7269 	struct vnode* vnode = descriptor->u.vnode;
7270 
7271 	if (vnode != NULL) {
7272 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7273 		put_vnode(vnode);
7274 	}
7275 }
7276 #endif
7277 
7278 
7279 static status_t
7280 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7281 	bool kernel)
7282 {
7283 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7284 		mountID, name, kernel));
7285 
7286 	struct fs_mount* mount;
7287 	status_t status = get_mount(mountID, &mount);
7288 	if (status != B_OK)
7289 		return status;
7290 
7291 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7292 		status = B_UNSUPPORTED;
7293 		goto out;
7294 	}
7295 
7296 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7297 
7298 out:
7299 	put_mount(mount);
7300 	return status;
7301 }
7302 
7303 
7304 static status_t
7305 index_remove(dev_t mountID, const char* name, bool kernel)
7306 {
7307 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7308 		mountID, name, kernel));
7309 
7310 	struct fs_mount* mount;
7311 	status_t status = get_mount(mountID, &mount);
7312 	if (status != B_OK)
7313 		return status;
7314 
7315 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7316 		status = B_READ_ONLY_DEVICE;
7317 		goto out;
7318 	}
7319 
7320 	status = FS_MOUNT_CALL(mount, remove_index, name);
7321 
7322 out:
7323 	put_mount(mount);
7324 	return status;
7325 }
7326 
7327 
7328 /*!	TODO: the query FS API is still pretty much the same as in R5.
7329 		It would be nice if the FS would find some more kernel support
7330 		for them.
7331 		For example, query parsing should be moved into the kernel.
7332 */
7333 static int
7334 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7335 	int32 token, bool kernel)
7336 {
7337 	struct fs_mount* mount;
7338 	void* cookie;
7339 
7340 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7341 		device, query, kernel));
7342 
7343 	status_t status = get_mount(device, &mount);
7344 	if (status != B_OK)
7345 		return status;
7346 
7347 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7348 		status = B_UNSUPPORTED;
7349 		goto error;
7350 	}
7351 
7352 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7353 		&cookie);
7354 	if (status != B_OK)
7355 		goto error;
7356 
7357 	// get fd for the query
7358 	int fd;
7359 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7360 	if (fd >= 0)
7361 		return fd;
7362 
7363 	status = fd;
7364 
7365 	// something went wrong
7366 	FS_MOUNT_CALL(mount, close_query, cookie);
7367 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7368 
7369 error:
7370 	put_mount(mount);
7371 	return status;
7372 }
7373 
7374 
7375 static status_t
7376 query_close(struct file_descriptor* descriptor)
7377 {
7378 	struct fs_mount* mount = descriptor->u.mount;
7379 
7380 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7381 
7382 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7383 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7384 
7385 	return B_OK;
7386 }
7387 
7388 
7389 static void
7390 query_free_fd(struct file_descriptor* descriptor)
7391 {
7392 	struct fs_mount* mount = descriptor->u.mount;
7393 
7394 	if (mount != NULL) {
7395 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7396 		put_mount(mount);
7397 	}
7398 }
7399 
7400 
7401 static status_t
7402 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7403 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7404 {
7405 	struct fs_mount* mount = descriptor->u.mount;
7406 
7407 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7408 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7409 			bufferSize, _count);
7410 	}
7411 
7412 	return B_UNSUPPORTED;
7413 }
7414 
7415 
7416 static status_t
7417 query_rewind(struct file_descriptor* descriptor)
7418 {
7419 	struct fs_mount* mount = descriptor->u.mount;
7420 
7421 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7422 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7423 
7424 	return B_UNSUPPORTED;
7425 }
7426 
7427 
7428 //	#pragma mark - General File System functions
7429 
7430 
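/*! Mounts the file system \a fsName (or, if NULL, the one the DDM
	recognizes on \a device) at \a path.
	Returns the ID (dev_t) of the new mount on success, an error code
	otherwise.
*/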
7431 static dev_t
7432 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7433 	const char* args, bool kernel)
7434 {
7435 	struct ::fs_mount* mount;
7436 	status_t status = B_OK;
7437 	fs_volume* volume = NULL;
7438 	int32 layer = 0;
7439 	Vnode* coveredNode = NULL;
7440 
7441 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7442 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7443 
7444 	// The path is always safe, we just have to make sure that fsName is
7445 	// at least minimally valid - we can't make any assumptions about args,
7446 	// though. A NULL fsName is OK if a device was given and the FS is not
7447 	// virtual; we'll get it from the DDM later.
7448 	if (fsName == NULL) {
7449 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7450 			return B_BAD_VALUE;
7451 	} else if (fsName[0] == '\0')
7452 		return B_BAD_VALUE;
7453 
7454 	RecursiveLocker mountOpLocker(sMountOpLock);
7455 
7456 	// Helper to delete a newly created file device on failure.
7457 	// Not exactly beautiful, but helps to keep the code below cleaner.
7458 	struct FileDeviceDeleter {
7459 		FileDeviceDeleter() : id(-1) {}
7460 		~FileDeviceDeleter()
7461 		{
7462 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7463 		}
7464 
7465 		partition_id id;
7466 	} fileDeviceDeleter;
7467 
7468 	// If the file system is not a "virtual" one, the device argument should
7469 	// point to a real file/device (if given at all).
7470 	// get the partition
7471 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7472 	KPartition* partition = NULL;
7473 	KPath normalizedDevice;
7474 	bool newlyCreatedFileDevice = false;
7475 
7476 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7477 		// normalize the device path
7478 		status = normalizedDevice.SetTo(device, true);
7479 		if (status != B_OK)
7480 			return status;
7481 
7482 		// get a corresponding partition from the DDM
7483 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7484 		if (partition == NULL) {
7485 			// Partition not found: this either means that the user supplied
7486 			// an invalid path, or that the path refers to an image file. We
7487 			// try to let the DDM create a file device for the path.
7488 			partition_id deviceID = ddm->CreateFileDevice(
7489 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7490 			if (deviceID >= 0) {
7491 				partition = ddm->RegisterPartition(deviceID);
7492 				if (newlyCreatedFileDevice)
7493 					fileDeviceDeleter.id = deviceID;
7494 			}
7495 		}
7496 
7497 		if (!partition) {
7498 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7499 				normalizedDevice.Path()));
7500 			return B_ENTRY_NOT_FOUND;
7501 		}
7502 
7503 		device = normalizedDevice.Path();
7504 			// correct path to file device
7505 	}
7506 	PartitionRegistrar partitionRegistrar(partition, true);
7507 
7508 	// Write lock the partition's device. For the time being, we keep the
7509 	// lock until we're done mounting -- not nice, but it ensures that no one
7510 	// is interfering.
7511 	// TODO: Just mark the partition busy while mounting!
7512 	KDiskDevice* diskDevice = NULL;
7513 	if (partition) {
7514 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7515 		if (!diskDevice) {
7516 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7517 			return B_ERROR;
7518 		}
7519 	}
7520 
7521 	DeviceWriteLocker writeLocker(diskDevice, true);
7522 		// this takes over the write lock acquired before
7523 
7524 	if (partition != NULL) {
7525 		// make sure, that the partition is not busy
7526 		if (partition->IsBusy()) {
7527 			TRACE(("fs_mount(): Partition is busy.\n"));
7528 			return B_BUSY;
7529 		}
7530 
7531 		// if no FS name had been supplied, we get it from the partition
7532 		if (fsName == NULL) {
7533 			KDiskSystem* diskSystem = partition->DiskSystem();
7534 			if (!diskSystem) {
7535 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7536 					"recognize it.\n"));
7537 				return B_BAD_VALUE;
7538 			}
7539 
7540 			if (!diskSystem->IsFileSystem()) {
7541 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7542 					"partitioning system.\n"));
7543 				return B_BAD_VALUE;
7544 			}
7545 
7546 			// The disk system name will not change, and the KDiskSystem
7547 			// object will not go away while the disk device is locked (and
7548 			// the partition has a reference to it), so this is safe.
7549 			fsName = diskSystem->Name();
7550 		}
7551 	}
7552 
7553 	mount = new(std::nothrow) (struct ::fs_mount);
7554 	if (mount == NULL)
7555 		return B_NO_MEMORY;
7556 
7557 	mount->device_name = strdup(device);
7558 		// "device" can be NULL
7559 
7560 	status = mount->entry_cache.Init();
7561 	if (status != B_OK)
7562 		goto err1;
7563 
7564 	// initialize structure
7565 	mount->id = sNextMountID++;
7566 	mount->partition = NULL;
7567 	mount->root_vnode = NULL;
7568 	mount->covers_vnode = NULL;
7569 	mount->unmounting = false;
7570 	mount->owns_file_device = false;
7571 	mount->volume = NULL;
7572 
7573 	// build up the volume(s)
7574 	while (true) {
7575 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7576 		if (layerFSName == NULL) {
7577 			if (layer == 0) {
7578 				status = B_NO_MEMORY;
7579 				goto err1;
7580 			}
7581 
7582 			break;
7583 		}
7584 		MemoryDeleter layerFSNameDeleter(layerFSName);
7585 
7586 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7587 		if (volume == NULL) {
7588 			status = B_NO_MEMORY;
7589 			goto err1;
7590 		}
7591 
7592 		volume->id = mount->id;
7593 		volume->partition = partition != NULL ? partition->ID() : -1;
7594 		volume->layer = layer++;
7595 		volume->private_volume = NULL;
7596 		volume->ops = NULL;
7597 		volume->sub_volume = NULL;
7598 		volume->super_volume = NULL;
7599 		volume->file_system = NULL;
7600 		volume->file_system_name = NULL;
7601 
7602 		volume->file_system_name = get_file_system_name(layerFSName);
7603 		if (volume->file_system_name == NULL) {
7604 			status = B_NO_MEMORY;
7605 			free(volume);
7606 			goto err1;
7607 		}
7608 
7609 		volume->file_system = get_file_system(layerFSName);
7610 		if (volume->file_system == NULL) {
7611 			status = B_DEVICE_NOT_FOUND;
7612 			free(volume->file_system_name);
7613 			free(volume);
7614 			goto err1;
7615 		}
7616 
7617 		if (mount->volume == NULL)
7618 			mount->volume = volume;
7619 		else {
7620 			volume->super_volume = mount->volume;
7621 			mount->volume->sub_volume = volume;
7622 			mount->volume = volume;
7623 		}
7624 	}
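	// mount->volume now refers to the last layer created (the one with the
	// highest layer index); following its super_volume links leads back to
	// the layer 0 volume.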
7625 
7626 	// insert mount struct into list before we call FS's mount() function
7627 	// so that vnodes can be created for this mount
7628 	rw_lock_write_lock(&sMountLock);
7629 	sMountsTable->Insert(mount);
7630 	rw_lock_write_unlock(&sMountLock);
7631 
7632 	ino_t rootID;
7633 
7634 	if (!sRoot) {
7635 		// we haven't mounted anything yet
7636 		if (strcmp(path, "/") != 0) {
7637 			status = B_ERROR;
7638 			goto err2;
7639 		}
7640 
7641 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7642 			args, &rootID);
7643 		if (status != B_OK || mount->volume->ops == NULL)
7644 			goto err2;
7645 	} else {
7646 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7647 		if (status != B_OK)
7648 			goto err2;
7649 
7650 		mount->covers_vnode = coveredNode;
7651 
7652 		// make sure coveredNode is a directory
7653 		if (!S_ISDIR(coveredNode->Type())) {
7654 			status = B_NOT_A_DIRECTORY;
7655 			goto err3;
7656 		}
7657 
7658 		if (coveredNode->IsCovered()) {
7659 			// this is already a covered vnode
7660 			status = B_BUSY;
7661 			goto err3;
7662 		}
7663 
7664 		// mount it/them
7665 		fs_volume* volume = mount->volume;
7666 		while (volume) {
7667 			status = volume->file_system->mount(volume, device, flags, args,
7668 				&rootID);
7669 			if (status != B_OK || volume->ops == NULL) {
7670 				if (status == B_OK && volume->ops == NULL)
7671 					panic("fs_mount: mount() succeeded but ops is NULL!");
7672 				if (volume->sub_volume)
7673 					goto err4;
7674 				goto err3;
7675 			}
7676 
7677 			volume = volume->super_volume;
7678 		}
7679 
7680 		volume = mount->volume;
7681 		while (volume) {
7682 			if (volume->ops->all_layers_mounted != NULL)
7683 				volume->ops->all_layers_mounted(volume);
7684 			volume = volume->super_volume;
7685 		}
7686 	}
7687 
7688 	// the root node is supposed to be owned by the file system - it must
7689 	// exist at this point
7690 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7691 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7692 		panic("fs_mount: file system does not own its root node!\n");
7693 		status = B_ERROR;
7694 		goto err4;
7695 	}
7696 
7697 	// set up the links between the root vnode and the vnode it covers
7698 	rw_lock_write_lock(&sVnodeLock);
7699 	if (coveredNode != NULL) {
7700 		if (coveredNode->IsCovered()) {
7701 			// the vnode is covered now
7702 			status = B_BUSY;
7703 			rw_lock_write_unlock(&sVnodeLock);
7704 			goto err4;
7705 		}
7706 
7707 		mount->root_vnode->covers = coveredNode;
7708 		mount->root_vnode->SetCovering(true);
7709 
7710 		coveredNode->covered_by = mount->root_vnode;
7711 		coveredNode->SetCovered(true);
7712 	}
7713 	rw_lock_write_unlock(&sVnodeLock);
7714 
7715 	if (!sRoot) {
7716 		sRoot = mount->root_vnode;
7717 		mutex_lock(&sIOContextRootLock);
7718 		get_current_io_context(true)->root = sRoot;
7719 		mutex_unlock(&sIOContextRootLock);
7720 		inc_vnode_ref_count(sRoot);
7721 	}
7722 
7723 	// supply the partition (if any) with the mount cookie and mark it mounted
7724 	if (partition) {
7725 		partition->SetMountCookie(mount->volume->private_volume);
7726 		partition->SetVolumeID(mount->id);
7727 
7728 		// keep a partition reference as long as the partition is mounted
7729 		partitionRegistrar.Detach();
7730 		mount->partition = partition;
7731 		mount->owns_file_device = newlyCreatedFileDevice;
7732 		fileDeviceDeleter.id = -1;
7733 	}
7734 
7735 	notify_mount(mount->id,
7736 		coveredNode != NULL ? coveredNode->device : -1,
7737 		coveredNode ? coveredNode->id : -1);
7738 
7739 	return mount->id;
7740 
7741 err4:
7742 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7743 err3:
7744 	if (coveredNode != NULL)
7745 		put_vnode(coveredNode);
7746 err2:
7747 	rw_lock_write_lock(&sMountLock);
7748 	sMountsTable->Remove(mount);
7749 	rw_lock_write_unlock(&sMountLock);
7750 err1:
7751 	delete mount;
7752 
7753 	return status;
7754 }
7755 
7756 
7757 static status_t
7758 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7759 {
7760 	struct fs_mount* mount;
7761 	status_t err;
7762 
7763 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7764 		mountID, kernel));
7765 
7766 	struct vnode* pathVnode = NULL;
7767 	if (path != NULL) {
7768 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7769 		if (err != B_OK)
7770 			return B_ENTRY_NOT_FOUND;
7771 	}
7772 
7773 	RecursiveLocker mountOpLocker(sMountOpLock);
7774 	ReadLocker mountLocker(sMountLock);
7775 
7776 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7777 	if (mount == NULL) {
7778 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7779 			pathVnode);
7780 	}
7781 
7782 	mountLocker.Unlock();
7783 
7784 	if (path != NULL) {
7785 		put_vnode(pathVnode);
7786 
7787 		if (mount->root_vnode != pathVnode) {
7788 			// not mountpoint
7789 			return B_BAD_VALUE;
7790 		}
7791 	}
7792 
7793 	// if the volume is associated with a partition, lock the device of the
7794 	// partition as long as we are unmounting
7795 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7796 	KPartition* partition = mount->partition;
7797 	KDiskDevice* diskDevice = NULL;
7798 	if (partition != NULL) {
7799 		if (partition->Device() == NULL) {
7800 			dprintf("fs_unmount(): There is no device!\n");
7801 			return B_ERROR;
7802 		}
7803 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7804 		if (!diskDevice) {
7805 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7806 			return B_ERROR;
7807 		}
7808 	}
7809 	DeviceWriteLocker writeLocker(diskDevice, true);
7810 
7811 	// make sure, that the partition is not busy
7812 	if (partition != NULL) {
7813 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7814 			TRACE(("fs_unmount(): Partition is busy.\n"));
7815 			return B_BUSY;
7816 		}
7817 	}
7818 
7819 	// grab the vnode master mutex to keep someone from creating
7820 	// a vnode while we're figuring out if we can continue
7821 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7822 
7823 	bool disconnectedDescriptors = false;
7824 
7825 	while (true) {
7826 		bool busy = false;
7827 
7828 		// cycle through the list of vnodes associated with this mount and
7829 		// make sure none of them is busy or still referenced
7830 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7831 		while (struct vnode* vnode = iterator.Next()) {
7832 			if (vnode->IsBusy()) {
7833 				busy = true;
7834 				break;
7835 			}
7836 
7837 			// check the vnode's ref count -- subtract additional references for
7838 			// covering
7839 			int32 refCount = vnode->ref_count;
7840 			if (vnode->covers != NULL)
7841 				refCount--;
7842 			if (vnode->covered_by != NULL)
7843 				refCount--;
7844 
7845 			if (refCount != 0) {
7846 				// there are still vnodes in use on this mount, so we cannot
7847 				// unmount yet
7848 				busy = true;
7849 				break;
7850 			}
7851 		}
7852 
7853 		if (!busy)
7854 			break;
7855 
7856 		if ((flags & B_FORCE_UNMOUNT) == 0)
7857 			return B_BUSY;
7858 
7859 		if (disconnectedDescriptors) {
7860 			// wait a bit until the last access is finished, and then try again
7861 			vnodesWriteLocker.Unlock();
7862 			snooze(100000);
7863 			// TODO: if there is some kind of bug that prevents the ref counts
7864 			// from getting back to zero, this will fall into an endless loop...
7865 			vnodesWriteLocker.Lock();
7866 			continue;
7867 		}
7868 
7869 		// the file system is still busy - but we're forced to unmount it,
7870 		// so let's disconnect all open file descriptors
7871 
7872 		mount->unmounting = true;
7873 			// prevent new vnodes from being created
7874 
7875 		vnodesWriteLocker.Unlock();
7876 
7877 		disconnect_mount_or_vnode_fds(mount, NULL);
7878 		disconnectedDescriptors = true;
7879 
7880 		vnodesWriteLocker.Lock();
7881 	}
7882 
7883 	// We can safely continue. Mark all of the vnodes busy and this mount
7884 	// structure in unmounting state. Also undo the vnode covers/covered_by
7885 	// links.
7886 	mount->unmounting = true;
7887 
7888 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7889 	while (struct vnode* vnode = iterator.Next()) {
7890 		// Remove all covers/covered_by links from other mounts' nodes to this
7891 		// vnode and adjust the node ref count accordingly. We will release the
7892 		// references to the external vnodes below.
7893 		if (Vnode* coveredNode = vnode->covers) {
7894 			if (Vnode* coveringNode = vnode->covered_by) {
7895 				// We have both covered and covering vnodes, so just remove us
7896 				// from the chain.
7897 				coveredNode->covered_by = coveringNode;
7898 				coveringNode->covers = coveredNode;
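				// Each covers/covered_by link held a reference to this
				// vnode; drop both, now that the chain bypasses it.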
7899 				vnode->ref_count -= 2;
7900 
7901 				vnode->covered_by = NULL;
7902 				vnode->covers = NULL;
7903 				vnode->SetCovering(false);
7904 				vnode->SetCovered(false);
7905 			} else {
7906 				// We only have a covered vnode. Remove its link to us.
7907 				coveredNode->covered_by = NULL;
7908 				coveredNode->SetCovered(false);
7909 				vnode->ref_count--;
7910 
7911 				// If the other node is an external vnode, we keep its link
7912 				// around so we can put the reference later on. Otherwise
7913 				// we get rid of it right now.
7914 				if (coveredNode->mount == mount) {
7915 					vnode->covers = NULL;
7916 					coveredNode->ref_count--;
7917 				}
7918 			}
7919 		} else if (Vnode* coveringNode = vnode->covered_by) {
7920 			// We only have a covering vnode. Remove its link to us.
7921 			coveringNode->covers = NULL;
7922 			coveringNode->SetCovering(false);
7923 			vnode->ref_count--;
7924 
7925 			// If the other node is an external vnode, we keep its link
7926 			// around so we can put the reference later on. Otherwise
7927 			// we get rid of it right now.
7928 			if (coveringNode->mount == mount) {
7929 				vnode->covered_by = NULL;
7930 				coveringNode->ref_count--;
7931 			}
7932 		}
7933 
7934 		vnode->SetBusy(true);
7935 		vnode_to_be_freed(vnode);
7936 	}
7937 
7938 	vnodesWriteLocker.Unlock();
7939 
7940 	// Free all vnodes associated with this mount.
7941 	// They will be removed from the mount list by free_vnode(), so
7942 	// we don't have to do it ourselves.
7943 	while (struct vnode* vnode = mount->vnodes.Head()) {
7944 		// Put the references to external covered/covering vnodes we kept above.
7945 		if (Vnode* coveredNode = vnode->covers)
7946 			put_vnode(coveredNode);
7947 		if (Vnode* coveringNode = vnode->covered_by)
7948 			put_vnode(coveringNode);
7949 
7950 		free_vnode(vnode, false);
7951 	}
7952 
7953 	// remove the mount structure from the hash table
7954 	rw_lock_write_lock(&sMountLock);
7955 	sMountsTable->Remove(mount);
7956 	rw_lock_write_unlock(&sMountLock);
7957 
7958 	mountOpLocker.Unlock();
7959 
7960 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7961 	notify_unmount(mount->id);
7962 
7963 	// dereference the partition and mark it unmounted
7964 	if (partition) {
7965 		partition->SetVolumeID(-1);
7966 		partition->SetMountCookie(NULL);
7967 
7968 		if (mount->owns_file_device)
7969 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7970 		partition->Unregister();
7971 	}
7972 
7973 	delete mount;
7974 	return B_OK;
7975 }
7976 
7977 
7978 static status_t
7979 fs_sync(dev_t device)
7980 {
7981 	struct fs_mount* mount;
7982 	status_t status = get_mount(device, &mount);
7983 	if (status != B_OK)
7984 		return status;
7985 
7986 	struct vnode marker;
7987 	memset(&marker, 0, sizeof(marker));
7988 	marker.SetBusy(true);
7989 	marker.SetRemoved(true);
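	// The marker is a dummy vnode that remembers our position in the
	// mount's vnode list between iterations; while it is flagged "removed"
	// it is not currently inserted in the list.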
7990 
7991 	// First, synchronize all file caches
7992 
7993 	while (true) {
7994 		WriteLocker locker(sVnodeLock);
7995 			// Note: That's the easy way, which is probably OK for sync(),
7996 			// since it's a relatively rare call and doesn't need to allow for
7997 			// a lot of concurrency. Using a read lock would be possible, but
7998 			// also more involved, since we would have to lock the individual
7999 			// nodes and take care of the locking order, which we might not
8000 			// want to do while holding fs_mount::lock.
8001 
8002 		// synchronize access to vnode list
8003 		mutex_lock(&mount->lock);
8004 
8005 		struct vnode* vnode;
8006 		if (!marker.IsRemoved()) {
8007 			vnode = mount->vnodes.GetNext(&marker);
8008 			mount->vnodes.Remove(&marker);
8009 			marker.SetRemoved(true);
8010 		} else
8011 			vnode = mount->vnodes.First();
8012 
8013 		while (vnode != NULL && (vnode->cache == NULL
8014 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8015 			// TODO: we could track writes (and writable mapped vnodes)
8016 			//	and have a simple flag that we could test for here
8017 			vnode = mount->vnodes.GetNext(vnode);
8018 		}
8019 
8020 		if (vnode != NULL) {
8021 			// insert marker vnode again
8022 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8023 			marker.SetRemoved(false);
8024 		}
8025 
8026 		mutex_unlock(&mount->lock);
8027 
8028 		if (vnode == NULL)
8029 			break;
8030 
8031 		vnode = lookup_vnode(mount->id, vnode->id);
8032 		if (vnode == NULL || vnode->IsBusy())
8033 			continue;
8034 
8035 		if (vnode->ref_count == 0) {
8036 			// this vnode has been unused before
8037 			vnode_used(vnode);
8038 		}
8039 		inc_vnode_ref_count(vnode);
8040 
8041 		locker.Unlock();
8042 
8043 		if (vnode->cache != NULL && !vnode->IsRemoved())
8044 			vnode->cache->WriteModified();
8045 
8046 		put_vnode(vnode);
8047 	}
8048 
8049 	// Let the file systems do their synchronizing work
8050 	if (HAS_FS_MOUNT_CALL(mount, sync))
8051 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8052 
8053 	// Finally, flush the underlying device's write cache (if possible).
8054 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8055 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8056 
8057 	put_mount(mount);
8058 	return status;
8059 }
8060 
8061 
8062 static status_t
8063 fs_read_info(dev_t device, struct fs_info* info)
8064 {
8065 	struct fs_mount* mount;
8066 	status_t status = get_mount(device, &mount);
8067 	if (status != B_OK)
8068 		return status;
8069 
8070 	memset(info, 0, sizeof(struct fs_info));
8071 
8072 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8073 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8074 
8075 	// fill in info the file system doesn't (have to) know about
8076 	if (status == B_OK) {
8077 		info->dev = mount->id;
8078 		info->root = mount->root_vnode->id;
8079 
8080 		fs_volume* volume = mount->volume;
8081 		while (volume->super_volume != NULL)
8082 			volume = volume->super_volume;
8083 
8084 		strlcpy(info->fsh_name, volume->file_system_name,
8085 			sizeof(info->fsh_name));
8086 		if (mount->device_name != NULL) {
8087 			strlcpy(info->device_name, mount->device_name,
8088 				sizeof(info->device_name));
8089 		}
8090 	}
8091 
8092 	// if the call is not supported by the file system, there are still
8093 	// the parts that we filled out ourselves
8094 
8095 	put_mount(mount);
8096 	return status;
8097 }
8098 
8099 
8100 static status_t
8101 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8102 {
8103 	struct fs_mount* mount;
8104 	status_t status = get_mount(device, &mount);
8105 	if (status != B_OK)
8106 		return status;
8107 
8108 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8109 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8110 	else
8111 		status = B_READ_ONLY_DEVICE;
8112 
8113 	put_mount(mount);
8114 	return status;
8115 }
8116 
8117 
8118 static dev_t
8119 fs_next_device(int32* _cookie)
8120 {
8121 	struct fs_mount* mount = NULL;
8122 	dev_t device = *_cookie;
8123 
8124 	rw_lock_read_lock(&sMountLock);
8125 
8126 	// Since device IDs are assigned sequentially, this algorithm
8127 	// works well enough. It makes sure that the returned device
8128 	// list is sorted, and that no device is skipped when an
8129 	// already visited device gets unmounted.
8130 
8131 	while (device < sNextMountID) {
8132 		mount = find_mount(device++);
8133 		if (mount != NULL && mount->volume->private_volume != NULL)
8134 			break;
8135 	}
8136 
8137 	*_cookie = device;
8138 
8139 	if (mount != NULL)
8140 		device = mount->id;
8141 	else
8142 		device = B_BAD_VALUE;
8143 
8144 	rw_lock_read_unlock(&sMountLock);
8145 
8146 	return device;
8147 }
8148 
8149 
8150 ssize_t
8151 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8152 	void *buffer, size_t readBytes)
8153 {
8154 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8155 	if (attrFD < 0)
8156 		return attrFD;
8157 
8158 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8159 
8160 	_kern_close(attrFD);
8161 
8162 	return bytesRead;
8163 }
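/*	Note: this implementation does not use the "type" argument; the
	attribute is simply opened by name and read at the given position.

	Usage sketch (illustrative -- "BEOS:TYPE" is the conventional MIME type
	attribute on Haiku):

		char mimeType[B_MIME_TYPE_LENGTH];
		ssize_t length = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE,
			0, mimeType, sizeof(mimeType));
*/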
8164 
8165 
8166 static status_t
8167 get_cwd(char* buffer, size_t size, bool kernel)
8168 {
8169 	// Get current working directory from io context
8170 	struct io_context* context = get_current_io_context(kernel);
8171 	status_t status;
8172 
8173 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8174 
8175 	mutex_lock(&context->io_mutex);
8176 
8177 	struct vnode* vnode = context->cwd;
8178 	if (vnode)
8179 		inc_vnode_ref_count(vnode);
8180 
8181 	mutex_unlock(&context->io_mutex);
8182 
8183 	if (vnode) {
8184 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8185 		put_vnode(vnode);
8186 	} else
8187 		status = B_ERROR;
8188 
8189 	return status;
8190 }
8191 
8192 
8193 static status_t
8194 set_cwd(int fd, char* path, bool kernel)
8195 {
8196 	struct io_context* context;
8197 	struct vnode* vnode = NULL;
8198 	struct vnode* oldDirectory;
8199 	status_t status;
8200 
8201 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8202 
8203 	// Get vnode for passed path, and bail if it failed
8204 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8205 	if (status < 0)
8206 		return status;
8207 
8208 	if (!S_ISDIR(vnode->Type())) {
8209 		// nope, can't cwd to here
8210 		status = B_NOT_A_DIRECTORY;
8211 		goto err;
8212 	}
8213 
8214 	// We need to have the permission to enter the directory, too
8215 	if (HAS_FS_CALL(vnode, access)) {
8216 		status = FS_CALL(vnode, access, X_OK);
8217 		if (status != B_OK)
8218 			goto err;
8219 	}
8220 
8221 	// Get current io context and lock
8222 	context = get_current_io_context(kernel);
8223 	mutex_lock(&context->io_mutex);
8224 
8225 	// save the old current working directory first
8226 	oldDirectory = context->cwd;
8227 	context->cwd = vnode;
8228 
8229 	mutex_unlock(&context->io_mutex);
8230 
8231 	if (oldDirectory)
8232 		put_vnode(oldDirectory);
8233 
8234 	return B_NO_ERROR;
8235 
8236 err:
8237 	put_vnode(vnode);
8238 	return status;
8239 }
8240 
8241 
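/*!	Copies a name string from userland and checks its length: returns
	\c B_NAME_TOO_LONG if the source string (including the terminating
	null) does not fit into \a length bytes, or the negative error code
	from user_strlcpy() if the copy itself fails.
*/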
8242 static status_t
8243 user_copy_name(char* to, const char* from, size_t length)
8244 {
8245 	ssize_t len = user_strlcpy(to, from, length);
8246 	if (len < 0)
8247 		return len;
8248 	if (len >= (ssize_t)length)
8249 		return B_NAME_TOO_LONG;
8250 	return B_OK;
8251 }
8252 
8253 
8254 //	#pragma mark - kernel mirrored syscalls
8255 
8256 
8257 dev_t
8258 _kern_mount(const char* path, const char* device, const char* fsName,
8259 	uint32 flags, const char* args, size_t argsLength)
8260 {
8261 	KPath pathBuffer(path);
8262 	if (pathBuffer.InitCheck() != B_OK)
8263 		return B_NO_MEMORY;
8264 
8265 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8266 }
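/*	Usage sketch (illustrative; the mount point, device path, and FS name
	are hypothetical examples):

		dev_t volume = _kern_mount("/mnt", "/dev/disk/usb/0/0/raw", "bfs",
			0, NULL, 0);
		if (volume < 0)
			; // "volume" holds the error code
*/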
8267 
8268 
8269 status_t
8270 _kern_unmount(const char* path, uint32 flags)
8271 {
8272 	KPath pathBuffer(path);
8273 	if (pathBuffer.InitCheck() != B_OK)
8274 		return B_NO_MEMORY;
8275 
8276 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8277 }
8278 
8279 
8280 status_t
8281 _kern_read_fs_info(dev_t device, struct fs_info* info)
8282 {
8283 	if (info == NULL)
8284 		return B_BAD_VALUE;
8285 
8286 	return fs_read_info(device, info);
8287 }
8288 
8289 
8290 status_t
8291 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8292 {
8293 	if (info == NULL)
8294 		return B_BAD_VALUE;
8295 
8296 	return fs_write_info(device, info, mask);
8297 }
8298 
8299 
8300 status_t
8301 _kern_sync(void)
8302 {
8303 	// Note: _kern_sync() is also called from _user_sync()
8304 	int32 cookie = 0;
8305 	dev_t device;
8306 	while ((device = next_dev(&cookie)) >= 0) {
8307 		status_t status = fs_sync(device);
8308 		if (status != B_OK && status != B_BAD_VALUE) {
8309 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8310 				strerror(status));
8311 		}
8312 	}
8313 
8314 	return B_OK;
8315 }
8316 
8317 
8318 dev_t
8319 _kern_next_device(int32* _cookie)
8320 {
8321 	return fs_next_device(_cookie);
8322 }
8323 
8324 
8325 status_t
8326 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8327 	size_t infoSize)
8328 {
8329 	if (infoSize != sizeof(fd_info))
8330 		return B_BAD_VALUE;
8331 
8332 	// get the team
8333 	Team* team = Team::Get(teamID);
8334 	if (team == NULL)
8335 		return B_BAD_TEAM_ID;
8336 	BReference<Team> teamReference(team, true);
8337 
8338 	// now that we have a team reference, its I/O context won't go away
8339 	io_context* context = team->io_context;
8340 	MutexLocker contextLocker(context->io_mutex);
8341 
8342 	uint32 slot = *_cookie;
8343 
8344 	struct file_descriptor* descriptor;
8345 	while (slot < context->table_size
8346 		&& (descriptor = context->fds[slot]) == NULL) {
8347 		slot++;
8348 	}
8349 
8350 	if (slot >= context->table_size)
8351 		return B_ENTRY_NOT_FOUND;
8352 
8353 	info->number = slot;
8354 	info->open_mode = descriptor->open_mode;
8355 
8356 	struct vnode* vnode = fd_vnode(descriptor);
8357 	if (vnode != NULL) {
8358 		info->device = vnode->device;
8359 		info->node = vnode->id;
8360 	} else if (descriptor->u.mount != NULL) {
8361 		info->device = descriptor->u.mount->id;
8362 		info->node = -1;
8363 	}
8364 
8365 	*_cookie = slot + 1;
8366 	return B_OK;
8367 }
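/*	Usage sketch (illustrative): iterating all open FDs of a team. The
	cookie is simply the next FD slot to inspect.

		uint32 cookie = 0;
		fd_info info;
		while (_kern_get_next_fd_info(teamID, &cookie, &info,
				sizeof(info)) == B_OK) {
			// info.number is the FD slot; info.device and info.node
			// identify the vnode (if any)
		}
*/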
8368 
8369 
8370 int
8371 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8372 	int perms)
8373 {
8374 	if ((openMode & O_CREAT) != 0) {
8375 		return file_create_entry_ref(device, inode, name, openMode, perms,
8376 			true);
8377 	}
8378 
8379 	return file_open_entry_ref(device, inode, name, openMode, true);
8380 }
8381 
8382 
8383 /*!	\brief Opens a node specified by a FD + path pair.
8384 
8385 	At least one of \a fd and \a path must be specified.
8386 	If only \a fd is given, the function opens the node identified by this
8387 	FD. If only a path is given, this path is opened. If both are given and
8388 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8389 	of the directory (!) identified by \a fd.
8390 
8391 	\param fd The FD. May be < 0.
8392 	\param path The absolute or relative path. May be \c NULL.
8393 	\param openMode The open mode.
8394 	\return A FD referring to the newly opened node, or an error code,
8395 			if an error occurs.
8396 */
8397 int
8398 _kern_open(int fd, const char* path, int openMode, int perms)
8399 {
8400 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8401 	if (pathBuffer.InitCheck() != B_OK)
8402 		return B_NO_MEMORY;
8403 
8404 	if ((openMode & O_CREAT) != 0)
8405 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8406 
8407 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8408 }
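/*	Usage sketch (illustrative; the paths are hypothetical examples):

		// absolute path -- the FD argument is ignored
		int fd1 = _kern_open(-1, "/boot/home/example", O_RDONLY, 0);

		// relative path -- resolved against the directory FD
		int dirFD = _kern_open_dir(-1, "/boot/home");
		int fd2 = _kern_open(dirFD, "example", O_RDONLY, 0);
*/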
8409 
8410 
8411 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8412 
8413 	The supplied name may be \c NULL, in which case directory identified
8414 	by \a device and \a inode will be opened. Otherwise \a device and
8415 	\a inode identify the parent directory of the directory to be opened
8416 	and \a name its entry name.
8417 
8418 	\param device If \a name is specified the ID of the device the parent
8419 		   directory of the directory to be opened resides on, otherwise
8420 		   the device of the directory itself.
8421 	\param inode If \a name is specified the node ID of the parent
8422 		   directory of the directory to be opened, otherwise node ID of the
8423 		   directory itself.
8424 	\param name The entry name of the directory to be opened. If \c NULL,
8425 		   the \a device + \a inode pair identify the node to be opened.
8426 	\return The FD of the newly opened directory or an error code, if
8427 			something went wrong.
8428 */
8429 int
8430 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8431 {
8432 	return dir_open_entry_ref(device, inode, name, true);
8433 }
8434 
8435 
8436 /*!	\brief Opens a directory specified by a FD + path pair.
8437 
8438 	At least one of \a fd and \a path must be specified.
8439 	If only \a fd is given, the function opens the directory identified by this
8440 	FD. If only a path is given, this path is opened. If both are given and
8441 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8442 	of the directory (!) identified by \a fd.
8443 
8444 	\param fd The FD. May be < 0.
8445 	\param path The absolute or relative path. May be \c NULL.
8446 	\return A FD referring to the newly opened directory, or an error code,
8447 			if an error occurs.
8448 */
8449 int
8450 _kern_open_dir(int fd, const char* path)
8451 {
8452 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8453 	if (pathBuffer.InitCheck() != B_OK)
8454 		return B_NO_MEMORY;
8455 
8456 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8457 }
8458 
8459 
8460 status_t
8461 _kern_fcntl(int fd, int op, size_t argument)
8462 {
8463 	return common_fcntl(fd, op, argument, true);
8464 }
8465 
8466 
8467 status_t
8468 _kern_fsync(int fd)
8469 {
8470 	return common_sync(fd, true);
8471 }
8472 
8473 
8474 status_t
8475 _kern_lock_node(int fd)
8476 {
8477 	return common_lock_node(fd, true);
8478 }
8479 
8480 
8481 status_t
8482 _kern_unlock_node(int fd)
8483 {
8484 	return common_unlock_node(fd, true);
8485 }
8486 
8487 
8488 status_t
8489 _kern_preallocate(int fd, off_t offset, off_t length)
8490 {
8491 	return common_preallocate(fd, offset, length, true);
8492 }
8493 
8494 
8495 status_t
8496 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8497 	int perms)
8498 {
8499 	return dir_create_entry_ref(device, inode, name, perms, true);
8500 }
8501 
8502 
8503 /*!	\brief Creates a directory specified by a FD + path pair.
8504 
8505 	\a path must always be specified (it contains the name of the new directory
8506 	at least). If only a path is given, this path identifies the location at
8507 	which the directory shall be created. If both \a fd and \a path are given
8508 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8509 	of the directory (!) identified by \a fd.
8510 
8511 	\param fd The FD. May be < 0.
8512 	\param path The absolute or relative path. Must not be \c NULL.
8513 	\param perms The access permissions the new directory shall have.
8514 	\return \c B_OK, if the directory has been created successfully, another
8515 			error code otherwise.
8516 */
8517 status_t
8518 _kern_create_dir(int fd, const char* path, int perms)
8519 {
8520 	KPath pathBuffer(path, KPath::DEFAULT);
8521 	if (pathBuffer.InitCheck() != B_OK)
8522 		return B_NO_MEMORY;
8523 
8524 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8525 }
8526 
8527 
8528 status_t
8529 _kern_remove_dir(int fd, const char* path)
8530 {
8531 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8532 	if (pathBuffer.InitCheck() != B_OK)
8533 		return B_NO_MEMORY;
8534 
8535 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8536 }
8537 
8538 
8539 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8540 
8541 	At least one of \a fd and \a path must be specified.
8542 	If only \a fd is given, the symlink to be read is the node
8543 	identified by this FD. If only a path is given, this path identifies the
8544 	symlink to be read. If both are given and the path is absolute, \a fd is
8545 	ignored; a relative path is reckoned off of the directory (!) identified
8546 	by \a fd.
8547 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8548 	will still be updated to reflect the required buffer size.
8549 
8550 	\param fd The FD. May be < 0.
8551 	\param path The absolute or relative path. May be \c NULL.
8552 	\param buffer The buffer into which the contents of the symlink shall be
8553 		   written.
8554 	\param _bufferSize A pointer to the size of the supplied buffer.
8555 	\return The length of the link on success or an appropriate error code
8556 */
8557 status_t
8558 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8559 {
8560 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8561 	if (pathBuffer.InitCheck() != B_OK)
8562 		return B_NO_MEMORY;
8563 
8564 	return common_read_link(fd, pathBuffer.LockBuffer(),
8565 		buffer, _bufferSize, true);
8566 }
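/*	Usage sketch (illustrative; the path is a hypothetical example):

		char buffer[16];
		size_t size = sizeof(buffer);
		status_t error = _kern_read_link(-1, "/boot/home/link", buffer,
			&size);
		if (error == B_BUFFER_OVERFLOW)
			; // "size" now holds the buffer size actually required
*/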
8567 
8568 
8569 /*!	\brief Creates a symlink specified by a FD + path pair.
8570 
8571 	\a path must always be specified (it contains the name of the new symlink
8572 	at least). If only a path is given, this path identifies the location at
8573 	which the symlink shall be created. If both \a fd and \a path are given and
8574 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8575 	of the directory (!) identified by \a fd.
8576 
8577 	\param fd The FD. May be < 0.
8578 	\param toPath The absolute or relative path. Must not be \c NULL.
8579 	\param mode The access permissions the new symlink shall have.
8580 	\return \c B_OK, if the symlink has been created successfully, another
8581 			error code otherwise.
8582 */
8583 status_t
8584 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8585 {
8586 	KPath pathBuffer(path);
8587 	if (pathBuffer.InitCheck() != B_OK)
8588 		return B_NO_MEMORY;
8589 
8590 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8591 		toPath, mode, true);
8592 }
8593 
8594 
8595 status_t
8596 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8597 	bool traverseLeafLink)
8598 {
8599 	KPath pathBuffer(path);
8600 	KPath toPathBuffer(toPath);
8601 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8602 		return B_NO_MEMORY;
8603 
8604 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8605 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8606 }
8607 
8608 
8609 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8610 
8611 	\a path must always be specified (it contains at least the name of the entry
8612 	to be deleted). If only a path is given, this path identifies the entry
8613 	directly. If both \a fd and \a path are given and the path is absolute,
8614 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8615 	identified by \a fd.
8616 
8617 	\param fd The FD. May be < 0.
8618 	\param path The absolute or relative path. Must not be \c NULL.
8619 	\return \c B_OK, if the entry has been removed successfully, another
8620 			error code otherwise.
8621 */
8622 status_t
8623 _kern_unlink(int fd, const char* path)
8624 {
8625 	KPath pathBuffer(path);
8626 	if (pathBuffer.InitCheck() != B_OK)
8627 		return B_NO_MEMORY;
8628 
8629 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8630 }
8631 
8632 
8633 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8634 		   by another FD + path pair.
8635 
8636 	\a oldPath and \a newPath must always be specified (they contain at least
8637 	the name of the entry). If only a path is given, this path identifies the
8638 	entry directly. If both a FD and a path are given and the path is absolute,
8639 	the FD is ignored; a relative path is reckoned off of the directory (!)
8640 	identified by the respective FD.
8641 
8642 	\param oldFD The FD of the old location. May be < 0.
8643 	\param oldPath The absolute or relative path of the old location. Must not
8644 		   be \c NULL.
8645 	\param newFD The FD of the new location. May be < 0.
8646 	\param newPath The absolute or relative path of the new location. Must not
8647 		   be \c NULL.
8648 	\return \c B_OK, if the entry has been moved successfully, another
8649 			error code otherwise.
8650 */
8651 status_t
8652 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8653 {
8654 	KPath oldPathBuffer(oldPath);
8655 	KPath newPathBuffer(newPath);
8656 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8657 		return B_NO_MEMORY;
8658 
8659 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8660 		newFD, newPathBuffer.LockBuffer(), true);
8661 }
8662 
8663 
8664 status_t
8665 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8666 {
8667 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8668 	if (pathBuffer.InitCheck() != B_OK)
8669 		return B_NO_MEMORY;
8670 
8671 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8672 		true);
8673 }
8674 
8675 
8676 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8677 
8678 	If only \a fd is given, the stat operation associated with the type
8679 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8680 	given, this path identifies the entry for whose node to retrieve the
8681 	stat data. If both \a fd and \a path are given and the path is absolute,
8682 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8683 	identified by \a fd and specifies the entry whose stat data shall be
8684 	retrieved.
8685 
8686 	\param fd The FD. May be < 0.
8687 	\param path The absolute or relative path. Must not be \c NULL.
8688 	\param traverseLeafLink If \a path is given, \c true specifies that the
8689 		   function shall not stick to symlinks, but traverse them.
8690 	\param stat The buffer the stat data shall be written into.
8691 	\param statSize The size of the supplied stat buffer.
8692 	\return \c B_OK, if the stat data has been read successfully, another
8693 			error code otherwise.
8694 */
8695 status_t
8696 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8697 	struct stat* stat, size_t statSize)
8698 {
8699 	struct stat completeStat;
8700 	struct stat* originalStat = NULL;
8701 	status_t status;
8702 
8703 	if (statSize > sizeof(struct stat))
8704 		return B_BAD_VALUE;
8705 
8706 	// this supports different stat extensions
8707 	if (statSize < sizeof(struct stat)) {
8708 		originalStat = stat;
8709 		stat = &completeStat;
8710 	}
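	// If the caller was compiled against a smaller (older) struct stat,
	// read into a complete structure first; only the first statSize bytes
	// are copied back below.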
8711 
8712 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8713 
8714 	if (status == B_OK && originalStat != NULL)
8715 		memcpy(originalStat, stat, statSize);
8716 
8717 	return status;
8718 }
8719 
8720 
8721 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8722 
8723 	If only \a fd is given, the stat operation associated with the type
8724 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8725 	given, this path identifies the entry for whose node to write the
8726 	stat data. If both \a fd and \a path are given and the path is absolute,
8727 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8728 	identified by \a fd and specifies the entry whose stat data shall be
8729 	written.
8730 
8731 	\param fd The FD. May be < 0.
8732 	\param path The absolute or relative path. May be \c NULL.
8733 	\param traverseLeafLink If \a path is given, \c true specifies that the
8734 		   function shall not stick to symlinks, but traverse them.
8735 	\param stat The buffer containing the stat data to be written.
8736 	\param statSize The size of the supplied stat buffer.
8737 	\param statMask A mask specifying which parts of the stat data shall be
8738 		   written.
8739 	\return \c B_OK, if the stat data has been written successfully,
8740 			another error code otherwise.
8741 */
8742 status_t
8743 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8744 	const struct stat* stat, size_t statSize, int statMask)
8745 {
8746 	struct stat completeStat;
8747 
8748 	if (statSize > sizeof(struct stat))
8749 		return B_BAD_VALUE;
8750 
8751 	// this supports different stat extensions
8752 	if (statSize < sizeof(struct stat)) {
8753 		memset((uint8*)&completeStat + statSize, 0,
8754 			sizeof(struct stat) - statSize);
8755 		memcpy(&completeStat, stat, statSize);
8756 		stat = &completeStat;
8757 	}
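	// The tail that the caller's smaller struct stat does not cover was
	// zeroed above, so the FS sees a fully defined structure.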
8758 
8759 	status_t status;
8760 
8761 	if (path != NULL) {
8762 		// path given: write the stat of the node referred to by (fd, path)
8763 		KPath pathBuffer(path);
8764 		if (pathBuffer.InitCheck() != B_OK)
8765 			return B_NO_MEMORY;
8766 
8767 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8768 			traverseLeafLink, stat, statMask, true);
8769 	} else {
8770 		// no path given: get the FD and use the FD operation
8771 		struct file_descriptor* descriptor
8772 			= get_fd(get_current_io_context(true), fd);
8773 		if (descriptor == NULL)
8774 			return B_FILE_ERROR;
8775 
8776 		if (descriptor->ops->fd_write_stat)
8777 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8778 		else
8779 			status = B_UNSUPPORTED;
8780 
8781 		put_fd(descriptor);
8782 	}
8783 
8784 	return status;
8785 }
8786 
8787 
8788 int
8789 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8790 {
8791 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8792 	if (pathBuffer.InitCheck() != B_OK)
8793 		return B_NO_MEMORY;
8794 
8795 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8796 }
8797 
8798 
8799 int
8800 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8801 	int openMode)
8802 {
8803 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8804 	if (pathBuffer.InitCheck() != B_OK)
8805 		return B_NO_MEMORY;
8806 
8807 	if ((openMode & O_CREAT) != 0) {
8808 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8809 			true);
8810 	}
8811 
8812 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8813 }
8814 
8815 
8816 status_t
8817 _kern_remove_attr(int fd, const char* name)
8818 {
8819 	return attr_remove(fd, name, true);
8820 }
8821 
8822 
8823 status_t
8824 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8825 	const char* toName)
8826 {
8827 	return attr_rename(fromFile, fromName, toFile, toName, true);
8828 }
8829 
8830 
8831 int
8832 _kern_open_index_dir(dev_t device)
8833 {
8834 	return index_dir_open(device, true);
8835 }
8836 
8837 
8838 status_t
8839 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8840 {
8841 	return index_create(device, name, type, flags, true);
8842 }
8843 
8844 
8845 status_t
8846 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8847 {
8848 	return index_name_read_stat(device, name, stat, true);
8849 }
8850 
8851 
8852 status_t
8853 _kern_remove_index(dev_t device, const char* name)
8854 {
8855 	return index_remove(device, name, true);
8856 }
8857 
8858 
8859 status_t
8860 _kern_getcwd(char* buffer, size_t size)
8861 {
8862 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8863 
8864 	// Call vfs to get current working directory
8865 	return get_cwd(buffer, size, true);
8866 }
8867 
8868 
8869 status_t
8870 _kern_setcwd(int fd, const char* path)
8871 {
8872 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8873 	if (pathBuffer.InitCheck() != B_OK)
8874 		return B_NO_MEMORY;
8875 
8876 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8877 }
8878 
8879 
8880 //	#pragma mark - userland syscalls
8881 
8882 
8883 dev_t
8884 _user_mount(const char* userPath, const char* userDevice,
8885 	const char* userFileSystem, uint32 flags, const char* userArgs,
8886 	size_t argsLength)
8887 {
8888 	char fileSystem[B_FILE_NAME_LENGTH];
8889 	KPath path, device;
8890 	char* args = NULL;
8891 	status_t status;
8892 
8893 	if (!IS_USER_ADDRESS(userPath))
8894 		return B_BAD_ADDRESS;
8895 
8896 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8897 		return B_NO_MEMORY;
8898 
8899 	status = user_copy_name(path.LockBuffer(), userPath,
8900 		B_PATH_NAME_LENGTH);
8901 	if (status != B_OK)
8902 		return status;
8903 	path.UnlockBuffer();
8904 
8905 	if (userFileSystem != NULL) {
8906 		if (!IS_USER_ADDRESS(userFileSystem))
8907 			return B_BAD_ADDRESS;
8908 
8909 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8910 		if (status != B_OK)
8911 			return status;
8912 	}
8913 
8914 	if (userDevice != NULL) {
8915 		if (!IS_USER_ADDRESS(userDevice))
8916 			return B_BAD_ADDRESS;
8917 
8918 		status = user_copy_name(device.LockBuffer(), userDevice,
8919 			B_PATH_NAME_LENGTH);
8920 		if (status != B_OK)
8921 			return status;
8922 		device.UnlockBuffer();
8923 	}
8924 
8925 	if (userArgs != NULL && argsLength > 0) {
8926 		if (!IS_USER_ADDRESS(userArgs))
8927 			return B_BAD_ADDRESS;
8928 
8929 		// this is a safety restriction
8930 		if (argsLength >= 65536)
8931 			return B_NAME_TOO_LONG;
8932 
8933 		args = (char*)malloc(argsLength + 1);
8934 		if (args == NULL)
8935 			return B_NO_MEMORY;
8936 
8937 		status = user_copy_name(args, userArgs, argsLength + 1);
8938 		if (status != B_OK) {
8939 			free(args);
8940 			return status;
8941 		}
8942 	}
8943 
8944 	status = fs_mount(path.LockBuffer(),
8945 		userDevice != NULL ? device.Path() : NULL,
8946 		userFileSystem ? fileSystem : NULL, flags, args, false);
8947 
8948 	free(args);
8949 	return status;
8950 }
8951 
8952 
8953 status_t
8954 _user_unmount(const char* userPath, uint32 flags)
8955 {
8956 	if (!IS_USER_ADDRESS(userPath))
8957 		return B_BAD_ADDRESS;
8958 
8959 	KPath pathBuffer;
8960 	if (pathBuffer.InitCheck() != B_OK)
8961 		return B_NO_MEMORY;
8962 
8963 	char* path = pathBuffer.LockBuffer();
8964 
8965 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8966 	if (status != B_OK)
8967 		return status;
8968 
8969 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8970 }
8971 
8972 
8973 status_t
8974 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8975 {
8976 	struct fs_info info;
8977 	status_t status;
8978 
8979 	if (userInfo == NULL)
8980 		return B_BAD_VALUE;
8981 
8982 	if (!IS_USER_ADDRESS(userInfo))
8983 		return B_BAD_ADDRESS;
8984 
8985 	status = fs_read_info(device, &info);
8986 	if (status != B_OK)
8987 		return status;
8988 
8989 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8990 		return B_BAD_ADDRESS;
8991 
8992 	return B_OK;
8993 }
8994 
8995 
8996 status_t
8997 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8998 {
8999 	struct fs_info info;
9000 
9001 	if (userInfo == NULL)
9002 		return B_BAD_VALUE;
9003 
9004 	if (!IS_USER_ADDRESS(userInfo)
9005 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
9006 		return B_BAD_ADDRESS;
9007 
9008 	return fs_write_info(device, &info, mask);
9009 }
9010 
9011 
9012 dev_t
9013 _user_next_device(int32* _userCookie)
9014 {
9015 	int32 cookie;
9016 	dev_t device;
9017 
9018 	if (!IS_USER_ADDRESS(_userCookie)
9019 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9020 		return B_BAD_ADDRESS;
9021 
9022 	device = fs_next_device(&cookie);
9023 
9024 	if (device >= B_OK) {
9025 		// update user cookie
9026 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9027 			return B_BAD_ADDRESS;
9028 	}
9029 
9030 	return device;
9031 }
9032 
9033 
9034 status_t
9035 _user_sync(void)
9036 {
9037 	return _kern_sync();
9038 }
9039 
9040 
9041 status_t
9042 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9043 	size_t infoSize)
9044 {
9045 	struct fd_info info;
9046 	uint32 cookie;
9047 
9048 	// only root can do this
9049 	if (geteuid() != 0)
9050 		return B_NOT_ALLOWED;
9051 
9052 	if (infoSize != sizeof(fd_info))
9053 		return B_BAD_VALUE;
9054 
9055 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9056 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9057 		return B_BAD_ADDRESS;
9058 
9059 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9060 	if (status != B_OK)
9061 		return status;
9062 
9063 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9064 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9065 		return B_BAD_ADDRESS;
9066 
9067 	return status;
9068 }
9069 
9070 
9071 status_t
9072 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9073 	char* userPath, size_t pathLength)
9074 {
9075 	if (!IS_USER_ADDRESS(userPath))
9076 		return B_BAD_ADDRESS;
9077 
9078 	KPath path;
9079 	if (path.InitCheck() != B_OK)
9080 		return B_NO_MEMORY;
9081 
9082 	// copy the leaf name onto the stack
9083 	char stackLeaf[B_FILE_NAME_LENGTH];
9084 	if (leaf != NULL) {
9085 		if (!IS_USER_ADDRESS(leaf))
9086 			return B_BAD_ADDRESS;
9087 
9088 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9089 		if (status != B_OK)
9090 			return status;
9091 
9092 		leaf = stackLeaf;
9093 	}
9094 
9095 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9096 		false, path.LockBuffer(), path.BufferSize());
9097 	if (status != B_OK)
9098 		return status;
9099 
9100 	path.UnlockBuffer();
9101 
9102 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9103 	if (length < 0)
9104 		return length;
9105 	if (length >= (int)pathLength)
9106 		return B_BUFFER_OVERFLOW;
9107 
9108 	return B_OK;
9109 }
9110 
9111 
9112 status_t
9113 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9114 {
9115 	if (userPath == NULL || buffer == NULL)
9116 		return B_BAD_VALUE;
9117 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9118 		return B_BAD_ADDRESS;
9119 
9120 	// copy path from userland
9121 	KPath pathBuffer;
9122 	if (pathBuffer.InitCheck() != B_OK)
9123 		return B_NO_MEMORY;
9124 	char* path = pathBuffer.LockBuffer();
9125 
9126 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9127 	if (status != B_OK)
9128 		return status;
9129 
9130 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9131 		false);
9132 	if (error != B_OK)
9133 		return error;
9134 
9135 	// copy back to userland
9136 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9137 	if (len < 0)
9138 		return len;
9139 	if (len >= B_PATH_NAME_LENGTH)
9140 		return B_BUFFER_OVERFLOW;
9141 
9142 	return B_OK;
9143 }
9144 
9145 
9146 int
9147 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9148 	int openMode, int perms)
9149 {
9150 	char name[B_FILE_NAME_LENGTH];
9151 
9152 	if (userName == NULL || device < 0 || inode < 0)
9153 		return B_BAD_VALUE;
9154 	if (!IS_USER_ADDRESS(userName))
9155 		return B_BAD_ADDRESS;
9156 	status_t status = user_copy_name(name, userName, sizeof(name));
9157 	if (status != B_OK)
9158 		return status;
9159 
9160 	if ((openMode & O_CREAT) != 0) {
9161 		return file_create_entry_ref(device, inode, name, openMode, perms,
9162 			false);
9163 	}
9164 
9165 	return file_open_entry_ref(device, inode, name, openMode, false);
9166 }
9167 
9168 
9169 int
9170 _user_open(int fd, const char* userPath, int openMode, int perms)
9171 {
9172 	KPath path;
9173 	if (path.InitCheck() != B_OK)
9174 		return B_NO_MEMORY;
9175 
9176 	char* buffer = path.LockBuffer();
9177 
9178 	if (!IS_USER_ADDRESS(userPath))
9179 		return B_BAD_ADDRESS;
9180 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9181 	if (status != B_OK)
9182 		return status;
9183 
9184 	if ((openMode & O_CREAT) != 0)
9185 		return file_create(fd, buffer, openMode, perms, false);
9186 
9187 	return file_open(fd, buffer, openMode, false);
9188 }
9189 
9190 
9191 int
9192 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9193 {
9194 	if (userName != NULL) {
9195 		char name[B_FILE_NAME_LENGTH];
9196 
9197 		if (!IS_USER_ADDRESS(userName))
9198 			return B_BAD_ADDRESS;
9199 		status_t status = user_copy_name(name, userName, sizeof(name));
9200 		if (status != B_OK)
9201 			return status;
9202 
9203 		return dir_open_entry_ref(device, inode, name, false);
9204 	}
9205 	return dir_open_entry_ref(device, inode, NULL, false);
9206 }
9207 
9208 
9209 int
9210 _user_open_dir(int fd, const char* userPath)
9211 {
9212 	if (userPath == NULL)
9213 		return dir_open(fd, NULL, false);
9214 
9215 	KPath path;
9216 	if (path.InitCheck() != B_OK)
9217 		return B_NO_MEMORY;
9218 
9219 	char* buffer = path.LockBuffer();
9220 
9221 	if (!IS_USER_ADDRESS(userPath))
9222 		return B_BAD_ADDRESS;
9223 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9224 	if (status != B_OK)
9225 		return status;
9226 
9227 	return dir_open(fd, buffer, false);
9228 }
9229 
9230 
9231 /*!	\brief Opens a directory's parent directory and returns the entry name
9232 		   of the former.
9233 
9234 	Aside from returning the directory's entry name, this method is
9235 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9236 	equivalent, if \a userName is \c NULL.
9237 
9238 	If a name buffer is supplied and the name does not fit the buffer, the
9239 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9240 
9241 	\param fd A FD referring to a directory.
9242 	\param userName Buffer the directory's entry name shall be written into.
9243 		   May be \c NULL.
9244 	\param nameLength Size of the name buffer.
9245 	\return The file descriptor of the opened parent directory, if everything
9246 			went fine, an error code otherwise.
9247 */
9248 int
9249 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9250 {
9251 	bool kernel = false;
9252 
9253 	if (userName && !IS_USER_ADDRESS(userName))
9254 		return B_BAD_ADDRESS;
9255 
9256 	// open the parent dir
9257 	int parentFD = dir_open(fd, (char*)"..", kernel);
9258 	if (parentFD < 0)
9259 		return parentFD;
9260 	FDCloser fdCloser(parentFD, kernel);
9261 
9262 	if (userName) {
9263 		// get the vnodes
9264 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9265 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9266 		VNodePutter parentVNodePutter(parentVNode);
9267 		VNodePutter dirVNodePutter(dirVNode);
9268 		if (!parentVNode || !dirVNode)
9269 			return B_FILE_ERROR;
9270 
9271 		// get the vnode name
9272 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9273 		struct dirent* buffer = (struct dirent*)_buffer;
9274 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9275 			sizeof(_buffer), get_current_io_context(false));
9276 		if (status != B_OK)
9277 			return status;
9278 
9279 		// copy the name to the userland buffer
9280 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9281 		if (len < 0)
9282 			return len;
9283 		if (len >= (int)nameLength)
9284 			return B_BUFFER_OVERFLOW;
9285 	}
9286 
9287 	return fdCloser.Detach();
9288 }
9289 
9290 
9291 status_t
9292 _user_fcntl(int fd, int op, size_t argument)
9293 {
9294 	status_t status = common_fcntl(fd, op, argument, false);
9295 	if (op == F_SETLKW)
9296 		syscall_restart_handle_post(status);
9297 
9298 	return status;
9299 }
9300 
9301 
9302 status_t
9303 _user_fsync(int fd)
9304 {
9305 	return common_sync(fd, false);
9306 }
9307 
9308 
9309 status_t
9310 _user_flock(int fd, int operation)
9311 {
9312 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9313 
9314 	// Check if the operation is valid
9315 	switch (operation & ~LOCK_NB) {
9316 		case LOCK_UN:
9317 		case LOCK_SH:
9318 		case LOCK_EX:
9319 			break;
9320 
9321 		default:
9322 			return B_BAD_VALUE;
9323 	}
9324 
9325 	struct file_descriptor* descriptor;
9326 	struct vnode* vnode;
9327 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9328 	if (descriptor == NULL)
9329 		return B_FILE_ERROR;
9330 
9331 	if (descriptor->type != FDTYPE_FILE) {
9332 		put_fd(descriptor);
9333 		return B_BAD_VALUE;
9334 	}
9335 
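	// flock() locks always cover the whole file, so build an advisory
	// lock record spanning the entire range [0, OFF_MAX].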
9336 	struct flock flock;
9337 	flock.l_start = 0;
9338 	flock.l_len = OFF_MAX;
9339 	flock.l_whence = 0;
9340 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9341 
9342 	status_t status;
9343 	if ((operation & LOCK_UN) != 0) {
9344 		if (HAS_FS_CALL(vnode, release_lock))
9345 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9346 		else
9347 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9348 	} else {
9349 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9350 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9351 				(operation & LOCK_NB) == 0);
9352 		} else {
9353 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9354 				(operation & LOCK_NB) == 0);
9355 		}
9356 	}
9357 
9358 	syscall_restart_handle_post(status);
9359 
9360 	put_fd(descriptor);
9361 	return status;
9362 }
9363 
9364 
9365 status_t
9366 _user_lock_node(int fd)
9367 {
9368 	return common_lock_node(fd, false);
9369 }
9370 
9371 
9372 status_t
9373 _user_unlock_node(int fd)
9374 {
9375 	return common_unlock_node(fd, false);
9376 }
9377 
9378 
9379 status_t
9380 _user_preallocate(int fd, off_t offset, off_t length)
9381 {
9382 	return common_preallocate(fd, offset, length, false);
9383 }
9384 
9385 
9386 status_t
9387 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9388 	int perms)
9389 {
9390 	char name[B_FILE_NAME_LENGTH];
9391 	status_t status;
9392 
9393 	if (!IS_USER_ADDRESS(userName))
9394 		return B_BAD_ADDRESS;
9395 
9396 	status = user_copy_name(name, userName, sizeof(name));
9397 	if (status != B_OK)
9398 		return status;
9399 
9400 	return dir_create_entry_ref(device, inode, name, perms, false);
9401 }
9402 
9403 
9404 status_t
9405 _user_create_dir(int fd, const char* userPath, int perms)
9406 {
9407 	KPath pathBuffer;
9408 	if (pathBuffer.InitCheck() != B_OK)
9409 		return B_NO_MEMORY;
9410 
9411 	char* path = pathBuffer.LockBuffer();
9412 
9413 	if (!IS_USER_ADDRESS(userPath))
9414 		return B_BAD_ADDRESS;
9415 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9416 	if (status != B_OK)
9417 		return status;
9418 
9419 	return dir_create(fd, path, perms, false);
9420 }
9421 
9422 
9423 status_t
9424 _user_remove_dir(int fd, const char* userPath)
9425 {
9426 	KPath pathBuffer;
9427 	if (pathBuffer.InitCheck() != B_OK)
9428 		return B_NO_MEMORY;
9429 
9430 	char* path = pathBuffer.LockBuffer();
9431 
9432 	if (userPath != NULL) {
9433 		if (!IS_USER_ADDRESS(userPath))
9434 			return B_BAD_ADDRESS;
9435 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9436 		if (status != B_OK)
9437 			return status;
9438 	}
9439 
9440 	return dir_remove(fd, userPath ? path : NULL, false);
9441 }
9442 
9443 
9444 status_t
9445 _user_read_link(int fd, const char* userPath, char* userBuffer,
9446 	size_t* userBufferSize)
9447 {
9448 	KPath pathBuffer, linkBuffer;
9449 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9450 		return B_NO_MEMORY;
9451 
9452 	size_t bufferSize;
9453 
9454 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9455 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9456 		return B_BAD_ADDRESS;
9457 
9458 	char* path = pathBuffer.LockBuffer();
9459 	char* buffer = linkBuffer.LockBuffer();
9460 
9461 	if (userPath) {
9462 		if (!IS_USER_ADDRESS(userPath))
9463 			return B_BAD_ADDRESS;
9464 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9465 		if (status != B_OK)
9466 			return status;
9467 
9468 		if (bufferSize > B_PATH_NAME_LENGTH)
9469 			bufferSize = B_PATH_NAME_LENGTH;
9470 	}
9471 
9472 	size_t newBufferSize = bufferSize;
9473 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9474 		&newBufferSize, false);
9475 
9476 	// we also update the bufferSize in case of errors
9477 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9478 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9479 		return B_BAD_ADDRESS;
9480 
9481 	if (status != B_OK)
9482 		return status;
9483 
9484 	bufferSize = min_c(newBufferSize, bufferSize);
9485 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9486 		return B_BAD_ADDRESS;
9487 
9488 	return B_OK;
9489 }
9490 
9491 
status_t
_user_create_symlink(int fd, const char* userPath, const char* userToPath,
	int mode)
{
	KPath pathBuffer;
	KPath toPathBuffer;
	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();
	char* toPath = toPathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;
	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return common_create_symlink(fd, path, toPath, mode, false);
}


status_t
_user_create_link(int pathFD, const char* userPath, int toFD,
	const char* userToPath, bool traverseLeafLink)
{
	KPath pathBuffer;
	KPath toPathBuffer;
	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();
	char* toPath = toPathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;
	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	status = check_path(toPath);
	if (status != B_OK)
		return status;

	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
		false);
}


status_t
_user_unlink(int fd, const char* userPath)
{
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return common_unlink(fd, path, false);
}


status_t
_user_rename(int oldFD, const char* userOldPath, int newFD,
	const char* userNewPath)
{
	KPath oldPathBuffer;
	KPath newPathBuffer;
	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* oldPath = oldPathBuffer.LockBuffer();
	char* newPath = newPathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;
	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return common_rename(oldFD, oldPath, newFD, newPath, false);
}


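/*!	\brief Creates a FIFO (named pipe) at the location given by \a fd +
	\a userPath. The path is split into parent directory and leaf name, and
	the underlying file system is asked to create an S_IFIFO special node;
	B_UNSUPPORTED is returned if it has no create_special_node() hook.
*/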
status_t
_user_create_fifo(int fd, const char* userPath, mode_t perms)
{
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	// split into directory vnode and filename path
	char filename[B_FILE_NAME_LENGTH];
	struct vnode* dir;
	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
	if (status != B_OK)
		return status;

	VNodePutter _(dir);

	// the underlying FS needs to support creating FIFOs
	if (!HAS_FS_CALL(dir, create_special_node))
		return B_UNSUPPORTED;

	// create the entry -- the FIFO sub node is set up automatically
	fs_vnode superVnode;
	ino_t nodeID;
	status = FS_CALL(dir, create_special_node, filename, NULL,
		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);

	// create_special_node() acquired a reference for us that we don't need.
	if (status == B_OK)
		put_vnode(dir->mount->volume, nodeID);

	return status;
}


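/*!	\brief Creates an anonymous pipe, backed by a FIFO node on the rootfs.
	On success two FDs are copied back to \a userFDs: index 0 opened
	O_RDONLY for the read end, index 1 opened O_WRONLY for the write end.
*/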
status_t
_user_create_pipe(int* userFDs)
{
	// rootfs should support creating FIFOs, but let's be sure
	if (!HAS_FS_CALL(sRoot, create_special_node))
		return B_UNSUPPORTED;

	// create the node -- the FIFO sub node is set up automatically
	fs_vnode superVnode;
	ino_t nodeID;
	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
	if (status != B_OK)
		return status;

	// We've got one reference to the node and need another one.
	struct vnode* vnode;
	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
	if (status != B_OK) {
		// that should not happen
		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
		return status;
	}

	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
	int fds[2];
	fds[0] = open_vnode(vnode, O_RDONLY, false);
	fds[1] = open_vnode(vnode, O_WRONLY, false);

	FDCloser closer0(fds[0], false);
	FDCloser closer1(fds[1], false);

	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);

	// copy FDs to userland
	if (status == B_OK) {
		if (!IS_USER_ADDRESS(userFDs)
			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
			status = B_BAD_ADDRESS;
		}
	}

	// keep FDs, if everything went fine
	if (status == B_OK) {
		closer0.Detach();
		closer1.Detach();
	}

	return status;
}


status_t
_user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
{
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return common_access(fd, path, mode, effectiveUserGroup, false);
}


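/*!	\brief stat()s the node specified by \a fd + \a userPath, or the FD
	alone when \a userPath is NULL. \a statSize may be smaller than
	sizeof(struct stat) -- presumably for compatibility with binaries built
	against a smaller struct stat -- in which case only \a statSize bytes
	are copied back to userland.
*/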
status_t
_user_read_stat(int fd, const char* userPath, bool traverseLink,
	struct stat* userStat, size_t statSize)
{
	struct stat stat = {0};
	status_t status;

	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userStat))
		return B_BAD_ADDRESS;

	if (userPath != NULL) {
		// path given: get the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer;
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char* path = pathBuffer.LockBuffer();

		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;

		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor* descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, &stat);
		else
			status = B_UNSUPPORTED;

		put_fd(descriptor);
	}

	if (status != B_OK)
		return status;

	return user_memcpy(userStat, &stat, statSize);
}


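/*!	\brief Writes (part of) a stat to the node specified by \a fd +
	\a userPath, or to the FD alone when \a userPath is NULL. \a statMask
	selects which fields are applied; any stat fields beyond \a statSize
	are zeroed before the request is passed on.
*/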
status_t
_user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
	const struct stat* userStat, size_t statSize, int statMask)
{
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	struct stat stat;

	if (!IS_USER_ADDRESS(userStat)
		|| user_memcpy(&stat, userStat, statSize) < B_OK)
		return B_BAD_ADDRESS;

	// clear additional stat fields
	if (statSize < sizeof(struct stat))
		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);

	status_t status;

	if (userPath != NULL) {
		// path given: write the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer;
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char* path = pathBuffer.LockBuffer();

		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;

		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
			statMask, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor* descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat) {
			status = descriptor->ops->fd_write_stat(descriptor, &stat,
				statMask);
		} else
			status = B_UNSUPPORTED;

		put_fd(descriptor);
	}

	return status;
}


int
_user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
{
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;
		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;
	}

	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
}


ssize_t
_user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
	size_t readBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	if (userAttribute == NULL)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userAttribute))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
	if (status != B_OK)
		return status;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
	_user_close(attr);

	return bytes;
}


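/*!	\brief Writes \a writeBytes from \a buffer to the attribute
	\a userAttribute of the node given by \a fd. To mimic the BeOS behavior
	of truncating an attribute on write while still honoring the position
	argument, O_TRUNC is only added when \a pos is 0.
*/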
ssize_t
_user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
	const void* buffer, size_t writeBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	if (userAttribute == NULL)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userAttribute))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
	if (status != B_OK)
		return status;

	// Try to support the BeOS typical truncation as well as the position
	// argument
	int attr = attr_create(fd, NULL, attribute, type,
		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
	_user_close(attr);

	return bytes;
}


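/*!	\brief Retrieves type and size of the attribute \a userAttribute into an
	attr_info structure, implemented by opening the attribute read-only and
	stat()ing its file descriptor.
*/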
status_t
_user_stat_attr(int fd, const char* userAttribute,
	struct attr_info* userAttrInfo)
{
	char attribute[B_FILE_NAME_LENGTH];

	if (userAttribute == NULL || userAttrInfo == NULL)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
	if (status != B_OK)
		return status;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(false), attr);
	if (descriptor == NULL) {
		_user_close(attr);
		return B_FILE_ERROR;
	}

	struct stat stat;
	if (descriptor->ops->fd_read_stat)
		status = descriptor->ops->fd_read_stat(descriptor, &stat);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	_user_close(attr);

	if (status == B_OK) {
		attr_info info;
		info.type = stat.st_type;
		info.size = stat.st_size;

		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


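/*!	\brief Opens the attribute \a userName of the node specified by \a fd +
	\a userPath, returning a new attribute FD on success. With O_CREAT in
	\a openMode the attribute is created (with the given \a type) if it
	doesn't exist yet.
*/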
int
_user_open_attr(int fd, const char* userPath, const char* userName,
	uint32 type, int openMode)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;
		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;
	}

	if ((openMode & O_CREAT) != 0) {
		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
			false);
	}

	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
}


status_t
_user_remove_attr(int fd, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return attr_remove(fd, name, false);
}


status_t
_user_rename_attr(int fromFile, const char* userFromName, int toFile,
	const char* userToName)
{
	if (!IS_USER_ADDRESS(userFromName)
		|| !IS_USER_ADDRESS(userToName))
		return B_BAD_ADDRESS;

	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
	KPath toNameBuffer(B_FILE_NAME_LENGTH);
	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* fromName = fromNameBuffer.LockBuffer();
	char* toName = toNameBuffer.LockBuffer();

	status_t status = user_copy_name(fromName, userFromName,
		B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;
	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return attr_rename(fromFile, fromName, toFile, toName, false);
}


int
_user_open_index_dir(dev_t device)
{
	return index_dir_open(device, false);
}


status_t
_user_create_index(dev_t device, const char* userName, uint32 type,
	uint32 flags)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return index_create(device, name, type, flags, false);
}


status_t
_user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
{
	char name[B_FILE_NAME_LENGTH];
	struct stat stat = {0};
	status_t status;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
		return B_BAD_ADDRESS;
	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	status = index_name_read_stat(device, name, &stat, false);
	if (status == B_OK) {
		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


status_t
_user_remove_index(dev_t device, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
	if (status != B_OK)
		return status;

	return index_remove(device, name, false);
}


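/*!	\brief Copies the current working directory's path into \a userBuffer,
	up to \a size bytes (capped at kMaxPathLength).
*/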
status_t
_user_getcwd(char* userBuffer, size_t size)
{
	if (size == 0)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	if (size > kMaxPathLength)
		size = kMaxPathLength;

	KPath pathBuffer(size);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));

	char* path = pathBuffer.LockBuffer();

	status_t status = get_cwd(path, size, false);
	if (status != B_OK)
		return status;

	// Copy back the result
	if (user_strlcpy(userBuffer, path, size) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_setcwd(int fd, const char* userPath)
{
	TRACE(("user_setcwd: path = %p\n", userPath));

	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;
		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;
	}

	return set_cwd(fd, userPath != NULL ? path : NULL, false);
}


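/*!	\brief chroot() backend: resolves \a userPath and replaces the calling
	team's I/O context root vnode with the result, releasing the reference
	to the old root. Only the root user is allowed to do this.
*/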
status_t
_user_change_root(const char* userPath)
{
	// only root is allowed to chroot()
	if (geteuid() != 0)
		return B_NOT_ALLOWED;

	// alloc path buffer
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// copy userland path to kernel
	char* path = pathBuffer.LockBuffer();
	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;
		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
		if (status != B_OK)
			return status;
	}

	// get the vnode
	struct vnode* vnode;
	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
	if (status != B_OK)
		return status;

	// set the new root
	struct io_context* context = get_current_io_context(false);
	mutex_lock(&sIOContextRootLock);
	struct vnode* oldRoot = context->root;
	context->root = vnode;
	mutex_unlock(&sIOContextRootLock);

	put_vnode(oldRoot);

	return B_OK;
}


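/*!	\brief Opens a query on \a device, returning an FD from which the
	matching entries can be read. \a port and \a token are passed through
	to query_open() (presumably to identify the target for live query
	update notifications); the query string length is capped at 64 KiB as
	a safety restriction.
*/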
int
_user_open_query(dev_t device, const char* userQuery, size_t queryLength,
	uint32 flags, port_id port, int32 token)
{
	if (device < 0 || userQuery == NULL || queryLength == 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userQuery))
		return B_BAD_ADDRESS;

	// this is a safety restriction
	if (queryLength >= 65536)
		return B_NAME_TOO_LONG;

	BStackOrHeapArray<char, 128> query(queryLength + 1);
	if (!query.IsValid())
		return B_NO_MEMORY;

	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
		return B_BAD_ADDRESS;

	return query_open(device, query, flags, port, token, false);
}


#include "vfs_request_io.cpp"