xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 3216a856947f9746d8c4c1e720ccf3dc5c0ac786)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <block_cache.h>
36 #include <boot/kernel_args.h>
37 #include <debug_heap.h>
38 #include <disk_device_manager/KDiskDevice.h>
39 #include <disk_device_manager/KDiskDeviceManager.h>
40 #include <disk_device_manager/KDiskDeviceUtils.h>
41 #include <disk_device_manager/KDiskSystem.h>
42 #include <fd.h>
43 #include <file_cache.h>
44 #include <fs/node_monitor.h>
45 #include <KPath.h>
46 #include <lock.h>
47 #include <low_resource_manager.h>
48 #include <slab/Slab.h>
49 #include <StackOrHeapArray.h>
50 #include <syscalls.h>
51 #include <syscall_restart.h>
52 #include <tracing.h>
53 #include <util/atomic.h>
54 #include <util/AutoLock.h>
55 #include <util/DoublyLinkedList.h>
56 #include <vfs.h>
57 #include <vm/vm.h>
58 #include <vm/VMCache.h>
59 #include <wait_for_objects.h>
60 
61 #include "EntryCache.h"
62 #include "fifo.h"
63 #include "IORequest.h"
64 #include "unused_vnodes.h"
65 #include "vfs_tracing.h"
66 #include "Vnode.h"
67 #include "../cache/vnode_store.h"
68 
69 
70 //#define TRACE_VFS
71 #ifdef TRACE_VFS
72 #	define TRACE(x) dprintf x
73 #	define FUNCTION(x) dprintf x
74 #else
75 #	define TRACE(x) ;
76 #	define FUNCTION(x) ;
77 #endif
78 
79 #define ADD_DEBUGGER_COMMANDS
80 
81 
82 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
83 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
84 
85 #if KDEBUG
86 #	define FS_CALL(vnode, op, params...) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode, params) \
89 			: (panic("FS_CALL op " #op " is NULL"), 0))
90 #	define FS_CALL_NO_PARAMS(vnode, op) \
91 		( HAS_FS_CALL(vnode, op) ? \
92 			vnode->ops->op(vnode->mount->volume, vnode) \
93 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL(mount, op, params...) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume, params) \
97 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
98 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
99 		( HAS_FS_MOUNT_CALL(mount, op) ? \
100 			mount->volume->ops->op(mount->volume) \
101 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
102 #else
103 #	define FS_CALL(vnode, op, params...) \
104 			vnode->ops->op(vnode->mount->volume, vnode, params)
105 #	define FS_CALL_NO_PARAMS(vnode, op) \
106 			vnode->ops->op(vnode->mount->volume, vnode)
107 #	define FS_MOUNT_CALL(mount, op, params...) \
108 			mount->volume->ops->op(mount->volume, params)
109 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
110 			mount->volume->ops->op(mount->volume)
111 #endif
112 
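// Illustrative sketch (not part of the original source): callers guard
// optional hooks with HAS_FS_CALL() before dispatching through FS_CALL(),
// roughly like
//
//	if (!HAS_FS_CALL(vnode, read))
//		return B_UNSUPPORTED;
//	status_t status = FS_CALL(vnode, read, cookie, pos, buffer, &length);
//
// (cookie, pos, buffer, and length are hypothetical locals here.) With KDEBUG
// set, FS_CALL() panics on a NULL hook instead of jumping through a NULL
// pointer, which makes missing HAS_FS_CALL() guards visible.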
113 
114 const static size_t kMaxPathLength = 65536;
115 	// The absolute maximum path length (for getcwd()); this does not depend
116 	// on PATH_MAX.
117 
118 
119 typedef DoublyLinkedList<vnode> VnodeList;
120 
121 /*!	\brief Structure to manage a mounted file system
122 
123 	Note: The root_vnode and root_vnode->covers fields (what others?) are
124 	initialized in fs_mount() and not changed afterwards. That is, as soon
125 	as the mount is mounted and it is ensured that it won't be unmounted
126 	(e.g. by holding a reference to a vnode of that mount), (read) access
127 	to those fields is always safe, even without additional locking. Moreover,
128 	while mounted, the mount holds a reference to the root_vnode->covers vnode,
129 	thus making the access path vnode->mount->root_vnode->covers->mount->...
130 	safe if a reference to vnode is held (note that for the root mount
131 	root_vnode->covers is NULL, though).
132 */
133 struct fs_mount {
134 	fs_mount()
135 		:
136 		volume(NULL),
137 		device_name(NULL)
138 	{
139 		mutex_init(&lock, "mount lock");
140 	}
141 
142 	~fs_mount()
143 	{
144 		mutex_destroy(&lock);
145 		free(device_name);
146 
147 		while (volume) {
148 			fs_volume* superVolume = volume->super_volume;
149 
150 			if (volume->file_system != NULL)
151 				put_module(volume->file_system->info.name);
152 
153 			free(volume->file_system_name);
154 			free(volume);
155 			volume = superVolume;
156 		}
157 	}
158 
159 	struct fs_mount* next;
160 	dev_t			id;
161 	fs_volume*		volume;
162 	char*			device_name;
163 	mutex			lock;	// guards the vnodes list
164 	struct vnode*	root_vnode;
165 	struct vnode*	covers_vnode;	// immutable
166 	KPartition*		partition;
167 	VnodeList		vnodes;
168 	EntryCache		entry_cache;
169 	bool			unmounting;
170 	bool			owns_file_device;
171 };
172 
173 
174 namespace {
175 
176 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
177 	list_link		link;
178 	void*			bound_to;
179 	team_id			team;
180 	pid_t			session;
181 	off_t			start;
182 	off_t			end;
183 	bool			shared;
184 };
185 
186 typedef DoublyLinkedList<advisory_lock> LockList;
187 
188 } // namespace
189 
190 
191 struct advisory_locking {
192 	sem_id			lock;
193 	sem_id			wait_sem;
194 	LockList		locks;
195 
196 	advisory_locking()
197 		:
198 		lock(-1),
199 		wait_sem(-1)
200 	{
201 	}
202 
203 	~advisory_locking()
204 	{
205 		if (lock >= 0)
206 			delete_sem(lock);
207 		if (wait_sem >= 0)
208 			delete_sem(wait_sem);
209 	}
210 };
211 
212 /*!	\brief Guards sMountsTable.
213 
214 	The holder is allowed read/write access to sMountsTable.
215 	Manipulation of the fs_mount structures themselves
216 	(and their destruction) requires different locks though.
217 */
218 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
219 
220 /*!	\brief Guards mount/unmount operations.
221 
222 	fs_mount() and fs_unmount() hold the lock during their whole operation.
223 	That is, locking the lock ensures that no FS is mounted/unmounted. In
224 	particular this means that
225 	- sMountsTable will not be modified,
226 	- the fields immutable after initialization of the fs_mount structures in
227 	  sMountsTable will not be modified.
228 
229 	The thread trying to lock the lock must not hold sVnodeLock or
230 	sMountLock.
231 */
232 static recursive_lock sMountOpLock;
233 
234 /*!	\brief Guards sVnodeTable.
235 
236 	The holder is allowed read/write access to sVnodeTable and to
237 	any unbusy vnode in that table, save for the immutable fields (device, id,
238 	private_node, mount) to which only read-only access is allowed.
239 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
240 	well as the busy, removed, unused flags, and the vnode's type can also be
241 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
242 	locked. Write access to covered_by and covers requires write locking
243 	sVnodeLock.
244 
245 	The thread trying to acquire the lock must not hold sMountLock.
246 	You must not hold this lock when calling create_sem(), as this might call
247 	vfs_free_unused_vnodes() and thus cause a deadlock.
248 */
249 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
250 
251 /*!	\brief Guards io_context::root.
252 
253 	Must be held when setting or getting the io_context::root field.
254 	The only operation allowed while holding this lock besides getting or
255 	setting the field is inc_vnode_ref_count() on io_context::root.
256 */
257 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
258 
259 
260 namespace {
261 
262 struct vnode_hash_key {
263 	dev_t	device;
264 	ino_t	vnode;
265 };
266 
267 struct VnodeHash {
268 	typedef vnode_hash_key	KeyType;
269 	typedef	struct vnode	ValueType;
270 
271 #define VHASH(mountid, vnodeid) \
272 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
273 
274 	size_t HashKey(KeyType key) const
275 	{
276 		return VHASH(key.device, key.vnode);
277 	}
278 
279 	size_t Hash(ValueType* vnode) const
280 	{
281 		return VHASH(vnode->device, vnode->id);
282 	}
283 
284 #undef VHASH
285 
286 	bool Compare(KeyType key, ValueType* vnode) const
287 	{
288 		return vnode->device == key.device && vnode->id == key.vnode;
289 	}
290 
291 	ValueType*& GetLink(ValueType* value) const
292 	{
293 		return value->next;
294 	}
295 };
296 
297 typedef BOpenHashTable<VnodeHash> VnodeTable;
298 
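// A minimal lookup sketch for the table type defined above (the real code is
// in lookup_vnode() below): keys are (device, vnode ID) pairs and entries are
// chained through vnode::next via GetLink().
//
//	vnode_hash_key key;
//	key.device = mountID;	// hypothetical dev_t
//	key.vnode = vnodeID;	// hypothetical ino_t
//	struct vnode* vnode = sVnodeTable->Lookup(key);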
299 
300 struct MountHash {
301 	typedef dev_t			KeyType;
302 	typedef	struct fs_mount	ValueType;
303 
304 	size_t HashKey(KeyType key) const
305 	{
306 		return key;
307 	}
308 
309 	size_t Hash(ValueType* mount) const
310 	{
311 		return mount->id;
312 	}
313 
314 	bool Compare(KeyType key, ValueType* mount) const
315 	{
316 		return mount->id == key;
317 	}
318 
319 	ValueType*& GetLink(ValueType* value) const
320 	{
321 		return value->next;
322 	}
323 };
324 
325 typedef BOpenHashTable<MountHash> MountTable;
326 
327 } // namespace
328 
329 
330 object_cache* sPathNameCache;
331 object_cache* sFileDescriptorCache;
332 
333 #define VNODE_HASH_TABLE_SIZE 1024
334 static VnodeTable* sVnodeTable;
335 static struct vnode* sRoot;
336 
337 #define MOUNTS_HASH_TABLE_SIZE 16
338 static MountTable* sMountsTable;
339 static dev_t sNextMountID = 1;
340 
341 #define MAX_TEMP_IO_VECS 8
342 
343 // How long to wait for busy vnodes (2000 retries * 5000 µs delay = 10 s)
344 #define BUSY_VNODE_RETRIES 2000
345 #define BUSY_VNODE_DELAY 5000
346 
347 mode_t __gUmask = 022;
348 
349 /* function declarations */
350 
351 static void free_unused_vnodes();
352 
353 // file descriptor operation prototypes
354 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
355 	void* buffer, size_t* _bytes);
356 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
357 	const void* buffer, size_t* _bytes);
358 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
359 	int seekType);
360 static void file_free_fd(struct file_descriptor* descriptor);
361 static status_t file_close(struct file_descriptor* descriptor);
362 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
363 	struct selectsync* sync);
364 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
365 	struct selectsync* sync);
366 static status_t dir_read(struct io_context* context,
367 	struct file_descriptor* descriptor, struct dirent* buffer,
368 	size_t bufferSize, uint32* _count);
369 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
370 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
371 static status_t dir_rewind(struct file_descriptor* descriptor);
372 static void dir_free_fd(struct file_descriptor* descriptor);
373 static status_t dir_close(struct file_descriptor* descriptor);
374 static status_t attr_dir_read(struct io_context* context,
375 	struct file_descriptor* descriptor, struct dirent* buffer,
376 	size_t bufferSize, uint32* _count);
377 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
378 static void attr_dir_free_fd(struct file_descriptor* descriptor);
379 static status_t attr_dir_close(struct file_descriptor* descriptor);
380 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
381 	void* buffer, size_t* _bytes);
382 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
383 	const void* buffer, size_t* _bytes);
384 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
385 	int seekType);
386 static void attr_free_fd(struct file_descriptor* descriptor);
387 static status_t attr_close(struct file_descriptor* descriptor);
388 static status_t attr_read_stat(struct file_descriptor* descriptor,
389 	struct stat* statData);
390 static status_t attr_write_stat(struct file_descriptor* descriptor,
391 	const struct stat* stat, int statMask);
392 static status_t index_dir_read(struct io_context* context,
393 	struct file_descriptor* descriptor, struct dirent* buffer,
394 	size_t bufferSize, uint32* _count);
395 static status_t index_dir_rewind(struct file_descriptor* descriptor);
396 static void index_dir_free_fd(struct file_descriptor* descriptor);
397 static status_t index_dir_close(struct file_descriptor* descriptor);
398 static status_t query_read(struct io_context* context,
399 	struct file_descriptor* descriptor, struct dirent* buffer,
400 	size_t bufferSize, uint32* _count);
401 static status_t query_rewind(struct file_descriptor* descriptor);
402 static void query_free_fd(struct file_descriptor* descriptor);
403 static status_t query_close(struct file_descriptor* descriptor);
404 
405 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
406 	void* buffer, size_t length);
407 static status_t common_read_stat(struct file_descriptor* descriptor,
408 	struct stat* statData);
409 static status_t common_write_stat(struct file_descriptor* descriptor,
410 	const struct stat* statData, int statMask);
411 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
412 	struct stat* stat, bool kernel);
413 
414 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
415 	bool traverseLeafLink, int count, bool kernel,
416 	struct vnode** _vnode, ino_t* _parentID);
417 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
418 	size_t bufferSize, bool kernel);
419 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
420 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
421 static void inc_vnode_ref_count(struct vnode* vnode);
422 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
423 	bool reenter);
424 static inline void put_vnode(struct vnode* vnode);
425 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
426 	bool kernel);
427 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
428 
429 
430 static struct fd_ops sFileOps = {
431 	file_read,
432 	file_write,
433 	file_seek,
434 	common_ioctl,
435 	NULL,		// set_flags
436 	file_select,
437 	file_deselect,
438 	NULL,		// read_dir()
439 	NULL,		// rewind_dir()
440 	common_read_stat,
441 	common_write_stat,
442 	file_close,
443 	file_free_fd
444 };
445 
446 static struct fd_ops sDirectoryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	common_ioctl,
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	dir_read,
455 	dir_rewind,
456 	common_read_stat,
457 	common_write_stat,
458 	dir_close,
459 	dir_free_fd
460 };
461 
462 static struct fd_ops sAttributeDirectoryOps = {
463 	NULL,		// read()
464 	NULL,		// write()
465 	NULL,		// seek()
466 	common_ioctl,
467 	NULL,		// set_flags
468 	NULL,		// select()
469 	NULL,		// deselect()
470 	attr_dir_read,
471 	attr_dir_rewind,
472 	common_read_stat,
473 	common_write_stat,
474 	attr_dir_close,
475 	attr_dir_free_fd
476 };
477 
478 static struct fd_ops sAttributeOps = {
479 	attr_read,
480 	attr_write,
481 	attr_seek,
482 	common_ioctl,
483 	NULL,		// set_flags
484 	NULL,		// select()
485 	NULL,		// deselect()
486 	NULL,		// read_dir()
487 	NULL,		// rewind_dir()
488 	attr_read_stat,
489 	attr_write_stat,
490 	attr_close,
491 	attr_free_fd
492 };
493 
494 static struct fd_ops sIndexDirectoryOps = {
495 	NULL,		// read()
496 	NULL,		// write()
497 	NULL,		// seek()
498 	NULL,		// ioctl()
499 	NULL,		// set_flags
500 	NULL,		// select()
501 	NULL,		// deselect()
502 	index_dir_read,
503 	index_dir_rewind,
504 	NULL,		// read_stat()
505 	NULL,		// write_stat()
506 	index_dir_close,
507 	index_dir_free_fd
508 };
509 
510 #if 0
511 static struct fd_ops sIndexOps = {
512 	NULL,		// read()
513 	NULL,		// write()
514 	NULL,		// seek()
515 	NULL,		// ioctl()
516 	NULL,		// set_flags
517 	NULL,		// select()
518 	NULL,		// deselect()
519 	NULL,		// dir_read()
520 	NULL,		// dir_rewind()
521 	index_read_stat,	// read_stat()
522 	NULL,		// write_stat()
523 	NULL,		// dir_close()
524 	NULL		// free_fd()
525 };
526 #endif
527 
528 static struct fd_ops sQueryOps = {
529 	NULL,		// read()
530 	NULL,		// write()
531 	NULL,		// seek()
532 	NULL,		// ioctl()
533 	NULL,		// set_flags
534 	NULL,		// select()
535 	NULL,		// deselect()
536 	query_read,
537 	query_rewind,
538 	NULL,		// read_stat()
539 	NULL,		// write_stat()
540 	query_close,
541 	query_free_fd
542 };
543 
544 
545 namespace {
546 
547 class VNodePutter {
548 public:
549 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
550 
551 	~VNodePutter()
552 	{
553 		Put();
554 	}
555 
556 	void SetTo(struct vnode* vnode)
557 	{
558 		Put();
559 		fVNode = vnode;
560 	}
561 
562 	void Put()
563 	{
564 		if (fVNode) {
565 			put_vnode(fVNode);
566 			fVNode = NULL;
567 		}
568 	}
569 
570 	struct vnode* Detach()
571 	{
572 		struct vnode* vnode = fVNode;
573 		fVNode = NULL;
574 		return vnode;
575 	}
576 
577 private:
578 	struct vnode* fVNode;
579 };
580 
581 
582 class FDCloser {
583 public:
584 	FDCloser() : fFD(-1), fKernel(true) {}
585 
586 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
587 
588 	~FDCloser()
589 	{
590 		Close();
591 	}
592 
593 	void SetTo(int fd, bool kernel)
594 	{
595 		Close();
596 		fFD = fd;
597 		fKernel = kernel;
598 	}
599 
600 	void Close()
601 	{
602 		if (fFD >= 0) {
603 			if (fKernel)
604 				_kern_close(fFD);
605 			else
606 				_user_close(fFD);
607 			fFD = -1;
608 		}
609 	}
610 
611 	int Detach()
612 	{
613 		int fd = fFD;
614 		fFD = -1;
615 		return fd;
616 	}
617 
618 private:
619 	int		fFD;
620 	bool	fKernel;
621 };
622 
623 } // namespace
624 
625 
626 #if VFS_PAGES_IO_TRACING
627 
628 namespace VFSPagesIOTracing {
629 
630 class PagesIOTraceEntry : public AbstractTraceEntry {
631 protected:
632 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
633 		const generic_io_vec* vecs, uint32 count, uint32 flags,
634 		generic_size_t bytesRequested, status_t status,
635 		generic_size_t bytesTransferred)
636 		:
637 		fVnode(vnode),
638 		fMountID(vnode->mount->id),
639 		fNodeID(vnode->id),
640 		fCookie(cookie),
641 		fPos(pos),
642 		fCount(count),
643 		fFlags(flags),
644 		fBytesRequested(bytesRequested),
645 		fStatus(status),
646 		fBytesTransferred(bytesTransferred)
647 	{
648 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
649 			sizeof(generic_io_vec) * count, false);
650 	}
651 
652 	void AddDump(TraceOutput& out, const char* mode)
653 	{
654 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
655 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
656 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
657 			(uint64)fBytesRequested);
658 
659 		if (fVecs != NULL) {
660 			for (uint32 i = 0; i < fCount; i++) {
661 				if (i > 0)
662 					out.Print(", ");
663 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
664 					(uint64)fVecs[i].length);
665 			}
666 		}
667 
668 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
669 			"transferred: %" B_PRIu64, fFlags, fStatus,
670 			(uint64)fBytesTransferred);
671 	}
672 
673 protected:
674 	struct vnode*	fVnode;
675 	dev_t			fMountID;
676 	ino_t			fNodeID;
677 	void*			fCookie;
678 	off_t			fPos;
679 	generic_io_vec*	fVecs;
680 	uint32			fCount;
681 	uint32			fFlags;
682 	generic_size_t	fBytesRequested;
683 	status_t		fStatus;
684 	generic_size_t	fBytesTransferred;
685 };
686 
687 
688 class ReadPages : public PagesIOTraceEntry {
689 public:
690 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
691 		const generic_io_vec* vecs, uint32 count, uint32 flags,
692 		generic_size_t bytesRequested, status_t status,
693 		generic_size_t bytesTransferred)
694 		:
695 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
696 			bytesRequested, status, bytesTransferred)
697 	{
698 		Initialized();
699 	}
700 
701 	virtual void AddDump(TraceOutput& out)
702 	{
703 		PagesIOTraceEntry::AddDump(out, "read");
704 	}
705 };
706 
707 
708 class WritePages : public PagesIOTraceEntry {
709 public:
710 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
711 		const generic_io_vec* vecs, uint32 count, uint32 flags,
712 		generic_size_t bytesRequested, status_t status,
713 		generic_size_t bytesTransferred)
714 		:
715 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
716 			bytesRequested, status, bytesTransferred)
717 	{
718 		Initialized();
719 	}
720 
721 	virtual void AddDump(TraceOutput& out)
722 	{
723 		PagesIOTraceEntry::AddDump(out, "write");
724 	}
725 };
726 
727 }	// namespace VFSPagesIOTracing
728 
729 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
730 #else
731 #	define TPIO(x) ;
732 #endif	// VFS_PAGES_IO_TRACING
733 
734 
735 /*! Finds the mounted device (the fs_mount structure) with the given ID.
736 	Note, you must hold sMountLock when you call this function.
737 */
738 static struct fs_mount*
739 find_mount(dev_t id)
740 {
741 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
742 
743 	return sMountsTable->Lookup(id);
744 }
745 
746 
747 static status_t
748 get_mount(dev_t id, struct fs_mount** _mount)
749 {
750 	struct fs_mount* mount;
751 
752 	ReadLocker nodeLocker(sVnodeLock);
753 	ReadLocker mountLocker(sMountLock);
754 
755 	mount = find_mount(id);
756 	if (mount == NULL)
757 		return B_BAD_VALUE;
758 
759 	struct vnode* rootNode = mount->root_vnode;
760 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
761 		|| rootNode->ref_count == 0) {
762 		// might have been called during a mount/unmount operation
763 		return B_BUSY;
764 	}
765 
766 	inc_vnode_ref_count(rootNode);
767 	*_mount = mount;
768 	return B_OK;
769 }
770 
771 
772 static void
773 put_mount(struct fs_mount* mount)
774 {
775 	if (mount)
776 		put_vnode(mount->root_vnode);
777 }
778 
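// Hypothetical caller pattern: get_mount() pins the mount by referencing its
// root vnode, so every successful call must be balanced with put_mount():
//
//	struct fs_mount* mount;
//	if (get_mount(id, &mount) == B_OK) {
//		// ... the mount cannot go away here ...
//		put_mount(mount);
//	}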
779 
780 /*!	Tries to open the specified file system module.
781 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
782 	Returns a pointer to the file system module interface, or NULL if it
783 	could not open the module.
784 */
785 static file_system_module_info*
786 get_file_system(const char* fsName)
787 {
788 	char name[B_FILE_NAME_LENGTH];
789 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
790 		// construct module name if we didn't get one
791 		// (we currently support only one API)
792 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
793 		fsName = NULL;
794 	}
795 
796 	file_system_module_info* info;
797 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
798 		return NULL;
799 
800 	return info;
801 }
802 
803 
804 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
805 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
806 	The name is allocated for you, and you have to free() it when you're
807 	done with it.
808 	Returns NULL if the required memory is not available.
809 */
810 static char*
811 get_file_system_name(const char* fsName)
812 {
813 	const size_t length = strlen("file_systems/");
814 
815 	if (strncmp(fsName, "file_systems/", length)) {
816 		// the name already seems to be the module's file name
817 		return strdup(fsName);
818 	}
819 
820 	fsName += length;
821 	const char* end = strchr(fsName, '/');
822 	if (end == NULL) {
823 		// this doesn't seem to be a valid name, but well...
824 		return strdup(fsName);
825 	}
826 
827 	// cut off the trailing /v1
828 
829 	char* name = (char*)malloc(end + 1 - fsName);
830 	if (name == NULL)
831 		return NULL;
832 
833 	strlcpy(name, fsName, end + 1 - fsName);
834 	return name;
835 }
836 
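// Worked examples (illustrative) for the conversion above; both results are
// allocated with strdup()/malloc() and must be free()d by the caller:
//
//	get_file_system_name("bfs")                 -> "bfs"
//	get_file_system_name("file_systems/bfs/v1") -> "bfs"  (trailing /v1 cut)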
837 
838 /*!	Accepts a list of file system names separated by a colon, one for each
839 	layer, and returns the file system name for the specified layer.
840 	The name is allocated for you, and you have to free() it when you're
841 	done with it.
842 	Returns NULL if the required memory is not available or if there is no
843 	name for the specified layer.
844 */
845 static char*
846 get_file_system_name_for_layer(const char* fsNames, int32 layer)
847 {
848 	while (layer >= 0) {
849 		const char* end = strchr(fsNames, ':');
850 		if (end == NULL) {
851 			if (layer == 0)
852 				return strdup(fsNames);
853 			return NULL;
854 		}
855 
856 		if (layer == 0) {
857 			size_t length = end - fsNames + 1;
858 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
859 			strlcpy(result, fsNames, length);
860 			return result;
861 		}
862 
863 		fsNames = end + 1;
864 		layer--;
865 	}
866 
867 	return NULL;
868 }
869 
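// Illustrative example for the layer splitting above, with an arbitrarily
// chosen two-layer list "ntfs:write_overlay":
//
//	get_file_system_name_for_layer(fsNames, 0) -> "ntfs"
//	get_file_system_name_for_layer(fsNames, 1) -> "write_overlay"
//	get_file_system_name_for_layer(fsNames, 2) -> NULL  (no such layer)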
870 
871 static void
872 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
873 {
874 	MutexLocker _(mount->lock);
875 	mount->vnodes.Add(vnode);
876 }
877 
878 
879 static void
880 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
881 {
882 	MutexLocker _(mount->lock);
883 	mount->vnodes.Remove(vnode);
884 }
885 
886 
887 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
888 
889 	The caller must hold the sVnodeLock (read lock at least).
890 
891 	\param mountID the mount ID.
892 	\param vnodeID the node ID.
893 
894 	\return The vnode structure, if it was found in the hash table, \c NULL
895 			otherwise.
896 */
897 static struct vnode*
898 lookup_vnode(dev_t mountID, ino_t vnodeID)
899 {
900 	struct vnode_hash_key key;
901 
902 	key.device = mountID;
903 	key.vnode = vnodeID;
904 
905 	return sVnodeTable->Lookup(key);
906 }
907 
908 
909 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
910 
911 	This will also wait for BUSY_VNODE_DELAY before returning if one should
912 	still wait for the vnode to become unbusy.
913 
914 	\return \c true if one should retry, \c false if not.
915 */
916 static bool
917 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
918 {
919 	if (--tries < 0) {
920 		// vnode doesn't seem to become unbusy
921 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
922 			" is not becoming unbusy!\n", mountID, vnodeID);
923 		return false;
924 	}
925 	snooze(BUSY_VNODE_DELAY);
926 	return true;
927 }
928 
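// Sketch of the intended retry loop (see get_vnode() below for the real,
// lock-juggling version): the busy check is repeated until the vnode becomes
// unbusy or the retry budget (BUSY_VNODE_RETRIES * BUSY_VNODE_DELAY = ~10 s)
// runs out.
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnodeIsBusy) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;
//		// ... drop the locks, look the vnode up again ...
//	}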
929 
930 /*!	Creates a new vnode with the given mount and node ID.
931 	If the node already exists, it is returned instead and no new node is
932 	created. In either case -- but not if an error occurs -- the function write
933 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
934 	error the lock is not held on return.
935 
936 	\param mountID The mount ID.
937 	\param vnodeID The vnode ID.
938 	\param _vnode Will be set to the new vnode on success.
939 	\param _nodeCreated Will be set to \c true when the returned vnode has
940 		been newly created, \c false when it already existed. Will not be
941 		changed on error.
942 	\return \c B_OK, when the vnode was successfully created and inserted or
943 		a node with the given ID was found, \c B_NO_MEMORY or
944 		\c B_ENTRY_NOT_FOUND on error.
945 */
946 static status_t
947 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
948 	bool& _nodeCreated)
949 {
950 	FUNCTION(("create_new_vnode_and_lock()\n"));
951 
952 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
953 	if (vnode == NULL)
954 		return B_NO_MEMORY;
955 
956 	// initialize basic values
957 	memset(vnode, 0, sizeof(struct vnode));
958 	vnode->device = mountID;
959 	vnode->id = vnodeID;
960 	vnode->ref_count = 1;
961 	vnode->SetBusy(true);
962 
963 	// look up the node -- it might have been added by someone else in the
964 	// meantime
965 	rw_lock_write_lock(&sVnodeLock);
966 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
967 	if (existingVnode != NULL) {
968 		free(vnode);
969 		_vnode = existingVnode;
970 		_nodeCreated = false;
971 		return B_OK;
972 	}
973 
974 	// get the mount structure
975 	rw_lock_read_lock(&sMountLock);
976 	vnode->mount = find_mount(mountID);
977 	if (!vnode->mount || vnode->mount->unmounting) {
978 		rw_lock_read_unlock(&sMountLock);
979 		rw_lock_write_unlock(&sVnodeLock);
980 		free(vnode);
981 		return B_ENTRY_NOT_FOUND;
982 	}
983 
984 	// add the vnode to the mount's node list and the hash table
985 	sVnodeTable->Insert(vnode);
986 	add_vnode_to_mount_list(vnode, vnode->mount);
987 
988 	rw_lock_read_unlock(&sMountLock);
989 
990 	_vnode = vnode;
991 	_nodeCreated = true;
992 
993 	// keep the vnode lock locked
994 	return B_OK;
995 }
996 
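// Usage sketch (assumed, not from the original source): on success the
// function returns with sVnodeLock write locked, so the caller must unlock it
// on every path, e.g.
//
//	struct vnode* vnode;
//	bool nodeCreated;
//	if (create_new_vnode_and_lock(mountID, vnodeID, vnode, nodeCreated)
//			== B_OK) {
//		// ... set up the (busy) vnode ...
//		rw_lock_write_unlock(&sVnodeLock);
//	}
//
// get_vnode() below shows the full version of this dance.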
997 
998 /*!	Frees the vnode and all resources it has acquired, and removes
999 	it from the vnode hash as well as from its mount structure.
1000 	Will also make sure that any cache modifications are written back.
1001 */
1002 static void
1003 free_vnode(struct vnode* vnode, bool reenter)
1004 {
1005 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1006 		vnode);
1007 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1008 
1009 	// write back any changes in this vnode's cache -- but only
1010 	// if the vnode won't be deleted, in which case the changes
1011 	// will be discarded
1012 
1013 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1014 		FS_CALL_NO_PARAMS(vnode, fsync);
1015 
1016 	// Note: If this vnode has a cache attached, there will still be two
1017 	// references to that cache at this point. The last one belongs to the vnode
1018 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1019 	// cache. Each but the last reference to a cache also includes a reference
1020 	// to the vnode. The file cache, however, released its reference (cf.
1021 	// file_cache_create()), so that this vnode's ref count ever has a chance
1022 	// to drop to 0. Deleting the file cache now will cause the next to last
1023 	// cache reference to be released, which will also release a (no longer
1024 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1025 	// count, so that it will neither become negative nor 0.
1026 	vnode->ref_count = 2;
1027 
1028 	if (!vnode->IsUnpublished()) {
1029 		if (vnode->IsRemoved())
1030 			FS_CALL(vnode, remove_vnode, reenter);
1031 		else
1032 			FS_CALL(vnode, put_vnode, reenter);
1033 	}
1034 
1035 	// If the vnode has a VMCache attached, make sure that it won't try to get
1036 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1037 	// long as the vnode is busy and in the hash, that won't happen, but as
1038 	// soon as we've removed it from the hash, it could reload the vnode -- with
1039 	// a new cache attached!
1040 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1041 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1042 
1043 	// The file system has removed the resources of the vnode now, so we can
1044 	// make it available again (by removing the busy vnode from the hash).
1045 	rw_lock_write_lock(&sVnodeLock);
1046 	sVnodeTable->Remove(vnode);
1047 	rw_lock_write_unlock(&sVnodeLock);
1048 
1049 	// if we have a VMCache attached, remove it
1050 	if (vnode->cache)
1051 		vnode->cache->ReleaseRef();
1052 
1053 	vnode->cache = NULL;
1054 
1055 	remove_vnode_from_mount_list(vnode, vnode->mount);
1056 
1057 	free(vnode);
1058 }
1059 
1060 
1061 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1062 	if the counter dropped to 0.
1063 
1064 	The caller must, of course, own a reference to the vnode to call this
1065 	function.
1066 	The caller must not hold the sVnodeLock or the sMountLock.
1067 
1068 	\param vnode the vnode.
1069 	\param alwaysFree don't move this vnode into the unused list, but really
1070 		   delete it if possible.
1071 	\param reenter \c true, if this function is called (indirectly) from within
1072 		   a file system. This will be passed to file system hooks only.
1073 	\return \c B_OK, if everything went fine, an error code otherwise.
1074 */
1075 static status_t
1076 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1077 {
1078 	ReadLocker locker(sVnodeLock);
1079 	AutoLocker<Vnode> nodeLocker(vnode);
1080 
1081 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1082 
1083 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1084 
1085 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1086 		vnode->ref_count));
1087 
1088 	if (oldRefCount != 1)
1089 		return B_OK;
1090 
1091 	if (vnode->IsBusy())
1092 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1093 
1094 	bool freeNode = false;
1095 	bool freeUnusedNodes = false;
1096 
1097 	// Just insert the vnode into an unused list if we don't need
1098 	// to delete it
1099 	if (vnode->IsRemoved() || alwaysFree) {
1100 		vnode_to_be_freed(vnode);
1101 		vnode->SetBusy(true);
1102 		freeNode = true;
1103 	} else
1104 		freeUnusedNodes = vnode_unused(vnode);
1105 
1106 	nodeLocker.Unlock();
1107 	locker.Unlock();
1108 
1109 	if (freeNode)
1110 		free_vnode(vnode, reenter);
1111 	else if (freeUnusedNodes)
1112 		free_unused_vnodes();
1113 
1114 	return B_OK;
1115 }
1116 
1117 
1118 /*!	\brief Increments the reference counter of the given vnode.
1119 
1120 	The caller must make sure that the node isn't deleted while this function
1121 	is called. This can be done either:
1122 	- by ensuring that a reference to the node exists and remains in existence,
1123 	  or
1124 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1125 	  or by holding sVnodeLock write locked.
1126 
1127 	In the second case the caller is responsible for dealing with the ref count
1128 	0 -> 1 transition. That is: 1. this function must not be invoked when the
1129 	node is busy in the first place, and 2. vnode_used() must be called for the
1130 	node.
1131 
1132 	\param vnode the vnode.
1133 */
1134 static void
1135 inc_vnode_ref_count(struct vnode* vnode)
1136 {
1137 	atomic_add(&vnode->ref_count, 1);
1138 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1139 		vnode->ref_count));
1140 }
1141 
1142 
1143 static bool
1144 is_special_node_type(int type)
1145 {
1146 	// at the moment only FIFOs are supported
1147 	return S_ISFIFO(type);
1148 }
1149 
1150 
1151 static status_t
1152 create_special_sub_node(struct vnode* vnode, uint32 flags)
1153 {
1154 	if (S_ISFIFO(vnode->Type()))
1155 		return create_fifo_vnode(vnode->mount->volume, vnode);
1156 
1157 	return B_BAD_VALUE;
1158 }
1159 
1160 
1161 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1162 
1163 	If the node is not yet in memory, it will be loaded.
1164 
1165 	The caller must not hold the sVnodeLock or the sMountLock.
1166 
1167 	\param mountID the mount ID.
1168 	\param vnodeID the node ID.
1169 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1170 		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function shall wait when the node is busy.
1171 	\param reenter \c true, if this function is called (indirectly) from within
1172 		   a file system.
1173 	\return \c B_OK, if everything went fine, an error code otherwise.
1174 */
1175 static status_t
1176 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1177 	int reenter)
1178 {
1179 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1180 		mountID, vnodeID, _vnode));
1181 
1182 	rw_lock_read_lock(&sVnodeLock);
1183 
1184 	int32 tries = BUSY_VNODE_RETRIES;
1185 restart:
1186 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1187 	AutoLocker<Vnode> nodeLocker(vnode);
1188 
1189 	if (vnode && vnode->IsBusy()) {
1190 		nodeLocker.Unlock();
1191 		rw_lock_read_unlock(&sVnodeLock);
1192 		if (!canWait) {
1193 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1194 				mountID, vnodeID);
1195 			return B_BUSY;
1196 		}
1197 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1198 			return B_BUSY;
1199 
1200 		rw_lock_read_lock(&sVnodeLock);
1201 		goto restart;
1202 	}
1203 
1204 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1205 
1206 	status_t status;
1207 
1208 	if (vnode) {
1209 		if (vnode->ref_count == 0) {
1210 			// this vnode has been unused before
1211 			vnode_used(vnode);
1212 		}
1213 		inc_vnode_ref_count(vnode);
1214 
1215 		nodeLocker.Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	} else {
1218 		// we need to create a new vnode and read it in
1219 		rw_lock_read_unlock(&sVnodeLock);
1220 			// unlock -- create_new_vnode_and_lock() write-locks on success
1221 		bool nodeCreated;
1222 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1223 			nodeCreated);
1224 		if (status != B_OK)
1225 			return status;
1226 
1227 		if (!nodeCreated) {
1228 			rw_lock_read_lock(&sVnodeLock);
1229 			rw_lock_write_unlock(&sVnodeLock);
1230 			goto restart;
1231 		}
1232 
1233 		rw_lock_write_unlock(&sVnodeLock);
1234 
1235 		int type;
1236 		uint32 flags;
1237 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1238 			&flags, reenter);
1239 		if (status == B_OK && vnode->private_node == NULL)
1240 			status = B_BAD_VALUE;
1241 
1242 		bool gotNode = status == B_OK;
1243 		bool publishSpecialSubNode = false;
1244 		if (gotNode) {
1245 			vnode->SetType(type);
1246 			publishSpecialSubNode = is_special_node_type(type)
1247 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1248 		}
1249 
1250 		if (gotNode && publishSpecialSubNode)
1251 			status = create_special_sub_node(vnode, flags);
1252 
1253 		if (status != B_OK) {
1254 			if (gotNode)
1255 				FS_CALL(vnode, put_vnode, reenter);
1256 
1257 			rw_lock_write_lock(&sVnodeLock);
1258 			sVnodeTable->Remove(vnode);
1259 			remove_vnode_from_mount_list(vnode, vnode->mount);
1260 			rw_lock_write_unlock(&sVnodeLock);
1261 
1262 			free(vnode);
1263 			return status;
1264 		}
1265 
1266 		rw_lock_read_lock(&sVnodeLock);
1267 		vnode->Lock();
1268 
1269 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1270 		vnode->SetBusy(false);
1271 
1272 		vnode->Unlock();
1273 		rw_lock_read_unlock(&sVnodeLock);
1274 	}
1275 
1276 	TRACE(("get_vnode: returning %p\n", vnode));
1277 
1278 	*_vnode = vnode;
1279 	return B_OK;
1280 }
1281 
1282 
1283 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1284 	if the counter dropped to 0.
1285 
1286 	The caller must, of course, own a reference to the vnode to call this
1287 	function.
1288 	The caller must not hold the sVnodeLock or the sMountLock.
1289 
1290 	\param vnode the vnode.
1291 */
1292 static inline void
1293 put_vnode(struct vnode* vnode)
1294 {
1295 	dec_vnode_ref_count(vnode, false, false);
1296 }
1297 
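// The reference counting contract in a nutshell (illustrative): every
// successful get_vnode()/inc_vnode_ref_count() must eventually be matched by
// exactly one put_vnode(), which may park the node on the unused list or free
// it entirely:
//
//	inc_vnode_ref_count(vnode);
//	// ... vnode is guaranteed to stay alive here ...
//	put_vnode(vnode);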
1298 
1299 static void
1300 free_unused_vnodes(int32 level)
1301 {
1302 	unused_vnodes_check_started();
1303 
1304 	if (level == B_NO_LOW_RESOURCE) {
1305 		unused_vnodes_check_done();
1306 		return;
1307 	}
1308 
1309 	flush_hot_vnodes();
1310 
1311 	// determine how many nodes to free
1312 	uint32 count = 1;
1313 	{
1314 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1315 
1316 		switch (level) {
1317 			case B_LOW_RESOURCE_NOTE:
1318 				count = sUnusedVnodes / 100;
1319 				break;
1320 			case B_LOW_RESOURCE_WARNING:
1321 				count = sUnusedVnodes / 10;
1322 				break;
1323 			case B_LOW_RESOURCE_CRITICAL:
1324 				count = sUnusedVnodes;
1325 				break;
1326 		}
1327 
1328 		if (count > sUnusedVnodes)
1329 			count = sUnusedVnodes;
1330 	}
1331 
1332 	// Write back the modified pages of some unused vnodes and free them.
1333 
1334 	for (uint32 i = 0; i < count; i++) {
1335 		ReadLocker vnodesReadLocker(sVnodeLock);
1336 
1337 		// get the first node
1338 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1339 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1340 			&sUnusedVnodeList);
1341 		unusedVnodesLocker.Unlock();
1342 
1343 		if (vnode == NULL)
1344 			break;
1345 
1346 		// lock the node
1347 		AutoLocker<Vnode> nodeLocker(vnode);
1348 
1349 		// Check whether the node is still unused -- since we only append to the
1350 		// tail of the unused queue, the vnode should still be at its head.
1351 		// Alternatively we could check its ref count for 0 and its busy flag,
1352 		// but if the node is no longer at the head of the queue, it means it
1353 		// has been touched in the meantime, i.e. it is no longer the least
1354 		// recently used unused vnode and we'd rather not free it.
1355 		unusedVnodesLocker.Lock();
1356 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1357 			continue;
1358 		unusedVnodesLocker.Unlock();
1359 
1360 		ASSERT(!vnode->IsBusy());
1361 
1362 		// grab a reference
1363 		inc_vnode_ref_count(vnode);
1364 		vnode_used(vnode);
1365 
1366 		// write back changes and free the node
1367 		nodeLocker.Unlock();
1368 		vnodesReadLocker.Unlock();
1369 
1370 		if (vnode->cache != NULL)
1371 			vnode->cache->WriteModified();
1372 
1373 		dec_vnode_ref_count(vnode, true, false);
1374 			// this should free the vnode when it's still unused
1375 	}
1376 
1377 	unused_vnodes_check_done();
1378 }
1379 
1380 
1381 /*!	Gets the vnode the given vnode is covering.
1382 
1383 	The caller must have \c sVnodeLock read-locked at least.
1384 
1385 	The function returns a reference to the retrieved vnode (if any), which
1386 	the caller is responsible for releasing.
1387 
1388 	\param vnode The vnode whose covered node shall be returned.
1389 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1390 		vnode.
1391 */
1392 static inline Vnode*
1393 get_covered_vnode_locked(Vnode* vnode)
1394 {
1395 	if (Vnode* coveredNode = vnode->covers) {
1396 		while (coveredNode->covers != NULL)
1397 			coveredNode = coveredNode->covers;
1398 
1399 		inc_vnode_ref_count(coveredNode);
1400 		return coveredNode;
1401 	}
1402 
1403 	return NULL;
1404 }
1405 
1406 
1407 /*!	Gets the vnode the given vnode is covering.
1408 
1409 	The caller must not hold \c sVnodeLock. Note that this implies a race
1410 	condition, since the situation can change at any time.
1411 
1412 	The function returns a reference to the retrieved vnode (if any), which
1413 	the caller is responsible for releasing.
1414 
1415 	\param vnode The vnode whose covered node shall be returned.
1416 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1417 		vnode.
1418 */
1419 static inline Vnode*
1420 get_covered_vnode(Vnode* vnode)
1421 {
1422 	if (!vnode->IsCovering())
1423 		return NULL;
1424 
1425 	ReadLocker vnodeReadLocker(sVnodeLock);
1426 	return get_covered_vnode_locked(vnode);
1427 }
1428 
1429 
1430 /*!	Gets the vnode the given vnode is covered by.
1431 
1432 	The caller must have \c sVnodeLock read-locked at least.
1433 
1434 	The function returns a reference to the retrieved vnode (if any), which
1435 	the caller is responsible for releasing.
1436 
1437 	\param vnode The vnode whose covering node shall be returned.
1438 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1439 		any vnode.
1440 */
1441 static Vnode*
1442 get_covering_vnode_locked(Vnode* vnode)
1443 {
1444 	if (Vnode* coveringNode = vnode->covered_by) {
1445 		while (coveringNode->covered_by != NULL)
1446 			coveringNode = coveringNode->covered_by;
1447 
1448 		inc_vnode_ref_count(coveringNode);
1449 		return coveringNode;
1450 	}
1451 
1452 	return NULL;
1453 }
1454 
1455 
1456 /*!	Gets the vnode the given vnode is covered by.
1457 
1458 	The caller must not hold \c sVnodeLock. Note that this implies a race
1459 	condition, since the situation can change at any time.
1460 
1461 	The function returns a reference to the retrieved vnode (if any), which
1462 	the caller is responsible for releasing.
1463 
1464 	\param vnode The vnode whose covering node shall be returned.
1465 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1466 		any vnode.
1467 */
1468 static inline Vnode*
1469 get_covering_vnode(Vnode* vnode)
1470 {
1471 	if (!vnode->IsCovered())
1472 		return NULL;
1473 
1474 	ReadLocker vnodeReadLocker(sVnodeLock);
1475 	return get_covering_vnode_locked(vnode);
1476 }
1477 
1478 
1479 static void
1480 free_unused_vnodes()
1481 {
1482 	free_unused_vnodes(
1483 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1484 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1485 }
1486 
1487 
1488 static void
1489 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1490 {
1491 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1492 
1493 	free_unused_vnodes(level);
1494 }
1495 
1496 
1497 static inline void
1498 put_advisory_locking(struct advisory_locking* locking)
1499 {
1500 	release_sem(locking->lock);
1501 }
1502 
1503 
1504 /*!	Returns the advisory_locking object of the \a vnode in case it
1505 	has one, and locks it.
1506 	You have to call put_advisory_locking() when you're done with
1507 	it.
1508 	Note, you must not have the vnode mutex locked when calling
1509 	this function.
1510 */
1511 static struct advisory_locking*
1512 get_advisory_locking(struct vnode* vnode)
1513 {
1514 	rw_lock_read_lock(&sVnodeLock);
1515 	vnode->Lock();
1516 
1517 	struct advisory_locking* locking = vnode->advisory_locking;
1518 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1519 
1520 	vnode->Unlock();
1521 	rw_lock_read_unlock(&sVnodeLock);
1522 
1523 	if (lock >= 0)
1524 		lock = acquire_sem(lock);
1525 	if (lock < 0) {
1526 		// This means the locking has been deleted in the meantime
1527 		// or had never existed in the first place - otherwise, we
1528 		// would get the lock at some point.
1529 		return NULL;
1530 	}
1531 
1532 	return locking;
1533 }
1534 
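// Sketch of the get/put protocol: the returned object is locked through its
// lock semaphore and must be released again via put_advisory_locking():
//
//	struct advisory_locking* locking = get_advisory_locking(vnode);
//	if (locking != NULL) {
//		// ... inspect or modify locking->locks ...
//		put_advisory_locking(locking);
//	}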
1535 
1536 /*!	Creates a locked advisory_locking object, and attaches it to the
1537 	given \a vnode.
1538 	Returns B_OK in case of success - even if the vnode got such an
1539 	object from someone else in the meantime; you'll get that one
1540 	locked then.
1541 */
1542 static status_t
1543 create_advisory_locking(struct vnode* vnode)
1544 {
1545 	if (vnode == NULL)
1546 		return B_FILE_ERROR;
1547 
1548 	ObjectDeleter<advisory_locking> lockingDeleter;
1549 	struct advisory_locking* locking = NULL;
1550 
1551 	while (get_advisory_locking(vnode) == NULL) {
1552 		// no locking object set on the vnode yet, create one
1553 		if (locking == NULL) {
1554 			locking = new(std::nothrow) advisory_locking;
1555 			if (locking == NULL)
1556 				return B_NO_MEMORY;
1557 			lockingDeleter.SetTo(locking);
1558 
1559 			locking->wait_sem = create_sem(0, "advisory lock");
1560 			if (locking->wait_sem < 0)
1561 				return locking->wait_sem;
1562 
1563 			locking->lock = create_sem(0, "advisory locking");
1564 			if (locking->lock < 0)
1565 				return locking->lock;
1566 		}
1567 
1568 		// set our newly created locking object
1569 		ReadLocker _(sVnodeLock);
1570 		AutoLocker<Vnode> nodeLocker(vnode);
1571 		if (vnode->advisory_locking == NULL) {
1572 			vnode->advisory_locking = locking;
1573 			lockingDeleter.Detach();
1574 			return B_OK;
1575 		}
1576 	}
1577 
1578 	// The vnode already had a locking object. That's just as well.
1579 
1580 	return B_OK;
1581 }
1582 
1583 
1584 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1585 	with the advisory_lock \a lock.
1586 */
1587 static bool
1588 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1589 {
1590 	if (flock == NULL)
1591 		return true;
1592 
1593 	return lock->start <= flock->l_start - 1 + flock->l_len
1594 		&& lock->end >= flock->l_start;
1595 }
1596 
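// Worked example for the test above: an advisory_lock covering bytes
// [100, 199] and a normalized flock with l_start = 150, l_len = 100 (bytes
// [150, 249]) intersect, because
//
//	lock->start (100) <= l_start - 1 + l_len (249), and
//	lock->end   (199) >= l_start (150).
//
// A flock starting at 200 would fail the second condition: no intersection.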
1597 
1598 /*!	Tests whether acquiring a lock would block.
1599 */
1600 static status_t
1601 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1602 {
1603 	flock->l_type = F_UNLCK;
1604 
1605 	struct advisory_locking* locking = get_advisory_locking(vnode);
1606 	if (locking == NULL)
1607 		return B_OK;
1608 
1609 	team_id team = team_get_current_team_id();
1610 
1611 	LockList::Iterator iterator = locking->locks.GetIterator();
1612 	while (iterator.HasNext()) {
1613 		struct advisory_lock* lock = iterator.Next();
1614 
1615 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1616 			// locks do overlap
1617 			if (flock->l_type != F_RDLCK || !lock->shared) {
1618 				// collision
1619 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1620 				flock->l_whence = SEEK_SET;
1621 				flock->l_start = lock->start;
1622 				flock->l_len = lock->end - lock->start + 1;
1623 				flock->l_pid = lock->team;
1624 				break;
1625 			}
1626 		}
1627 	}
1628 
1629 	put_advisory_locking(locking);
1630 	return B_OK;
1631 }
1632 
1633 
1634 /*!	Removes the specified lock, or all locks of the calling team
1635 	if \a flock is NULL.
1636 */
1637 static status_t
1638 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1639 	struct file_descriptor* descriptor, struct flock* flock)
1640 {
1641 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1642 
1643 	struct advisory_locking* locking = get_advisory_locking(vnode);
1644 	if (locking == NULL)
1645 		return B_OK;
1646 
1647 	// find matching lock entries
1648 
1649 	LockList::Iterator iterator = locking->locks.GetIterator();
1650 	while (iterator.HasNext()) {
1651 		struct advisory_lock* lock = iterator.Next();
1652 		bool removeLock = false;
1653 
1654 		if (descriptor != NULL && lock->bound_to == descriptor) {
1655 			// Remove flock() locks
1656 			removeLock = true;
1657 		} else if (lock->bound_to == context
1658 				&& advisory_lock_intersects(lock, flock)) {
1659 			// Remove POSIX locks
1660 			bool endsBeyond = false;
1661 			bool startsBefore = false;
1662 			if (flock != NULL) {
1663 				startsBefore = lock->start < flock->l_start;
1664 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1665 			}
1666 
1667 			if (!startsBefore && !endsBeyond) {
1668 				// lock is completely contained in flock
1669 				removeLock = true;
1670 			} else if (startsBefore && !endsBeyond) {
1671 				// cut the end of the lock
1672 				lock->end = flock->l_start - 1;
1673 			} else if (!startsBefore && endsBeyond) {
1674 				// cut the start of the lock
1675 				lock->start = flock->l_start + flock->l_len;
1676 			} else {
1677 				// divide the lock into two locks
1678 				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
1679 				if (secondLock == NULL) {
1680 					// TODO: we should probably revert the locks we already
1681 					// changed... (ie. allocate upfront)
1682 					put_advisory_locking(locking);
1683 					return B_NO_MEMORY;
1684 				}
1685 
				// remember the original end; the first lock is cut below
				const off_t originalEnd = lock->end;
1686 				lock->end = flock->l_start - 1;
1687 
1688 				secondLock->bound_to = context;
1689 				secondLock->team = lock->team;
1690 				secondLock->session = lock->session;
1691 				// values must already be normalized when getting here
1692 				secondLock->start = flock->l_start + flock->l_len;
1693 				secondLock->end = originalEnd;
1694 				secondLock->shared = lock->shared;
1695 
1696 				locking->locks.Add(secondLock);
1697 			}
1698 		}
1699 
1700 		if (removeLock) {
1701 			// this lock is no longer used
1702 			iterator.Remove();
1703 			free(lock);
1704 		}
1705 	}
1706 
1707 	bool removeLocking = locking->locks.IsEmpty();
1708 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1709 
1710 	put_advisory_locking(locking);
1711 
1712 	if (removeLocking) {
1713 		// We can remove the whole advisory locking structure; it's no
1714 		// longer used
1715 		locking = get_advisory_locking(vnode);
1716 		if (locking != NULL) {
1717 			ReadLocker locker(sVnodeLock);
1718 			AutoLocker<Vnode> nodeLocker(vnode);
1719 
1720 			// the locking could have been changed in the mean time
1721 			if (locking->locks.IsEmpty()) {
1722 				vnode->advisory_locking = NULL;
1723 				nodeLocker.Unlock();
1724 				locker.Unlock();
1725 
1726 				// we've detached the locking from the vnode, so we can
1727 				// safely delete it
1728 				delete locking;
1729 			} else {
1730 				// the locking is in use again
1731 				nodeLocker.Unlock();
1732 				locker.Unlock();
1733 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1734 			}
1735 		}
1736 	}
1737 
1738 	return B_OK;
1739 }
1740 
1741 
1742 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1743 	will wait for the lock to become available, if there are any collisions
1744 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1745 
1746 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1747 	BSD flock() semantics are used, that is, all children can unlock the file
1748 	in question (we even allow parents to remove the lock, but that
1749 	seems to be in line with what the BSDs are doing).
1750 */
1751 static status_t
1752 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1753 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1754 {
1755 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1756 		vnode, flock, wait ? "yes" : "no"));
1759 
1760 	bool shared = flock->l_type == F_RDLCK;
1761 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1762 	status_t status = B_OK;
1763 
1764 	// TODO: do deadlock detection!
1765 
1766 	struct advisory_locking* locking;
1767 
1768 	while (true) {
1769 		// if this vnode has an advisory_locking structure attached,
1770 		// lock that one and search for any colliding file lock
1771 		status = create_advisory_locking(vnode);
1772 		if (status != B_OK)
1773 			return status;
1774 
1775 		locking = vnode->advisory_locking;
1776 		team_id team = team_get_current_team_id();
1777 		sem_id waitForLock = -1;
1778 
1779 		// test for collisions
1780 		LockList::Iterator iterator = locking->locks.GetIterator();
1781 		while (iterator.HasNext()) {
1782 			struct advisory_lock* lock = iterator.Next();
1783 
1784 			// TODO: locks from the same team might be joinable!
1785 			if ((lock->team != team || lock->bound_to != boundTo)
1786 					&& advisory_lock_intersects(lock, flock)) {
1787 				// locks do overlap
1788 				if (!shared || !lock->shared) {
1789 					// we need to wait
1790 					waitForLock = locking->wait_sem;
1791 					break;
1792 				}
1793 			}
1794 		}
1795 
1796 		if (waitForLock < 0)
1797 			break;
1798 
1799 		// We need to wait. Do that or fail now, if we've been asked not to.
1800 
1801 		if (!wait) {
1802 			put_advisory_locking(locking);
1803 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1804 		}
1805 
1806 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1807 			B_CAN_INTERRUPT, 0);
1808 		if (status != B_OK && status != B_BAD_SEM_ID)
1809 			return status;
1810 
1811 		// We have been notified, but we need to re-lock the locking object. So
1812 		// go another round...
1813 	}
1814 
1815 	// install new lock
1816 
1817 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1818 		sizeof(struct advisory_lock));
1819 	if (lock == NULL) {
1820 		put_advisory_locking(locking);
1821 		return B_NO_MEMORY;
1822 	}
1823 
1824 	lock->bound_to = boundTo;
1825 	lock->team = team_get_current_team_id();
1826 	lock->session = thread_get_current_thread()->team->session_id;
1827 	// values must already be normalized when getting here
1828 	lock->start = flock->l_start;
1829 	lock->end = flock->l_start - 1 + flock->l_len;
1830 	lock->shared = shared;
1831 
1832 	locking->locks.Add(lock);
1833 	put_advisory_locking(locking);
1834 
1835 	return status;
1836 }
1837 
1838 
1839 /*!	Normalizes the \a flock structure to make it easier to compare the
1840 	structure with others. The l_start and l_len fields are set to absolute
1841 	values according to the l_whence field.
1842 */
1843 static status_t
1844 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1845 {
1846 	switch (flock->l_whence) {
1847 		case SEEK_SET:
1848 			break;
1849 		case SEEK_CUR:
1850 			flock->l_start += descriptor->pos;
1851 			break;
1852 		case SEEK_END:
1853 		{
1854 			struct vnode* vnode = descriptor->u.vnode;
1855 			struct stat stat;
1856 			status_t status;
1857 
1858 			if (!HAS_FS_CALL(vnode, read_stat))
1859 				return B_UNSUPPORTED;
1860 
1861 			status = FS_CALL(vnode, read_stat, &stat);
1862 			if (status != B_OK)
1863 				return status;
1864 
1865 			flock->l_start += stat.st_size;
1866 			break;
1867 		}
1868 		default:
1869 			return B_BAD_VALUE;
1870 	}
1871 
1872 	if (flock->l_start < 0)
1873 		flock->l_start = 0;
1874 	if (flock->l_len == 0)
1875 		flock->l_len = OFF_MAX;
1876 
1877 	// don't let the offset and length overflow
1878 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1879 		flock->l_len = OFF_MAX - flock->l_start;
1880 
1881 	if (flock->l_len < 0) {
1882 		// a negative length reverses the region
1883 		flock->l_start += flock->l_len;
1884 		flock->l_len = -flock->l_len;
1885 	}
1886 
1887 	return B_OK;
1888 }
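
/*	Worked example (comment only): for a descriptor positioned at offset
	100, the request { l_whence = SEEK_CUR, l_start = 50, l_len = -30 } is
	normalized by normalize_flock() to the absolute range { l_start = 120,
	l_len = 30 }: SEEK_CUR adds the current position (100 + 50 = 150), and
	the negative length then reverses the region (150 - 30 = 120). An l_len
	of 0 would instead extend the lock up to OFF_MAX, i.e. to the end of
	the file and beyond.
*/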
1889 
1890 
1891 static void
1892 replace_vnode_if_disconnected(struct fs_mount* mount,
1893 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1894 	struct vnode* fallBack, bool lockRootLock)
1895 {
1896 	struct vnode* givenVnode = vnode;
1897 	bool vnodeReplaced = false;
1898 
1899 	ReadLocker vnodeReadLocker(sVnodeLock);
1900 
1901 	if (lockRootLock)
1902 		mutex_lock(&sIOContextRootLock);
1903 
1904 	while (vnode != NULL && vnode->mount == mount
1905 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1906 		if (vnode->covers != NULL) {
1907 			// redirect the vnode to the covered vnode
1908 			vnode = vnode->covers;
1909 		} else
1910 			vnode = fallBack;
1911 
1912 		vnodeReplaced = true;
1913 	}
1914 
1915 	// If we've replaced the node, grab a reference for the new one.
1916 	if (vnodeReplaced && vnode != NULL)
1917 		inc_vnode_ref_count(vnode);
1918 
1919 	if (lockRootLock)
1920 		mutex_unlock(&sIOContextRootLock);
1921 
1922 	vnodeReadLocker.Unlock();
1923 
1924 	if (vnodeReplaced)
1925 		put_vnode(givenVnode);
1926 }
1927 
1928 
1929 /*!	Disconnects all file descriptors that are associated with the
1930 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1931 	\a mount object.
1932 
1933 	Note that after you've called this function, there might still be
1934 	ongoing accesses - they won't be interrupted if they were already in
1935 	progress. However, any subsequent access will fail.
1936 
1937 	This is not a cheap function and should be used with care and rarely.
1938 	TODO: there is currently no means to stop a blocking read/write!
1939 */
1940 static void
1941 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1942 	struct vnode* vnodeToDisconnect)
1943 {
1944 	// iterate over all teams and peek into their file descriptors
1945 	TeamListIterator teamIterator;
1946 	while (Team* team = teamIterator.Next()) {
1947 		BReference<Team> teamReference(team, true);
1948 		TeamLocker teamLocker(team);
1949 
1950 		// lock the I/O context
1951 		io_context* context = team->io_context;
1952 		if (context == NULL)
1953 			continue;
1954 		MutexLocker contextLocker(context->io_mutex);
1955 
1956 		teamLocker.Unlock();
1957 
1958 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1959 			sRoot, true);
1960 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1961 			sRoot, false);
1962 
1963 		for (uint32 i = 0; i < context->table_size; i++) {
1964 			struct file_descriptor* descriptor = context->fds[i];
1965 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1966 				continue;
1967 
1968 			inc_fd_ref_count(descriptor);
1969 
1970 			// if this descriptor points at this mount, we
1971 			// need to disconnect it to be able to unmount
1972 			struct vnode* vnode = fd_vnode(descriptor);
1973 			if (vnodeToDisconnect != NULL) {
1974 				if (vnode == vnodeToDisconnect)
1975 					disconnect_fd(descriptor);
1976 			} else if ((vnode != NULL && vnode->mount == mount)
1977 				|| (vnode == NULL && descriptor->u.mount == mount))
1978 				disconnect_fd(descriptor);
1979 
1980 			put_fd(descriptor);
1981 		}
1982 	}
1983 }
1984 
1985 
1986 /*!	\brief Gets the root node of the current IO context.
1987 	If \a kernel is \c true, the kernel IO context will be used.
1988 	The caller obtains a reference to the returned node.
1989 */
1990 struct vnode*
1991 get_root_vnode(bool kernel)
1992 {
1993 	if (!kernel) {
1994 		// Get current working directory from io context
1995 		// Get the root of the current IO context
1996 
1997 		mutex_lock(&sIOContextRootLock);
1998 
1999 		struct vnode* root = context->root;
2000 		if (root != NULL)
2001 			inc_vnode_ref_count(root);
2002 
2003 		mutex_unlock(&sIOContextRootLock);
2004 
2005 		if (root != NULL)
2006 			return root;
2007 
2008 		// That should never happen.
2009 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2010 			"have a root\n", team_get_current_team_id());
2011 	}
2012 
2013 	inc_vnode_ref_count(sRoot);
2014 	return sRoot;
2015 }
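
/*	Usage sketch (hypothetical, comment only): the returned node is
	referenced and must be released again:

		struct vnode* root = get_root_vnode(false);
			// the current team's IO context root, or sRoot as fallback
		// ... resolve paths against root ...
		put_vnode(root);
*/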
2016 
2017 
2018 /*!	\brief Gets the directory path and leaf name for a given path.
2019 
2020 	The supplied \a path is transformed to refer to the directory part of
2021 	the entry identified by the original path, and into the buffer \a filename
2022 	the leaf name of the original entry is written.
2023 	Neither the returned path nor the leaf name can be expected to be
2024 	canonical.
2025 
2026 	\param path The path to be analyzed. Must be able to store at least one
2027 		   additional character.
2028 	\param filename The buffer into which the leaf name will be written.
2029 		   Must be of size B_FILE_NAME_LENGTH at least.
2030 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2031 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2032 		   if the given path name is empty.
2033 */
2034 static status_t
2035 get_dir_path_and_leaf(char* path, char* filename)
2036 {
2037 	if (*path == '\0')
2038 		return B_ENTRY_NOT_FOUND;
2039 
2040 	char* last = strrchr(path, '/');
2041 		// '/' is not allowed in file names!
2042 
2043 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2044 
2045 	if (last == NULL) {
2046 		// this path is single segment with no '/' in it
2047 		// ex. "foo"
2048 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2049 			return B_NAME_TOO_LONG;
2050 
2051 		strcpy(path, ".");
2052 	} else {
2053 		last++;
2054 		if (last[0] == '\0') {
2055 			// special case: the path ends in one or more '/' - remove them
2056 			while (*--last == '/' && last != path);
2057 			last[1] = '\0';
2058 
2059 			if (last == path && last[0] == '/') {
2060 				// This path points to the root of the file system
2061 				strcpy(filename, ".");
2062 				return B_OK;
2063 			}
2064 			for (; last != path && *(last - 1) != '/'; last--);
2065 				// rewind to the start of the leaf before the '/'
2066 		}
2067 
2068 		// normal leaf: replace the leaf portion of the path with a '.'
2069 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2070 			return B_NAME_TOO_LONG;
2071 
2072 		last[0] = '.';
2073 		last[1] = '\0';
2074 	}
2075 	return B_OK;
2076 }
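
/*	Worked examples (comment only) for get_dir_path_and_leaf():
		"foo"     -> path ".",      filename "foo"
		"/a/b/c"  -> path "/a/b/.", filename "c"
		"/a/b//"  -> path "/a/.",   filename "b"  (trailing slashes stripped)
		"/"       -> path "/",      filename "."  (root of the file system)
*/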
2077 
2078 
2079 static status_t
2080 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2081 	bool traverse, bool kernel, struct vnode** _vnode)
2082 {
2083 	char clonedName[B_FILE_NAME_LENGTH + 1];
2084 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2085 		return B_NAME_TOO_LONG;
2086 
2087 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2088 	struct vnode* directory;
2089 
2090 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2091 	if (status < 0)
2092 		return status;
2093 
2094 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2095 		_vnode, NULL);
2096 }
2097 
2098 
2099 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2100 	and returns the respective vnode.
2101 	On success a reference to the vnode is acquired for the caller.
2102 */
2103 static status_t
2104 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2105 {
2106 	ino_t id;
2107 	bool missing;
2108 
2109 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2110 		return missing ? B_ENTRY_NOT_FOUND
2111 			: get_vnode(dir->device, id, _vnode, true, false);
2112 	}
2113 
2114 	status_t status = FS_CALL(dir, lookup, name, &id);
2115 	if (status != B_OK)
2116 		return status;
2117 
2118 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2119 	// have a reference and just need to look the node up.
2120 	rw_lock_read_lock(&sVnodeLock);
2121 	*_vnode = lookup_vnode(dir->device, id);
2122 	rw_lock_read_unlock(&sVnodeLock);
2123 
2124 	if (*_vnode == NULL) {
2125 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2126 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2127 		return B_ENTRY_NOT_FOUND;
2128 	}
2129 
2130 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2131 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2132 //		(*_vnode)->mount->id, (*_vnode)->id);
2133 
2134 	return B_OK;
2135 }
2136 
2137 
2138 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2139 	\a path must not be NULL.
2140 	If it returns successfully, \a path contains the name of the last path
2141 	component. This function clobbers the buffer pointed to by \a path only
2142 	if it does contain more than one component.
2143 	Note, this reduces the ref_count of the starting \a vnode, no matter
2144 	whether it is successful or not!
2145 */
2146 static status_t
2147 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2148 	int count, struct io_context* ioContext, struct vnode** _vnode,
2149 	ino_t* _parentID)
2150 {
2151 	status_t status = B_OK;
2152 	ino_t lastParentID = vnode->id;
2153 
2154 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2155 
2156 	if (path == NULL) {
2157 		put_vnode(vnode);
2158 		return B_BAD_VALUE;
2159 	}
2160 
2161 	if (*path == '\0') {
2162 		put_vnode(vnode);
2163 		return B_ENTRY_NOT_FOUND;
2164 	}
2165 
2166 	while (true) {
2167 		struct vnode* nextVnode;
2168 		char* nextPath;
2169 
2170 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2171 			path));
2172 
2173 		// done?
2174 		if (path[0] == '\0')
2175 			break;
2176 
2177 		// walk to find the next path component ("path" will point to a single
2178 		// path component), and filter out multiple slashes
2179 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2180 				nextPath++);
2181 
2182 		if (*nextPath == '/') {
2183 			*nextPath = '\0';
2184 			do
2185 				nextPath++;
2186 			while (*nextPath == '/');
2187 		}
2188 
2189 		// If the '..' is at a covering vnode, move to the covered
2190 		// vnode, so we pass the '..' path to the underlying file system.
2191 		// Also prevent escaping the root of the IO context.
2192 		if (strcmp("..", path) == 0) {
2193 			if (vnode == ioContext->root) {
2194 				// Attempted prison break! Keep it contained.
2195 				path = nextPath;
2196 				continue;
2197 			}
2198 
2199 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2200 				nextVnode = coveredVnode;
2201 				put_vnode(vnode);
2202 				vnode = nextVnode;
2203 			}
2204 		}
2205 
2206 		// check if vnode is really a directory
2207 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2208 			status = B_NOT_A_DIRECTORY;
2209 
2210 		// Check if we have the right to search the current directory vnode.
2211 		// If a file system doesn't have the access() function, we assume that
2212 		// searching a directory is always allowed
2213 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2214 			status = FS_CALL(vnode, access, X_OK);
2215 
2216 		// Tell the filesystem to get the vnode of this path component (if we
2217 		// got the permission from the call above)
2218 		if (status == B_OK)
2219 			status = lookup_dir_entry(vnode, path, &nextVnode);
2220 
2221 		if (status != B_OK) {
2222 			put_vnode(vnode);
2223 			return status;
2224 		}
2225 
2226 		// If the new node is a symbolic link, resolve it (if we've been told
2227 		// to do it)
2228 		if (S_ISLNK(nextVnode->Type())
2229 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2230 			size_t bufferSize;
2231 			char* buffer;
2232 
2233 			TRACE(("traverse link\n"));
2234 
2235 			// it's not exactly nice style using goto in this way, but hey,
2236 			// it works :-/
2237 			if (count + 1 > B_MAX_SYMLINKS) {
2238 				status = B_LINK_LIMIT;
2239 				goto resolve_link_error;
2240 			}
2241 
2242 			bufferSize = B_PATH_NAME_LENGTH;
2243 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2244 			if (buffer == NULL) {
2245 				status = B_NO_MEMORY;
2246 				goto resolve_link_error;
2247 			}
2248 
2249 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2250 				bufferSize--;
2251 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2252 				// null-terminate
2253 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2254 					buffer[bufferSize] = '\0';
2255 			} else
2256 				status = B_BAD_VALUE;
2257 
2258 			if (status != B_OK) {
2259 				free(buffer);
2260 
2261 		resolve_link_error:
2262 				put_vnode(vnode);
2263 				put_vnode(nextVnode);
2264 
2265 				return status;
2266 			}
2267 			put_vnode(nextVnode);
2268 
2269 			// Check if we start from the root directory or the current
2270 			// directory ("vnode" still points to that one).
2271 			// Cut off all leading slashes if it's the root directory
2272 			path = buffer;
2273 			bool absoluteSymlink = false;
2274 			if (path[0] == '/') {
2275 				// we don't need the old directory anymore
2276 				put_vnode(vnode);
2277 
2278 				while (*++path == '/')
2279 					;
2280 
2281 				mutex_lock(&sIOContextRootLock);
2282 				vnode = ioContext->root;
2283 				inc_vnode_ref_count(vnode);
2284 				mutex_unlock(&sIOContextRootLock);
2285 
2286 				absoluteSymlink = true;
2287 			}
2288 
2289 			inc_vnode_ref_count(vnode);
2290 				// balance the next recursion - we will decrement the
2291 				// ref_count of the vnode, no matter if we succeeded or not
2292 
2293 			if (absoluteSymlink && *path == '\0') {
2294 				// symlink was just "/"
2295 				nextVnode = vnode;
2296 			} else {
2297 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2298 					ioContext, &nextVnode, &lastParentID);
2299 			}
2300 
2301 			object_cache_free(sPathNameCache, buffer, 0);
2302 
2303 			if (status != B_OK) {
2304 				put_vnode(vnode);
2305 				return status;
2306 			}
2307 		} else
2308 			lastParentID = vnode->id;
2309 
2310 		// decrease the ref count on the old dir we just looked up into
2311 		put_vnode(vnode);
2312 
2313 		path = nextPath;
2314 		vnode = nextVnode;
2315 
2316 		// see if we hit a covered node
2317 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2318 			put_vnode(vnode);
2319 			vnode = coveringNode;
2320 		}
2321 	}
2322 
2323 	*_vnode = vnode;
2324 	if (_parentID)
2325 		*_parentID = lastParentID;
2326 
2327 	return B_OK;
2328 }
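
/*	Usage sketch (hypothetical, comment only): resolving a relative path
	against an already referenced directory vnode "startDir". Since the
	call always consumes one reference to the starting vnode, an extra one
	is acquired first if the caller wants to keep its own:

		struct vnode* result;
		char mutablePath[] = "bin/sh";	// may be clobbered by the call
		inc_vnode_ref_count(startDir);
		status_t status = vnode_path_to_vnode(startDir, mutablePath, true,
			0, get_current_io_context(false), &result, NULL);
		if (status == B_OK) {
			// mutablePath now holds the last path component ("sh"), and
			// result carries a reference that must be put_vnode()d
			put_vnode(result);
		}
*/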
2329 
2330 
2331 static status_t
2332 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2333 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2334 {
2335 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2336 		get_current_io_context(kernel), _vnode, _parentID);
2337 }
2338 
2339 
2340 static status_t
2341 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2342 	ino_t* _parentID, bool kernel)
2343 {
2344 	struct vnode* start = NULL;
2345 
2346 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2347 
2348 	if (!path)
2349 		return B_BAD_VALUE;
2350 
2351 	if (*path == '\0')
2352 		return B_ENTRY_NOT_FOUND;
2353 
2354 	// figure out if we need to start at root or at cwd
2355 	if (*path == '/') {
2356 		if (sRoot == NULL) {
2357 			// we're a bit early, aren't we?
2358 			return B_ERROR;
2359 		}
2360 
2361 		while (*++path == '/')
2362 			;
2363 		start = get_root_vnode(kernel);
2364 
2365 		if (*path == '\0') {
2366 			*_vnode = start;
2367 			return B_OK;
2368 		}
2369 
2370 	} else {
2371 		struct io_context* context = get_current_io_context(kernel);
2372 
2373 		mutex_lock(&context->io_mutex);
2374 		start = context->cwd;
2375 		if (start != NULL)
2376 			inc_vnode_ref_count(start);
2377 		mutex_unlock(&context->io_mutex);
2378 
2379 		if (start == NULL)
2380 			return B_ERROR;
2381 	}
2382 
2383 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2384 		_parentID);
2385 }
2386 
2387 
2388 /*! Returns the vnode for the next to last segment of the path, and writes
2389 	the last path component into \a filename.
2390 	The path buffer must be able to store at least one additional character.
2391 */
2392 static status_t
2393 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2394 	bool kernel)
2395 {
2396 	status_t status = get_dir_path_and_leaf(path, filename);
2397 	if (status != B_OK)
2398 		return status;
2399 
2400 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2401 }
2402 
2403 
2404 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2405 		   to by a FD + path pair.
2406 
2407 	\a path must be given in either case. \a fd might be omitted, in which
2408 	case \a path is either an absolute path or one relative to the current
2409 	directory. If both are supplied and \a path is relative, it is reckoned off
2410 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2411 	ignored.
2412 
2413 	The caller has the responsibility to call put_vnode() on the returned
2414 	directory vnode.
2415 
2416 	\param fd The FD. May be < 0.
2417 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2418 	       is modified by this function. It must have at least room for a
2419 	       string one character longer than the path it contains.
2420 	\param _vnode A pointer to a variable the directory vnode shall be written
2421 		   into.
2422 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2423 		   the leaf name of the specified entry will be written.
2424 	\param kernel \c true, if invoked from inside the kernel, \c false if
2425 		   invoked from userland.
2426 	\return \c B_OK, if everything went fine, another error code otherwise.
2427 */
2428 static status_t
2429 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2430 	char* filename, bool kernel)
2431 {
2432 	if (!path)
2433 		return B_BAD_VALUE;
2434 	if (*path == '\0')
2435 		return B_ENTRY_NOT_FOUND;
2436 	if (fd < 0)
2437 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2438 
2439 	status_t status = get_dir_path_and_leaf(path, filename);
2440 	if (status != B_OK)
2441 		return status;
2442 
2443 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2444 }
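
/*	Worked example (comment only): this FD + path split is what the *at()
	style calls use. With fd < 0 and path "/tmp/newfile" it yields the
	vnode of "/tmp" and the leaf name "newfile"; with a valid directory FD
	and the relative path "sub/leaf" it yields the vnode of "sub" (resolved
	relative to the FD's directory) and the leaf name "leaf".
*/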
2445 
2446 
2447 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2448 		   to by a vnode + path pair.
2449 
2450 	\a path must be given in either case. \a vnode might be omitted, in which
2451 	case \a path is either an absolute path or one relative to the current
2452 	directory. If both are supplied and \a path is relative, it is reckoned off
2453 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2454 	ignored.
2455 
2456 	The caller has the responsibility to call put_vnode() on the returned
2457 	directory vnode.
2458 
2459 	\param vnode The vnode. May be \c NULL.
2460 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2461 	       is modified by this function. It must have at least room for a
2462 	       string one character longer than the path it contains.
2463 	\param _vnode A pointer to a variable the directory vnode shall be written
2464 		   into.
2465 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2466 		   the leaf name of the specified entry will be written.
2467 	\param kernel \c true, if invoked from inside the kernel, \c false if
2468 		   invoked from userland.
2469 	\return \c B_OK, if everything went fine, another error code otherwise.
2470 */
2471 static status_t
2472 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2473 	struct vnode** _vnode, char* filename, bool kernel)
2474 {
2475 	if (!path)
2476 		return B_BAD_VALUE;
2477 	if (*path == '\0')
2478 		return B_ENTRY_NOT_FOUND;
2479 	if (vnode == NULL || path[0] == '/')
2480 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2481 
2482 	status_t status = get_dir_path_and_leaf(path, filename);
2483 	if (status != B_OK)
2484 		return status;
2485 
2486 	inc_vnode_ref_count(vnode);
2487 		// vnode_path_to_vnode() always decrements the ref count
2488 
2489 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2490 }
2491 
2492 
2493 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2494 */
2495 static status_t
2496 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2497 	size_t bufferSize, struct io_context* ioContext)
2498 {
2499 	if (bufferSize < sizeof(struct dirent))
2500 		return B_BAD_VALUE;
2501 
2502 	// See if the vnode is covering another vnode and move to the covered
2503 	// vnode so we get the underlying file system
2504 	VNodePutter vnodePutter;
2505 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2506 		vnode = coveredVnode;
2507 		vnodePutter.SetTo(vnode);
2508 	}
2509 
2510 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2511 		// The FS supports getting the name of a vnode.
2512 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2513 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2514 			return B_OK;
2515 	}
2516 
2517 	// The FS doesn't support getting the name of a vnode. So we search the
2518 	// parent directory for the vnode, if the caller let us.
2519 
2520 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2521 		return B_UNSUPPORTED;
2522 
2523 	void* cookie;
2524 
2525 	status_t status = FS_CALL(parent, open_dir, &cookie);
2526 	if (status >= B_OK) {
2527 		while (true) {
2528 			uint32 num = 1;
2529 			// We use the FS hook directly instead of dir_read(), since we don't
2530 			// want the entries to be fixed up. We have already resolved vnode to
2531 			// the covered node.
2532 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2533 				&num);
2534 			if (status != B_OK)
2535 				break;
2536 			if (num == 0) {
2537 				status = B_ENTRY_NOT_FOUND;
2538 				break;
2539 			}
2540 
2541 			if (vnode->id == buffer->d_ino) {
2542 				// found correct entry!
2543 				break;
2544 			}
2545 		}
2546 
2547 		FS_CALL(parent, close_dir, cookie);
2548 		FS_CALL(parent, free_dir_cookie, cookie);
2549 	}
2550 	return status;
2551 }
2552 
2553 
2554 static status_t
2555 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2556 	size_t nameSize, bool kernel)
2557 {
2558 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2559 	struct dirent* dirent = (struct dirent*)buffer;
2560 
2561 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2562 		get_current_io_context(kernel));
2563 	if (status != B_OK)
2564 		return status;
2565 
2566 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2567 		return B_BUFFER_OVERFLOW;
2568 
2569 	return B_OK;
2570 }
2571 
2572 
2573 /*!	Gets the full path to a given directory vnode.
2574 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2575 	file system doesn't support this call, it will fall back to iterating
2576 	through the parent directory to get the name of the child.
2577 
2578 	To protect against circular loops, it supports a maximum tree depth
2579 	of 256 levels.
2580 
2581 	Note that the path may no longer be correct by the time this function
2582 	returns! It doesn't use any locking to ensure that the returned path
2583 	stays valid, as paths aren't safe anyway: the path to a file can change at any time.
2584 
2585 	It might be a good idea, though, to check if the returned path exists
2586 	in the calling function (it's not done here for efficiency reasons).
2587 */
2588 static status_t
2589 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2590 	bool kernel)
2591 {
2592 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2593 
2594 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2595 		return B_BAD_VALUE;
2596 
2597 	if (!S_ISDIR(vnode->Type()))
2598 		return B_NOT_A_DIRECTORY;
2599 
2600 	char* path = buffer;
2601 	int32 insert = bufferSize;
2602 	int32 maxLevel = 256;
2603 	int32 length;
2604 	status_t status = B_OK;
2605 	struct io_context* ioContext = get_current_io_context(kernel);
2606 
2607 	// we don't use get_vnode() here because this call is more
2608 	// efficient and does all we need from get_vnode()
2609 	inc_vnode_ref_count(vnode);
2610 
2611 	path[--insert] = '\0';
2612 		// the path is filled right to left
2613 
2614 	while (true) {
2615 		// If the node is the context's root, bail out. Otherwise resolve mount
2616 		// points.
2617 		if (vnode == ioContext->root)
2618 			break;
2619 
2620 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2621 			put_vnode(vnode);
2622 			vnode = coveredVnode;
2623 		}
2624 
2625 		// lookup the parent vnode
2626 		struct vnode* parentVnode;
2627 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2628 		if (status != B_OK)
2629 			goto out;
2630 
2631 		if (parentVnode == vnode) {
2632 			// The caller apparently got their hands on a node outside of their
2633 			// context's root. Now we've hit the global root.
2634 			put_vnode(parentVnode);
2635 			break;
2636 		}
2637 
2638 		// get the node's name
2639 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2640 			// also used for fs_read_dir()
2641 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2642 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2643 			sizeof(nameBuffer), ioContext);
2644 
2645 		// release the current vnode, we only need its parent from now on
2646 		put_vnode(vnode);
2647 		vnode = parentVnode;
2648 
2649 		if (status != B_OK)
2650 			goto out;
2651 
2652 		// TODO: add an explicit check for loops in about 10 levels to do
2653 		// real loop detection
2654 
2655 		// don't go deeper than 'maxLevel' to prevent circular loops
2656 		if (maxLevel-- < 0) {
2657 			status = B_LINK_LIMIT;
2658 			goto out;
2659 		}
2660 
2661 		// add the name in front of the current path
2662 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2663 		length = strlen(name);
2664 		insert -= length;
2665 		if (insert <= 0) {
2666 			status = B_RESULT_NOT_REPRESENTABLE;
2667 			goto out;
2668 		}
2669 		memcpy(path + insert, name, length);
2670 		path[--insert] = '/';
2671 	}
2672 
2673 	// the root dir will result in an empty path: fix it
2674 	if (path[insert] == '\0')
2675 		path[--insert] = '/';
2676 
2677 	TRACE(("  path is: %s\n", path + insert));
2678 
2679 	// move the path to the start of the buffer
2680 	length = bufferSize - insert;
2681 	memmove(buffer, path + insert, length);
2682 
2683 out:
2684 	put_vnode(vnode);
2685 	return status;
2686 }
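
/*	Worked example (comment only): the buffer is filled right to left. For
	the directory /boot/home and a 16 byte buffer, dir_vnode_to_path()
	first writes the terminator at offset 15, then "home" preceded by a
	'/' (offsets 10-14), then "boot" preceded by its '/' (offsets 5-9),
	stops when it reaches the context root, and finally memmove()s
	"/boot/home" to the start of the buffer.
*/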
2687 
2688 
2689 /*!	Checks the length of every path component, and adds a '.'
2690 	if the path ends in a slash.
2691 	The given path buffer must be able to store at least one
2692 	additional character.
2693 */
2694 static status_t
2695 check_path(char* to)
2696 {
2697 	int32 length = 0;
2698 
2699 	// check length of every path component
2700 
2701 	while (*to) {
2702 		char* begin;
2703 		if (*to == '/')
2704 			to++, length++;
2705 
2706 		begin = to;
2707 		while (*to != '/' && *to)
2708 			to++, length++;
2709 
2710 		if (to - begin > B_FILE_NAME_LENGTH)
2711 			return B_NAME_TOO_LONG;
2712 	}
2713 
2714 	if (length == 0)
2715 		return B_ENTRY_NOT_FOUND;
2716 
2717 	// complete path if there is a slash at the end
2718 
2719 	if (*(to - 1) == '/') {
2720 		if (length > B_PATH_NAME_LENGTH - 2)
2721 			return B_NAME_TOO_LONG;
2722 
2723 		to[0] = '.';
2724 		to[1] = '\0';
2725 	}
2726 
2727 	return B_OK;
2728 }
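
/*	Worked examples (comment only) for check_path():
		"a/b/c/"  -> "a/b/c/."       (a '.' completes the trailing slash)
		""        -> B_ENTRY_NOT_FOUND
		a single component longer than B_FILE_NAME_LENGTH characters
		          -> B_NAME_TOO_LONG
*/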
2729 
2730 
2731 static struct file_descriptor*
2732 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2733 {
2734 	struct file_descriptor* descriptor
2735 		= get_fd(get_current_io_context(kernel), fd);
2736 	if (descriptor == NULL)
2737 		return NULL;
2738 
2739 	struct vnode* vnode = fd_vnode(descriptor);
2740 	if (vnode == NULL) {
2741 		put_fd(descriptor);
2742 		return NULL;
2743 	}
2744 
2745 	// TODO: when we can close a file descriptor at any point, investigate
2746 	//	if this is still valid to do (accessing the vnode without ref_count
2747 	//	or locking)
2748 	*_vnode = vnode;
2749 	return descriptor;
2750 }
2751 
2752 
2753 static struct vnode*
2754 get_vnode_from_fd(int fd, bool kernel)
2755 {
2756 	struct file_descriptor* descriptor;
2757 	struct vnode* vnode;
2758 
2759 	descriptor = get_fd(get_current_io_context(kernel), fd);
2760 	if (descriptor == NULL)
2761 		return NULL;
2762 
2763 	vnode = fd_vnode(descriptor);
2764 	if (vnode != NULL)
2765 		inc_vnode_ref_count(vnode);
2766 
2767 	put_fd(descriptor);
2768 	return vnode;
2769 }
2770 
2771 
2772 /*!	Gets the vnode from an FD + path combination. If \a fd is less than zero,
2773 	only the path will be considered. In this case, the \a path must not be
2774 	NULL.
2775 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2776 	and should be NULL for files.
2777 */
2778 static status_t
2779 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2780 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2781 {
2782 	if (fd < 0 && !path)
2783 		return B_BAD_VALUE;
2784 
2785 	if (path != NULL && *path == '\0')
2786 		return B_ENTRY_NOT_FOUND;
2787 
2788 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2789 		// no FD or absolute path
2790 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2791 	}
2792 
2793 	// FD only, or FD + relative path
2794 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2795 	if (vnode == NULL)
2796 		return B_FILE_ERROR;
2797 
2798 	if (path != NULL) {
2799 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2800 			_vnode, _parentID);
2801 	}
2802 
2803 	// there is no relative path to take into account
2804 
2805 	*_vnode = vnode;
2806 	if (_parentID)
2807 		*_parentID = -1;
2808 
2809 	return B_OK;
2810 }
2811 
2812 
2813 static int
2814 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2815 	void* cookie, int openMode, bool kernel)
2816 {
2817 	struct file_descriptor* descriptor;
2818 	int fd;
2819 
2820 	// If the vnode is locked, we don't allow creating a new file/directory
2821 	// file_descriptor for it
2822 	if (vnode && vnode->mandatory_locked_by != NULL
2823 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2824 		return B_BUSY;
2825 
2826 	descriptor = alloc_fd();
2827 	if (!descriptor)
2828 		return B_NO_MEMORY;
2829 
2830 	if (vnode)
2831 		descriptor->u.vnode = vnode;
2832 	else
2833 		descriptor->u.mount = mount;
2834 	descriptor->cookie = cookie;
2835 
2836 	switch (type) {
2837 		// vnode types
2838 		case FDTYPE_FILE:
2839 			descriptor->ops = &sFileOps;
2840 			break;
2841 		case FDTYPE_DIR:
2842 			descriptor->ops = &sDirectoryOps;
2843 			break;
2844 		case FDTYPE_ATTR:
2845 			descriptor->ops = &sAttributeOps;
2846 			break;
2847 		case FDTYPE_ATTR_DIR:
2848 			descriptor->ops = &sAttributeDirectoryOps;
2849 			break;
2850 
2851 		// mount types
2852 		case FDTYPE_INDEX_DIR:
2853 			descriptor->ops = &sIndexDirectoryOps;
2854 			break;
2855 		case FDTYPE_QUERY:
2856 			descriptor->ops = &sQueryOps;
2857 			break;
2858 
2859 		default:
2860 			panic("get_new_fd() called with unknown type %d\n", type);
2861 			break;
2862 	}
2863 	descriptor->type = type;
2864 	descriptor->open_mode = openMode;
2865 
2866 	io_context* context = get_current_io_context(kernel);
2867 	fd = new_fd(context, descriptor);
2868 	if (fd < 0) {
2869 		descriptor->ops = NULL;
2870 		put_fd(descriptor);
2871 		return B_NO_MORE_FDS;
2872 	}
2873 
2874 	mutex_lock(&context->io_mutex);
2875 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2876 	mutex_unlock(&context->io_mutex);
2877 
2878 	return fd;
2879 }
2880 
2881 
2882 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2883 	vfs_normalize_path(). See there for more documentation.
2884 */
2885 static status_t
2886 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2887 {
2888 	VNodePutter dirPutter;
2889 	struct vnode* dir = NULL;
2890 	status_t error;
2891 
2892 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2893 		// get dir vnode + leaf name
2894 		struct vnode* nextDir;
2895 		char leaf[B_FILE_NAME_LENGTH];
2896 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2897 		if (error != B_OK)
2898 			return error;
2899 
2900 		dir = nextDir;
2901 		strcpy(path, leaf);
2902 		dirPutter.SetTo(dir);
2903 
2904 		// get file vnode, if we shall resolve links
2905 		bool fileExists = false;
2906 		struct vnode* fileVnode;
2907 		VNodePutter fileVnodePutter;
2908 		if (traverseLink) {
2909 			inc_vnode_ref_count(dir);
2910 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2911 					NULL) == B_OK) {
2912 				fileVnodePutter.SetTo(fileVnode);
2913 				fileExists = true;
2914 			}
2915 		}
2916 
2917 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2918 			// we're done -- construct the path
2919 			bool hasLeaf = true;
2920 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2921 				// special cases "." and ".." -- get the dir, forget the leaf
2922 				inc_vnode_ref_count(dir);
2923 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2924 					&nextDir, NULL);
2925 				if (error != B_OK)
2926 					return error;
2927 				dir = nextDir;
2928 				dirPutter.SetTo(dir);
2929 				hasLeaf = false;
2930 			}
2931 
2932 			// get the directory path
2933 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2934 			if (error != B_OK)
2935 				return error;
2936 
2937 			// append the leaf name
2938 			if (hasLeaf) {
2939 				// insert a directory separator if this is not the file system
2940 				// root
2941 				if ((strcmp(path, "/") != 0
2942 					&& strlcat(path, "/", pathSize) >= pathSize)
2943 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2944 					return B_NAME_TOO_LONG;
2945 				}
2946 			}
2947 
2948 			return B_OK;
2949 		}
2950 
2951 		// read link
2952 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2953 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2954 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2955 			if (error != B_OK)
2956 				return error;
2957 			if (bufferSize < B_PATH_NAME_LENGTH)
2958 				path[bufferSize] = '\0';
2959 		} else
2960 			return B_BAD_VALUE;
2961 	}
2962 
2963 	return B_LINK_LIMIT;
2964 }
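
/*	Usage sketch (hypothetical, comment only): assuming /tmp/link is a
	symlink pointing to /var/tmp, normalization resolves the leaf link and
	rewrites the buffer in place:

		char path[B_PATH_NAME_LENGTH] = "/tmp/link";
		status_t status = normalize_path(path, sizeof(path), true, true);
			// on success, path now reads "/var/tmp"

	With traverseLink = false, the link itself would remain the leaf.
*/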
2965 
2966 
2967 static status_t
2968 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2969 	struct io_context* ioContext)
2970 {
2971 	// Make sure the IO context root is not bypassed.
2972 	if (parent == ioContext->root) {
2973 		*_device = parent->device;
2974 		*_node = parent->id;
2975 		return B_OK;
2976 	}
2977 
2978 	inc_vnode_ref_count(parent);
2979 		// vnode_path_to_vnode() puts the node
2980 
2981 	// ".." is guaranteed not to be clobbered by this call
2982 	struct vnode* vnode;
2983 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2984 		ioContext, &vnode, NULL);
2985 	if (status == B_OK) {
2986 		*_device = vnode->device;
2987 		*_node = vnode->id;
2988 		put_vnode(vnode);
2989 	}
2990 
2991 	return status;
2992 }
2993 
2994 
2995 #ifdef ADD_DEBUGGER_COMMANDS
2996 
2997 
2998 static void
2999 _dump_advisory_locking(advisory_locking* locking)
3000 {
3001 	if (locking == NULL)
3002 		return;
3003 
3004 	kprintf("   lock:        %" B_PRId32, locking->lock);
3005 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3006 
3007 	int32 index = 0;
3008 	LockList::Iterator iterator = locking->locks.GetIterator();
3009 	while (iterator.HasNext()) {
3010 		struct advisory_lock* lock = iterator.Next();
3011 
3012 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3013 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3014 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3015 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3016 	}
3017 }
3018 
3019 
3020 static void
3021 _dump_mount(struct fs_mount* mount)
3022 {
3023 	kprintf("MOUNT: %p\n", mount);
3024 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3025 	kprintf(" device_name:   %s\n", mount->device_name);
3026 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3027 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3028 	kprintf(" partition:     %p\n", mount->partition);
3029 	kprintf(" lock:          %p\n", &mount->lock);
3030 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3031 		mount->owns_file_device ? " owns_file_device" : "");
3032 
3033 	fs_volume* volume = mount->volume;
3034 	while (volume != NULL) {
3035 		kprintf(" volume %p:\n", volume);
3036 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3037 		kprintf("  private_volume:   %p\n", volume->private_volume);
3038 		kprintf("  ops:              %p\n", volume->ops);
3039 		kprintf("  file_system:      %p\n", volume->file_system);
3040 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3041 		volume = volume->super_volume;
3042 	}
3043 
3044 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3045 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3046 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3047 	set_debug_variable("_partition", (addr_t)mount->partition);
3048 }
3049 
3050 
3051 static bool
3052 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3053 	const char* name)
3054 {
3055 	bool insertSlash = buffer[bufferSize] != '\0';
3056 	size_t nameLength = strlen(name);
3057 
3058 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3059 		return false;
3060 
3061 	if (insertSlash)
3062 		buffer[--bufferSize] = '/';
3063 
3064 	bufferSize -= nameLength;
3065 	memcpy(buffer + bufferSize, name, nameLength);
3066 
3067 	return true;
3068 }
3069 
3070 
3071 static bool
3072 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3073 	ino_t nodeID)
3074 {
3075 	if (bufferSize == 0)
3076 		return false;
3077 
3078 	bool insertSlash = buffer[bufferSize] != '\0';
3079 	if (insertSlash)
3080 		buffer[--bufferSize] = '/';
3081 
3082 	size_t size = snprintf(buffer, bufferSize,
3083 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3084 	if (size > bufferSize) {
3085 		if (insertSlash)
3086 			bufferSize++;
3087 		return false;
3088 	}
3089 
3090 	if (size < bufferSize)
3091 		memmove(buffer + bufferSize - size, buffer, size);
3092 
3093 	bufferSize -= size;
3094 	return true;
3095 }
3096 
3097 
3098 static char*
3099 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3100 	bool& _truncated)
3101 {
3102 	// null-terminate the path
3103 	buffer[--bufferSize] = '\0';
3104 
3105 	while (true) {
3106 		while (vnode->covers != NULL)
3107 			vnode = vnode->covers;
3108 
3109 		if (vnode == sRoot) {
3110 			_truncated = bufferSize == 0;
3111 			if (!_truncated)
3112 				buffer[--bufferSize] = '/';
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		// resolve the name
3117 		ino_t dirID;
3118 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3119 			vnode->id, dirID);
3120 		if (name == NULL) {
3121 			// Failed to resolve the name -- prepend "<dev,node>/".
3122 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3123 				vnode->mount->id, vnode->id);
3124 			return buffer + bufferSize;
3125 		}
3126 
3127 		// prepend the name
3128 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3129 			_truncated = true;
3130 			return buffer + bufferSize;
3131 		}
3132 
3133 		// resolve the directory node
3134 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3135 		if (nextVnode == NULL) {
3136 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3137 				vnode->mount->id, dirID);
3138 			return buffer + bufferSize;
3139 		}
3140 
3141 		vnode = nextVnode;
3142 	}
3143 }
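
/*	Example output (comment only): when every component can be resolved via
	the entry cache, the result looks like "/boot/home/Desktop". When a
	reverse lookup fails along the way, the unresolvable part is replaced
	by its IDs, e.g. "<2,12345>/Desktop", and _truncated is set if the
	buffer runs out of space.
*/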
3144 
3145 
3146 static void
3147 _dump_vnode(struct vnode* vnode, bool printPath)
3148 {
3149 	kprintf("VNODE: %p\n", vnode);
3150 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3151 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3152 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3153 	kprintf(" private_node:  %p\n", vnode->private_node);
3154 	kprintf(" mount:         %p\n", vnode->mount);
3155 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3156 	kprintf(" covers:        %p\n", vnode->covers);
3157 	kprintf(" cache:         %p\n", vnode->cache);
3158 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3159 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3160 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3161 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3162 
3163 	_dump_advisory_locking(vnode->advisory_locking);
3164 
3165 	if (printPath) {
3166 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3167 		if (buffer != NULL) {
3168 			bool truncated;
3169 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3170 				B_PATH_NAME_LENGTH, truncated);
3171 			if (path != NULL) {
3172 				kprintf(" path:          ");
3173 				if (truncated)
3174 					kputs("<truncated>/");
3175 				kputs(path);
3176 				kputs("\n");
3177 			} else
3178 				kprintf("Failed to resolve vnode path.\n");
3179 
3180 			debug_free(buffer);
3181 		} else
3182 			kprintf("Failed to allocate memory for constructing the path.\n");
3183 	}
3184 
3185 	set_debug_variable("_node", (addr_t)vnode->private_node);
3186 	set_debug_variable("_mount", (addr_t)vnode->mount);
3187 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3188 	set_debug_variable("_covers", (addr_t)vnode->covers);
3189 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3190 }
3191 
3192 
3193 static int
3194 dump_mount(int argc, char** argv)
3195 {
3196 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3197 		kprintf("usage: %s [id|address]\n", argv[0]);
3198 		return 0;
3199 	}
3200 
3201 	ulong val = parse_expression(argv[1]);
3202 	uint32 id = val;
3203 
3204 	struct fs_mount* mount = sMountsTable->Lookup(id);
3205 	if (mount == NULL) {
3206 		if (IS_USER_ADDRESS(val)) {
3207 			kprintf("fs_mount not found\n");
3208 			return 0;
3209 		}
3210 		mount = (fs_mount*)val;
3211 	}
3212 
3213 	_dump_mount(mount);
3214 	return 0;
3215 }
3216 
3217 
3218 static int
3219 dump_mounts(int argc, char** argv)
3220 {
3221 	if (argc != 1) {
3222 		kprintf("usage: %s\n", argv[0]);
3223 		return 0;
3224 	}
3225 
3226 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3227 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3228 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3229 
3230 	struct fs_mount* mount;
3231 
3232 	MountTable::Iterator iterator(sMountsTable);
3233 	while (iterator.HasNext()) {
3234 		mount = iterator.Next();
3235 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3236 			mount->root_vnode->covers, mount->volume->private_volume,
3237 			mount->volume->file_system_name);
3238 
3239 		fs_volume* volume = mount->volume;
3240 		while (volume->super_volume != NULL) {
3241 			volume = volume->super_volume;
3242 			kprintf("                                     %p %s\n",
3243 				volume->private_volume, volume->file_system_name);
3244 		}
3245 	}
3246 
3247 	return 0;
3248 }
3249 
3250 
3251 static int
3252 dump_vnode(int argc, char** argv)
3253 {
3254 	bool printPath = false;
3255 	int argi = 1;
3256 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3257 		printPath = true;
3258 		argi++;
3259 	}
3260 
3261 	if (argi >= argc || argi + 2 < argc) {
3262 		print_debugger_command_usage(argv[0]);
3263 		return 0;
3264 	}
3265 
3266 	struct vnode* vnode = NULL;
3267 
3268 	if (argi + 1 == argc) {
3269 		vnode = (struct vnode*)parse_expression(argv[argi]);
3270 		if (IS_USER_ADDRESS(vnode)) {
3271 			kprintf("invalid vnode address\n");
3272 			return 0;
3273 		}
3274 		_dump_vnode(vnode, printPath);
3275 		return 0;
3276 	}
3277 
3278 	dev_t device = parse_expression(argv[argi]);
3279 	ino_t id = parse_expression(argv[argi + 1]);
3280 
3281 	VnodeTable::Iterator iterator(sVnodeTable);
3282 	while (iterator.HasNext()) {
3283 		vnode = iterator.Next();
3284 		if (vnode->id != id || vnode->device != device)
3285 			continue;
3286 
3287 		_dump_vnode(vnode, printPath);
3288 	}
3289 
3290 	return 0;
3291 }
3292 
3293 
3294 static int
3295 dump_vnodes(int argc, char** argv)
3296 {
3297 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3298 		kprintf("usage: %s [device]\n", argv[0]);
3299 		return 0;
3300 	}
3301 
3302 	// restrict dumped nodes to a certain device if requested
3303 	dev_t device = parse_expression(argv[1]);
3304 
3305 	struct vnode* vnode;
3306 
3307 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3308 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3309 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3310 
3311 	VnodeTable::Iterator iterator(sVnodeTable);
3312 	while (iterator.HasNext()) {
3313 		vnode = iterator.Next();
3314 		if (vnode->device != device)
3315 			continue;
3316 
3317 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3318 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3319 			vnode->private_node, vnode->advisory_locking,
3320 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3321 			vnode->IsUnpublished() ? "u" : "-");
3322 	}
3323 
3324 	return 0;
3325 }
3326 
3327 
3328 static int
3329 dump_vnode_caches(int argc, char** argv)
3330 {
3331 	struct vnode* vnode;
3332 
3333 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3334 		kprintf("usage: %s [device]\n", argv[0]);
3335 		return 0;
3336 	}
3337 
3338 	// restrict dumped nodes to a certain device if requested
3339 	dev_t device = -1;
3340 	if (argc > 1)
3341 		device = parse_expression(argv[1]);
3342 
3343 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3344 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3345 
3346 	VnodeTable::Iterator iterator(sVnodeTable);
3347 	while (iterator.HasNext()) {
3348 		vnode = iterator.Next();
3349 		if (vnode->cache == NULL)
3350 			continue;
3351 		if (device != -1 && vnode->device != device)
3352 			continue;
3353 
3354 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3355 			vnode, vnode->device, vnode->id, vnode->cache,
3356 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3357 			vnode->cache->page_count);
3358 	}
3359 
3360 	return 0;
3361 }
3362 
3363 
3364 int
3365 dump_io_context(int argc, char** argv)
3366 {
3367 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3368 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3369 		return 0;
3370 	}
3371 
3372 	struct io_context* context = NULL;
3373 
3374 	if (argc > 1) {
3375 		ulong num = parse_expression(argv[1]);
3376 		if (IS_KERNEL_ADDRESS(num))
3377 			context = (struct io_context*)num;
3378 		else {
3379 			Team* team = team_get_team_struct_locked(num);
3380 			if (team == NULL) {
3381 				kprintf("could not find team with ID %lu\n", num);
3382 				return 0;
3383 			}
3384 			context = (struct io_context*)team->io_context;
3385 		}
3386 	} else
3387 		context = get_current_io_context(true);
3388 
3389 	kprintf("I/O CONTEXT: %p\n", context);
3390 	kprintf(" root vnode:\t%p\n", context->root);
3391 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3392 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3393 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3394 
3395 	if (context->num_used_fds) {
3396 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3397 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3398 	}
3399 
3400 	for (uint32 i = 0; i < context->table_size; i++) {
3401 		struct file_descriptor* fd = context->fds[i];
3402 		if (fd == NULL)
3403 			continue;
3404 
3405 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3406 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3407 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3408 			fd->pos, fd->cookie,
3409 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3410 				? "mount" : "vnode",
3411 			fd->u.vnode);
3412 	}
3413 
3414 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3415 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3416 
3417 	set_debug_variable("_cwd", (addr_t)context->cwd);
3418 
3419 	return 0;
3420 }
3421 
3422 
3423 int
3424 dump_vnode_usage(int argc, char** argv)
3425 {
3426 	if (argc != 1) {
3427 		kprintf("usage: %s\n", argv[0]);
3428 		return 0;
3429 	}
3430 
3431 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3432 		sUnusedVnodes, kMaxUnusedVnodes);
3433 
3434 	uint32 count = sVnodeTable->CountElements();
3435 
3436 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3437 		count - sUnusedVnodes);
3438 	return 0;
3439 }
3440 
3441 #endif	// ADD_DEBUGGER_COMMANDS
3442 
3443 
3444 /*!	Clears memory specified by an iovec array.
3445 */
3446 static void
3447 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3448 {
3449 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3450 		size_t length = std::min(vecs[i].iov_len, bytes);
3451 		memset(vecs[i].iov_base, 0, length);
3452 		bytes -= length;
3453 	}
3454 }
3455 
3456 
3457 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3458 	and calls the file system hooks to read/write the request to disk.
3459 */
3460 static status_t
3461 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3462 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3463 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3464 	bool doWrite)
3465 {
3466 	if (fileVecCount == 0) {
3467 		// There are no file vecs at this offset, so we're obviously trying
3468 		// to access the file outside of its bounds
3469 		return B_BAD_VALUE;
3470 	}
3471 
3472 	size_t numBytes = *_numBytes;
3473 	uint32 fileVecIndex;
3474 	size_t vecOffset = *_vecOffset;
3475 	uint32 vecIndex = *_vecIndex;
3476 	status_t status;
3477 	size_t size;
3478 
3479 	if (!doWrite && vecOffset == 0) {
3480 		// now directly read the data from the device
3481 		// the first file_io_vec can be read directly
3482 
3483 		if (fileVecs[0].length < (off_t)numBytes)
3484 			size = fileVecs[0].length;
3485 		else
3486 			size = numBytes;
3487 
3488 		if (fileVecs[0].offset >= 0) {
3489 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3490 				&vecs[vecIndex], vecCount - vecIndex, &size);
3491 		} else {
3492 			// sparse read
3493 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3494 			status = B_OK;
3495 		}
3496 		if (status != B_OK)
3497 			return status;
3498 
3499 		// TODO: this is a work-around for buggy device drivers!
3500 		//	When our own drivers honour the length, we can:
3501 		//	a) also use this direct I/O for writes (otherwise, it would
3502 		//	   overwrite precious data)
3503 		//	b) panic if the term below is true (at least for writes)
3504 		if ((off_t)size > fileVecs[0].length) {
3505 			//dprintf("warning: device driver %p doesn't respect total length "
3506 			//	"in read_pages() call!\n", ref->device);
3507 			size = fileVecs[0].length;
3508 		}
3509 
3510 		ASSERT((off_t)size <= fileVecs[0].length);
3511 
3512 		// If the file portion was contiguous, we're already done now
3513 		if (size == numBytes)
3514 			return B_OK;
3515 
3516 		// if we reached the end of the file, we can return as well
3517 		if ((off_t)size != fileVecs[0].length) {
3518 			*_numBytes = size;
3519 			return B_OK;
3520 		}
3521 
3522 		fileVecIndex = 1;
3523 
3524 		// first, find out where we have to continue in our iovecs
3525 		for (; vecIndex < vecCount; vecIndex++) {
3526 			if (size < vecs[vecIndex].iov_len)
3527 				break;
3528 
3529 			size -= vecs[vecIndex].iov_len;
3530 		}
3531 
3532 		vecOffset = size;
3533 	} else {
3534 		fileVecIndex = 0;
3535 		size = 0;
3536 	}
3537 
3538 	// Too bad, let's process the rest of the file_io_vecs
3539 
3540 	size_t totalSize = size;
3541 	size_t bytesLeft = numBytes - size;
3542 
3543 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3544 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3545 		off_t fileOffset = fileVec.offset;
3546 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3547 
3548 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3549 			fileLeft));
3550 
3551 		// process the complete fileVec
3552 		while (fileLeft > 0) {
3553 			iovec tempVecs[MAX_TEMP_IO_VECS];
3554 			uint32 tempCount = 0;
3555 
3556 			// size tracks how much of what is left of the current fileVec
3557 			// (fileLeft) has been assigned to tempVecs
3558 			size = 0;
3559 
3560 			// assign what is left of the current fileVec to the tempVecs
3561 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3562 					&& tempCount < MAX_TEMP_IO_VECS;) {
3563 				// try to satisfy one iovec per iteration (or as much as
3564 				// possible)
3565 
3566 				// bytes left of the current iovec
3567 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3568 				if (vecLeft == 0) {
3569 					vecOffset = 0;
3570 					vecIndex++;
3571 					continue;
3572 				}
3573 
3574 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3575 					vecIndex, vecOffset, size));
3576 
3577 				// actually available bytes
3578 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3579 
3580 				tempVecs[tempCount].iov_base
3581 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3582 				tempVecs[tempCount].iov_len = tempVecSize;
3583 				tempCount++;
3584 
3585 				size += tempVecSize;
3586 				vecOffset += tempVecSize;
3587 			}
3588 
3589 			size_t bytes = size;
3590 
3591 			if (fileOffset == -1) {
3592 				if (doWrite) {
3593 					panic("sparse write attempt: vnode %p", vnode);
3594 					status = B_IO_ERROR;
3595 				} else {
3596 					// sparse read
3597 					zero_iovecs(tempVecs, tempCount, bytes);
3598 					status = B_OK;
3599 				}
3600 			} else if (doWrite) {
3601 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3602 					tempVecs, tempCount, &bytes);
3603 			} else {
3604 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3605 					tempVecs, tempCount, &bytes);
3606 			}
3607 			if (status != B_OK)
3608 				return status;
3609 
3610 			totalSize += bytes;
3611 			bytesLeft -= size;
3612 			if (fileOffset >= 0)
3613 				fileOffset += size;
3614 			fileLeft -= size;
3615 			//dprintf("-> file left = %Lu\n", fileLeft);
3616 
3617 			if (size != bytes || vecIndex >= vecCount) {
3618 				// there are no more bytes or iovecs, let's bail out
3619 				*_numBytes = totalSize;
3620 				return B_OK;
3621 			}
3622 		}
3623 	}
3624 
3625 	*_vecIndex = vecIndex;
3626 	*_vecOffset = vecOffset;
3627 	*_numBytes = totalSize;
3628 	return B_OK;
3629 }
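
/*	Worked example (comment only): reading 1024 bytes described by two
	file_io_vecs { offset 1000, length 512 } and { offset 4096, length 512 }
	into a single 1024 byte iovec. The fast path at the top reads the first
	512 bytes directly at offset 1000; the main loop then builds a temp vec
	for the second half of the iovec and reads it at offset 4096, so
	*_numBytes ends up as 1024. A file_io_vec offset of -1 denotes a sparse
	region: it is zero-filled on reads and panics on writes.
*/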
3630 
3631 
3632 static bool
3633 is_user_in_group(gid_t gid)
3634 {
3635 	if (gid == getegid())
3636 		return true;
3637 
3638 	gid_t groups[NGROUPS_MAX];
3639 	int groupCount = getgroups(NGROUPS_MAX, groups);
3640 	for (int i = 0; i < groupCount; i++) {
3641 		if (gid == groups[i])
3642 			return true;
3643 	}
3644 
3645 	return false;
3646 }
3647 
3648 
3649 static status_t
3650 free_io_context(io_context* context)
3651 {
3652 	uint32 i;
3653 
3654 	TIOC(FreeIOContext(context));
3655 
3656 	if (context->root)
3657 		put_vnode(context->root);
3658 
3659 	if (context->cwd)
3660 		put_vnode(context->cwd);
3661 
3662 	mutex_lock(&context->io_mutex);
3663 
3664 	for (i = 0; i < context->table_size; i++) {
3665 		if (struct file_descriptor* descriptor = context->fds[i]) {
3666 			close_fd(context, descriptor);
3667 			put_fd(descriptor);
3668 		}
3669 	}
3670 
3671 	mutex_destroy(&context->io_mutex);
3672 
3673 	remove_node_monitors(context);
3674 	free(context->fds);
3675 	free(context);
3676 
3677 	return B_OK;
3678 }
3679 
3680 
3681 static status_t
3682 resize_monitor_table(struct io_context* context, const int newSize)
3683 {
3684 	int	status = B_OK;
3685 
3686 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3687 		return B_BAD_VALUE;
3688 
3689 	mutex_lock(&context->io_mutex);
3690 
3691 	if ((size_t)newSize < context->num_monitors) {
3692 		status = B_BUSY;
3693 		goto out;
3694 	}
3695 	context->max_monitors = newSize;
3696 
3697 out:
3698 	mutex_unlock(&context->io_mutex);
3699 	return status;
3700 }
3701 
3702 
3703 //	#pragma mark - public API for file systems
3704 
3705 
3706 extern "C" status_t
3707 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3708 	fs_vnode_ops* ops)
3709 {
3710 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3711 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3712 
3713 	if (privateNode == NULL)
3714 		return B_BAD_VALUE;
3715 
3716 	int32 tries = BUSY_VNODE_RETRIES;
3717 restart:
3718 	// create the node
3719 	bool nodeCreated;
3720 	struct vnode* vnode;
3721 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3722 		nodeCreated);
3723 	if (status != B_OK)
3724 		return status;
3725 
3726 	WriteLocker nodeLocker(sVnodeLock, true);
3727 		// create_new_vnode_and_lock() has locked for us
3728 
3729 	if (!nodeCreated && vnode->IsBusy()) {
3730 		nodeLocker.Unlock();
3731 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3732 			return B_BUSY;
3733 		goto restart;
3734 	}
3735 
3736 	// file system integrity check:
3737 	// test if the vnode already exists and bail out if this is the case!
3738 	if (!nodeCreated) {
3739 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3740 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3741 			vnode->private_node);
3742 		return B_ERROR;
3743 	}
3744 
3745 	vnode->private_node = privateNode;
3746 	vnode->ops = ops;
3747 	vnode->SetUnpublished(true);
3748 
3749 	TRACE(("returns: %s\n", strerror(status)));
3750 
3751 	return status;
3752 }
3753 
3754 
3755 extern "C" status_t
3756 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3757 	fs_vnode_ops* ops, int type, uint32 flags)
3758 {
3759 	FUNCTION(("publish_vnode()\n"));
3760 
3761 	int32 tries = BUSY_VNODE_RETRIES;
3762 restart:
3763 	WriteLocker locker(sVnodeLock);
3764 
3765 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3766 
3767 	bool nodeCreated = false;
3768 	if (vnode == NULL) {
3769 		if (privateNode == NULL)
3770 			return B_BAD_VALUE;
3771 
3772 		// create the node
3773 		locker.Unlock();
3774 			// create_new_vnode_and_lock() will re-lock for us on success
3775 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3776 			nodeCreated);
3777 		if (status != B_OK)
3778 			return status;
3779 
3780 		locker.SetTo(sVnodeLock, true);
3781 	}
3782 
3783 	if (nodeCreated) {
3784 		vnode->private_node = privateNode;
3785 		vnode->ops = ops;
3786 		vnode->SetUnpublished(true);
3787 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3788 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3789 		// already known, but not published
3790 	} else if (vnode->IsBusy()) {
3791 		locker.Unlock();
3792 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3793 			return B_BUSY;
3794 		goto restart;
3795 	} else
3796 		return B_BAD_VALUE;
3797 
3798 	bool publishSpecialSubNode = false;
3799 
3800 	vnode->SetType(type);
3801 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3802 	publishSpecialSubNode = is_special_node_type(type)
3803 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3804 
3805 	status_t status = B_OK;
3806 
3807 	// create sub vnodes, if necessary
3808 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3809 		locker.Unlock();
3810 
3811 		fs_volume* subVolume = volume;
3812 		if (volume->sub_volume != NULL) {
3813 			while (status == B_OK && subVolume->sub_volume != NULL) {
3814 				subVolume = subVolume->sub_volume;
3815 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3816 					vnode);
3817 			}
3818 		}
3819 
3820 		if (status == B_OK && publishSpecialSubNode)
3821 			status = create_special_sub_node(vnode, flags);
3822 
3823 		if (status != B_OK) {
3824 			// error -- clean up the created sub vnodes
3825 			while (subVolume->super_volume != volume) {
3826 				subVolume = subVolume->super_volume;
3827 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3828 			}
3829 		}
3830 
3831 		if (status == B_OK) {
3832 			ReadLocker vnodesReadLocker(sVnodeLock);
3833 			AutoLocker<Vnode> nodeLocker(vnode);
3834 			vnode->SetBusy(false);
3835 			vnode->SetUnpublished(false);
3836 		} else {
3837 			locker.Lock();
3838 			sVnodeTable->Remove(vnode);
3839 			remove_vnode_from_mount_list(vnode, vnode->mount);
3840 			free(vnode);
3841 		}
3842 	} else {
3843 		// we still hold the write lock -- mark the node unbusy and published
3844 		vnode->SetBusy(false);
3845 		vnode->SetUnpublished(false);
3846 	}
3847 
3848 	TRACE(("returns: %s\n", strerror(status)));
3849 
3850 	return status;
3851 }
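

/*	Example (illustrative sketch, not part of the VFS): a file system's
	create hook typically pairs new_vnode() with publish_vnode(). Between
	the two calls the node is known to the VFS but still invisible to
	lookups, so the FS can back out via remove_vnode() on failure. All
	"myfs" names below are hypothetical.

		static status_t
		myfs_publish_new_node(fs_volume* volume, myfs_inode* inode)
		{
			// make the node known to the VFS, but keep it unpublished
			status_t status = new_vnode(volume, inode->id, inode,
				&gMyfsVnodeOps);
			if (status != B_OK)
				return status;

			// ... write the on-disk structures; on failure remove_vnode()
			// would dispose of the still unpublished node ...

			// make the node visible to lookups
			return publish_vnode(volume, inode->id, inode, &gMyfsVnodeOps,
				inode->mode & S_IFMT, 0);
		}
*/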
3852 
3853 
3854 extern "C" status_t
3855 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3856 {
3857 	struct vnode* vnode;
3858 
3859 	if (volume == NULL)
3860 		return B_BAD_VALUE;
3861 
3862 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3863 	if (status != B_OK)
3864 		return status;
3865 
3866 	// If this is a layered FS, we need to get the node cookie for the requested
3867 	// layer.
3868 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3869 		fs_vnode resolvedNode;
3870 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3871 			&resolvedNode);
3872 		if (status != B_OK) {
3873 			panic("get_vnode(): Failed to get super node for vnode %p, "
3874 				"volume: %p", vnode, volume);
3875 			put_vnode(vnode);
3876 			return status;
3877 		}
3878 
3879 		if (_privateNode != NULL)
3880 			*_privateNode = resolvedNode.private_node;
3881 	} else if (_privateNode != NULL)
3882 		*_privateNode = vnode->private_node;
3883 
3884 	return B_OK;
3885 }
3886 
3887 
3888 extern "C" status_t
3889 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3890 {
3891 	struct vnode* vnode;
3892 
3893 	rw_lock_read_lock(&sVnodeLock);
3894 	vnode = lookup_vnode(volume->id, vnodeID);
3895 	rw_lock_read_unlock(&sVnodeLock);
3896 
3897 	if (vnode == NULL)
3898 		return B_BAD_VALUE;
3899 
3900 	inc_vnode_ref_count(vnode);
3901 	return B_OK;
3902 }
3903 
3904 
3905 extern "C" status_t
3906 put_vnode(fs_volume* volume, ino_t vnodeID)
3907 {
3908 	struct vnode* vnode;
3909 
3910 	rw_lock_read_lock(&sVnodeLock);
3911 	vnode = lookup_vnode(volume->id, vnodeID);
3912 	rw_lock_read_unlock(&sVnodeLock);
3913 
3914 	if (vnode == NULL)
3915 		return B_BAD_VALUE;
3916 
3917 	dec_vnode_ref_count(vnode, false, true);
3918 	return B_OK;
3919 }
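

/*	Example (illustrative sketch): get_vnode() and put_vnode() form a
	reference counting pair. A file system that needs to work on another
	of its own nodes might do the following; myfs_inode is hypothetical.

		void* privateNode;
		if (get_vnode(volume, someID, &privateNode) == B_OK) {
			myfs_inode* inode = (myfs_inode*)privateNode;
			// ... the node cannot go away while the reference is held ...
			put_vnode(volume, someID);
		}
*/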
3920 
3921 
3922 extern "C" status_t
3923 remove_vnode(fs_volume* volume, ino_t vnodeID)
3924 {
3925 	ReadLocker locker(sVnodeLock);
3926 
3927 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3928 	if (vnode == NULL)
3929 		return B_ENTRY_NOT_FOUND;
3930 
3931 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3932 		// this vnode is in use
3933 		return B_BUSY;
3934 	}
3935 
3936 	vnode->Lock();
3937 
3938 	vnode->SetRemoved(true);
3939 	bool removeUnpublished = false;
3940 
3941 	if (vnode->IsUnpublished()) {
3942 		// prepare the vnode for deletion
3943 		removeUnpublished = true;
3944 		vnode->SetBusy(true);
3945 	}
3946 
3947 	vnode->Unlock();
3948 	locker.Unlock();
3949 
3950 	if (removeUnpublished) {
3951 		// If the vnode hasn't been published yet, we delete it here
3952 		atomic_add(&vnode->ref_count, -1);
3953 		free_vnode(vnode, true);
3954 	}
3955 
3956 	return B_OK;
3957 }
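

/*	Example (illustrative sketch): a file system's unlink hook usually
	deletes the directory entry first and then marks the node for
	deletion; the node is destroyed only once the last reference has
	been put. myfs_remove_entry() is a hypothetical helper.

		static status_t
		myfs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
		{
			ino_t id;
			status_t status = myfs_remove_entry(volume, dir, name, &id);
			if (status != B_OK)
				return status;

			return remove_vnode(volume, id);
		}
*/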
3958 
3959 
3960 extern "C" status_t
3961 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3962 {
3963 	struct vnode* vnode;
3964 
3965 	rw_lock_read_lock(&sVnodeLock);
3966 
3967 	vnode = lookup_vnode(volume->id, vnodeID);
3968 	if (vnode) {
3969 		AutoLocker<Vnode> nodeLocker(vnode);
3970 		vnode->SetRemoved(false);
3971 	}
3972 
3973 	rw_lock_read_unlock(&sVnodeLock);
3974 	return B_OK;
3975 }
3976 
3977 
3978 extern "C" status_t
3979 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3980 {
3981 	ReadLocker _(sVnodeLock);
3982 
3983 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3984 		if (_removed != NULL)
3985 			*_removed = vnode->IsRemoved();
3986 		return B_OK;
3987 	}
3988 
3989 	return B_BAD_VALUE;
3990 }
3991 
3992 
3993 extern "C" status_t
3994 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3995 {
3996 	ReadLocker locker(sVnodeLock);
3997 
3998 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3999 	if (vnode == NULL)
4000 		return B_ENTRY_NOT_FOUND;
4001 
4002 	// are we trying to mark an already busy node busy again?
4003 	if (busy && vnode->IsBusy())
4004 		return B_BUSY;
4005 
4006 	vnode->Lock();
4007 	vnode->SetBusy(busy);
4008 	vnode->Unlock();
4009 
4010 	return B_OK;
4011 }
4012 
4013 
4014 extern "C" status_t
4015 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4016 {
4017 	WriteLocker locker(sVnodeLock);
4018 
4019 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4020 	if (vnode == NULL)
4021 		return B_ENTRY_NOT_FOUND;
4022 
4023 	sVnodeTable->Remove(vnode);
4024 	vnode->id = newID;
4025 	sVnodeTable->Insert(vnode);
4026 
4027 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
4028 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4029 
4030 	return B_OK;
4031 }
4032 
4033 
4034 extern "C" fs_volume*
4035 volume_for_vnode(fs_vnode* _vnode)
4036 {
4037 	if (_vnode == NULL)
4038 		return NULL;
4039 
4040 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4041 	return vnode->mount->volume;
4042 }
4043 
4044 
4045 extern "C" status_t
4046 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4047 	uid_t nodeUserID)
4048 {
4049 	// get node permissions
4050 	int userPermissions = (mode & S_IRWXU) >> 6;
4051 	int groupPermissions = (mode & S_IRWXG) >> 3;
4052 	int otherPermissions = mode & S_IRWXO;
4053 
4054 	// get the node permissions for this uid/gid
4055 	int permissions = 0;
4056 	uid_t uid = geteuid();
4057 
4058 	if (uid == 0) {
4059 		// user is root
4060 		// root always has read/write permission, but at least one of the
4061 		// X bits must be set for execute permission
4062 		permissions = userPermissions | groupPermissions | otherPermissions
4063 			| S_IROTH | S_IWOTH;
4064 		if (S_ISDIR(mode))
4065 			permissions |= S_IXOTH;
4066 	} else if (uid == nodeUserID) {
4067 		// user is node owner
4068 		permissions = userPermissions;
4069 	} else if (is_user_in_group(nodeGroupID)) {
4070 		// user is in owning group
4071 		permissions = groupPermissions;
4072 	} else {
4073 		// user is one of the others
4074 		permissions = otherPermissions;
4075 	}
4076 
4077 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4078 }
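

/*	Example (illustrative sketch): a file system's access hook can simply
	forward to check_access_permissions() with the ownership and mode of
	the node; the myfs_inode type is hypothetical.

		static status_t
		myfs_access(fs_volume* volume, fs_vnode* vnode, int accessMode)
		{
			myfs_inode* inode = (myfs_inode*)vnode->private_node;
			return check_access_permissions(accessMode, inode->mode,
				inode->gid, inode->uid);
		}
*/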
4079 
4080 
4081 #if 0
4082 extern "C" status_t
4083 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4084 	size_t* _numBytes)
4085 {
4086 	struct file_descriptor* descriptor;
4087 	struct vnode* vnode;
4088 
4089 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4090 	if (descriptor == NULL)
4091 		return B_FILE_ERROR;
4092 
4093 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4094 		count, 0, _numBytes);
4095 
4096 	put_fd(descriptor);
4097 	return status;
4098 }
4099 
4100 
4101 extern "C" status_t
4102 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4103 	size_t* _numBytes)
4104 {
4105 	struct file_descriptor* descriptor;
4106 	struct vnode* vnode;
4107 
4108 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4109 	if (descriptor == NULL)
4110 		return B_FILE_ERROR;
4111 
4112 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4113 		count, 0, _numBytes);
4114 
4115 	put_fd(descriptor);
4116 	return status;
4117 }
4118 #endif
4119 
4120 
4121 extern "C" status_t
4122 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4123 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4124 	size_t* _bytes)
4125 {
4126 	struct file_descriptor* descriptor;
4127 	struct vnode* vnode;
4128 
4129 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4130 	if (descriptor == NULL)
4131 		return B_FILE_ERROR;
4132 
4133 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4134 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4135 		false);
4136 
4137 	put_fd(descriptor);
4138 	return status;
4139 }
4140 
4141 
4142 extern "C" status_t
4143 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4144 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4145 	size_t* _bytes)
4146 {
4147 	struct file_descriptor* descriptor;
4148 	struct vnode* vnode;
4149 
4150 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4151 	if (descriptor == NULL)
4152 		return B_FILE_ERROR;
4153 
4154 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4155 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4156 		true);
4157 
4158 	put_fd(descriptor);
4159 	return status;
4160 }
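

/*	Example (illustrative sketch): reading a file extent into a memory
	vector. The file_io_vec would normally come from the FS via its
	get_file_map() hook; the values below are made up.

		file_io_vec fileVec;
		fileVec.offset = 0;			// position of the extent
		fileVec.length = 4096;

		char buffer[4096];
		iovec vec;
		vec.iov_base = buffer;
		vec.iov_len = sizeof(buffer);

		uint32 vecIndex = 0;
		size_t vecOffset = 0;
		size_t bytes = sizeof(buffer);
		status_t status = read_file_io_vec_pages(fd, &fileVec, 1, &vec, 1,
			&vecIndex, &vecOffset, &bytes);
		// on success, vecIndex/vecOffset tell how far the iovecs have
		// been consumed, and "bytes" how much was actually transferred
*/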
4161 
4162 
4163 extern "C" status_t
4164 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4165 {
4166 	// look up the mount -- the caller is required to make sure that the mount
4167 	// won't go away
4168 	ReadLocker locker(sMountLock);
4169 	struct fs_mount* mount = find_mount(mountID);
4170 	if (mount == NULL)
4171 		return B_BAD_VALUE;
4172 	locker.Unlock();
4173 
4174 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4175 }
4176 
4177 
4178 extern "C" status_t
4179 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4180 {
4181 	// look up the mount -- the caller is required to make sure that the mount
4182 	// won't go away
4183 	ReadLocker locker(sMountLock);
4184 	struct fs_mount* mount = find_mount(mountID);
4185 	if (mount == NULL)
4186 		return B_BAD_VALUE;
4187 	locker.Unlock();
4188 
4189 	return mount->entry_cache.Add(dirID, name, -1, true);
4190 }
4191 
4192 
4193 extern "C" status_t
4194 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4195 {
4196 	// look up the mount -- the caller is required to make sure that the mount
4197 	// won't go away
4198 	ReadLocker locker(sMountLock);
4199 	struct fs_mount* mount = find_mount(mountID);
4200 	if (mount == NULL)
4201 		return B_BAD_VALUE;
4202 	locker.Unlock();
4203 
4204 	return mount->entry_cache.Remove(dirID, name);
4205 }
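

/*	Example (illustrative sketch): a file system can keep the entry cache
	coherent around directory operations. The IDs below are assumed to
	come from the respective FS structures.

		// after an entry has been created successfully:
		entry_cache_add(volume->id, dirID, name, newNodeID);

		// after an entry has been removed successfully:
		entry_cache_remove(volume->id, dirID, name);

		// a failed lookup can be cached, too, to short-circuit
		// repeated misses:
		entry_cache_add_missing(volume->id, dirID, name);
*/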
4206 
4207 
4208 //	#pragma mark - private VFS API
4209 //	Functions the VFS exports for other parts of the kernel
4210 
4211 
4212 /*! Acquires another reference to the vnode that has to be released
4213 	by calling vfs_put_vnode().
4214 */
4215 void
4216 vfs_acquire_vnode(struct vnode* vnode)
4217 {
4218 	inc_vnode_ref_count(vnode);
4219 }
4220 
4221 
4222 /*! This is currently called from file_cache_create() only.
4223 	It's probably a temporary solution as long as devfs requires that
4224 	fs_read_pages()/fs_write_pages() are called with the standard
4225 	open cookie and not with a device cookie.
4226 	If that's done differently, remove this call; it has no other
4227 	purpose.
4228 */
4229 extern "C" status_t
4230 vfs_get_cookie_from_fd(int fd, void** _cookie)
4231 {
4232 	struct file_descriptor* descriptor;
4233 
4234 	descriptor = get_fd(get_current_io_context(true), fd);
4235 	if (descriptor == NULL)
4236 		return B_FILE_ERROR;
4237 
4238 	*_cookie = descriptor->cookie;
4239 	return B_OK;
4240 }
4241 
4242 
4243 extern "C" status_t
4244 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4245 {
4246 	*vnode = get_vnode_from_fd(fd, kernel);
4247 
4248 	if (*vnode == NULL)
4249 		return B_FILE_ERROR;
4250 
4251 	return B_OK;
4252 }
4253 
4254 
4255 extern "C" status_t
4256 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4257 {
4258 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4259 		path, kernel));
4260 
4261 	KPath pathBuffer;
4262 	if (pathBuffer.InitCheck() != B_OK)
4263 		return B_NO_MEMORY;
4264 
4265 	char* buffer = pathBuffer.LockBuffer();
4266 	strlcpy(buffer, path, pathBuffer.BufferSize());
4267 
4268 	struct vnode* vnode;
4269 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4270 	if (status != B_OK)
4271 		return status;
4272 
4273 	*_vnode = vnode;
4274 	return B_OK;
4275 }
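

/*	Example (illustrative sketch): resolving a (kernel) path to a vnode
	and releasing the obtained reference again:

		struct vnode* vnode;
		if (vfs_get_vnode_from_path("/boot/home", true, &vnode) == B_OK) {
			// ... use the vnode ...
			vfs_put_vnode(vnode);
		}
*/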
4276 
4277 
4278 extern "C" status_t
4279 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4280 {
4281 	struct vnode* vnode = NULL;
4282 
4283 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4284 	if (status != B_OK)
4285 		return status;
4286 
4287 	*_vnode = vnode;
4288 	return B_OK;
4289 }
4290 
4291 
4292 extern "C" status_t
4293 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4294 	const char* name, struct vnode** _vnode)
4295 {
4296 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4297 }
4298 
4299 
4300 extern "C" void
4301 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4302 {
4303 	*_mountID = vnode->device;
4304 	*_vnodeID = vnode->id;
4305 }
4306 
4307 
4308 /*!
4309 	Helper function abstracting the process of "converting" a given
4310 	vnode-pointer to a fs_vnode-pointer.
4311 	Currently only used in bindfs.
4312 */
4313 extern "C" fs_vnode*
4314 vfs_fsnode_for_vnode(struct vnode* vnode)
4315 {
4316 	return vnode;
4317 }
4318 
4319 
4320 /*!
4321 	Calls fs_open() on the given vnode and returns a new
4322 	file descriptor for it
4323 */
4324 int
4325 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4326 {
4327 	return open_vnode(vnode, openMode, kernel);
4328 }
4329 
4330 
4331 /*!	Looks up a vnode with the given mount and vnode ID.
4332 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4333 	to the node.
4334 	It's currently only used by file_cache_create().
4335 */
4336 extern "C" status_t
4337 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4338 {
4339 	rw_lock_read_lock(&sVnodeLock);
4340 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4341 	rw_lock_read_unlock(&sVnodeLock);
4342 
4343 	if (vnode == NULL)
4344 		return B_ERROR;
4345 
4346 	*_vnode = vnode;
4347 	return B_OK;
4348 }
4349 
4350 
4351 extern "C" status_t
4352 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4353 	bool traverseLeafLink, bool kernel, void** _node)
4354 {
4355 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4356 		volume, path, kernel));
4357 
4358 	KPath pathBuffer;
4359 	if (pathBuffer.InitCheck() != B_OK)
4360 		return B_NO_MEMORY;
4361 
4362 	fs_mount* mount;
4363 	status_t status = get_mount(volume->id, &mount);
4364 	if (status != B_OK)
4365 		return status;
4366 
4367 	char* buffer = pathBuffer.LockBuffer();
4368 	strlcpy(buffer, path, pathBuffer.BufferSize());
4369 
4370 	struct vnode* vnode = mount->root_vnode;
4371 
4372 	if (buffer[0] == '/')
4373 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4374 	else {
4375 		inc_vnode_ref_count(vnode);
4376 			// vnode_path_to_vnode() releases a reference to the starting vnode
4377 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4378 			kernel, &vnode, NULL);
4379 	}
4380 
4381 	put_mount(mount);
4382 
4383 	if (status != B_OK)
4384 		return status;
4385 
4386 	if (vnode->device != volume->id) {
4387 		// wrong mount ID - must not gain access to foreign file system nodes
4388 		put_vnode(vnode);
4389 		return B_BAD_VALUE;
4390 	}
4391 
4392 	// Use get_vnode() to resolve the cookie for the right layer.
4393 	status = get_vnode(volume, vnode->id, _node);
4394 	put_vnode(vnode);
4395 
4396 	return status;
4397 }
4398 
4399 
4400 status_t
4401 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4402 	struct stat* stat, bool kernel)
4403 {
4404 	status_t status;
4405 
4406 	if (path != NULL) {
4407 		// path given: get the stat of the node referred to by (fd, path)
4408 		KPath pathBuffer(path);
4409 		if (pathBuffer.InitCheck() != B_OK)
4410 			return B_NO_MEMORY;
4411 
4412 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4413 			traverseLeafLink, stat, kernel);
4414 	} else {
4415 		// no path given: get the FD and use the FD operation
4416 		struct file_descriptor* descriptor
4417 			= get_fd(get_current_io_context(kernel), fd);
4418 		if (descriptor == NULL)
4419 			return B_FILE_ERROR;
4420 
4421 		if (descriptor->ops->fd_read_stat)
4422 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4423 		else
4424 			status = B_UNSUPPORTED;
4425 
4426 		put_fd(descriptor);
4427 	}
4428 
4429 	return status;
4430 }
4431 
4432 
4433 /*!	Finds the full path to the file that contains the module \a moduleName,
4434 	puts it into \a pathBuffer, and returns B_OK for success.
4435 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4436 	\c B_ENTRY_NOT_FOUND if no file could be found.
4437 	\a pathBuffer is clobbered in any case and must not be relied on if this
4438 	function returns unsuccessfully.
4439 	\a basePath and \a pathBuffer must not point to the same space.
4440 */
4441 status_t
4442 vfs_get_module_path(const char* basePath, const char* moduleName,
4443 	char* pathBuffer, size_t bufferSize)
4444 {
4445 	struct vnode* dir;
4446 	struct vnode* file;
4447 	status_t status;
4448 	size_t length;
4449 	char* path;
4450 
4451 	if (bufferSize == 0
4452 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4453 		return B_BUFFER_OVERFLOW;
4454 
4455 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4456 	if (status != B_OK)
4457 		return status;
4458 
4459 	// the path buffer had been clobbered by the above call
4460 	length = strlcpy(pathBuffer, basePath, bufferSize);
4461 	if (pathBuffer[length - 1] != '/')
4462 		pathBuffer[length++] = '/';
4463 
4464 	path = pathBuffer + length;
4465 	bufferSize -= length;
4466 
4467 	while (moduleName) {
4468 		char* nextPath = strchr(moduleName, '/');
4469 		if (nextPath == NULL)
4470 			length = strlen(moduleName);
4471 		else {
4472 			length = nextPath - moduleName;
4473 			nextPath++;
4474 		}
4475 
4476 		if (length + 1 >= bufferSize) {
4477 			status = B_BUFFER_OVERFLOW;
4478 			goto err;
4479 		}
4480 
4481 		memcpy(path, moduleName, length);
4482 		path[length] = '\0';
4483 		moduleName = nextPath;
4484 
4485 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4486 		if (status != B_OK) {
4487 			// vnode_path_to_vnode() has already released the reference to dir
4488 			return status;
4489 		}
4490 
4491 		if (S_ISDIR(file->Type())) {
4492 			// go to the next directory
4493 			path[length] = '/';
4494 			path[length + 1] = '\0';
4495 			path += length + 1;
4496 			bufferSize -= length + 1;
4497 
4498 			dir = file;
4499 		} else if (S_ISREG(file->Type())) {
4500 			// it's a file, so it should be what we've been searching for
4501 			put_vnode(file);
4502 
4503 			return B_OK;
4504 		} else {
4505 			TRACE(("vfs_get_module_path(): something is strange here: "
4506 				"0x%08" B_PRIx32 "...\n", file->Type()));
4507 			status = B_ERROR;
4508 			dir = file;
4509 			goto err;
4510 		}
4511 	}
4512 
4513 	// if we got here, the moduleName just pointed to a directory, not to
4514 	// a real module - what should we do in this case?
4515 	status = B_ENTRY_NOT_FOUND;
4516 
4517 err:
4518 	put_vnode(dir);
4519 	return status;
4520 }
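

/*	Example (illustrative sketch): resolving a module name to the file
	implementing it. The base path and module name are only illustrative.

		char path[B_PATH_NAME_LENGTH];
		status_t status = vfs_get_module_path(
			"/boot/system/add-ons/kernel", "bus_managers/pci/v1",
			path, sizeof(path));
		// on success, "path" names the file containing the module
*/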
4521 
4522 
4523 /*!	\brief Normalizes a given path.
4524 
4525 	The path must refer to an existing or non-existing entry in an existing
4526 	directory; that is, after chopping off the leaf component, the
4527 	remaining path must refer to an existing directory.
4528 
4529 	The returned path will be canonical in that it will be absolute, will not
4530 	contain any "." or ".." components or duplicate occurrences of '/'s,
4531 	and none of the directory components will be symbolic links.
4532 
4533 	Any two paths referring to the same entry will result in the same
4534 	normalized path (well, that is pretty much the definition of `normalized',
4535 	isn't it :-).
4536 
4537 	\param path The path to be normalized.
4538 	\param buffer The buffer into which the normalized path will be written.
4539 		   May be the same one as \a path.
4540 	\param bufferSize The size of \a buffer.
4541 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4542 	\param kernel \c true, if the IO context of the kernel shall be used,
4543 		   otherwise that of the team this thread belongs to. Only relevant,
4544 		   if the path is relative (to get the CWD).
4545 	\return \c B_OK if everything went fine, another error code otherwise.
4546 */
4547 status_t
4548 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4549 	bool traverseLink, bool kernel)
4550 {
4551 	if (!path || !buffer || bufferSize < 1)
4552 		return B_BAD_VALUE;
4553 
4554 	if (path != buffer) {
4555 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4556 			return B_BUFFER_OVERFLOW;
4557 	}
4558 
4559 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4560 }
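

/*	Example (illustrative sketch): normalization may be done in place,
	since \a buffer may be the same as \a path:

		char path[B_PATH_NAME_LENGTH] = "/boot/./home/../home/Desktop";
		if (vfs_normalize_path(path, path, sizeof(path), true, true)
				== B_OK) {
			// path now reads "/boot/home/Desktop" (assuming no symlink
			// is involved)
		}
*/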
4561 
4562 
4563 /*!	\brief Gets the parent of the passed in node.
4564 
4565 	Gets the parent of the passed in node, and correctly resolves covered
4566 	nodes.
4567 */
4568 extern "C" status_t
4569 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4570 {
4571 	return resolve_covered_parent(parent, device, node,
4572 		get_current_io_context(true));
4573 }
4574 
4575 
4576 /*!	\brief Creates a special node in the file system.
4577 
4578 	The caller gets a reference to the newly created node (which is passed
4579 	back through \a _createdVnode) and is responsible for releasing it.
4580 
4581 	\param path The path where to create the entry for the node. Can be \c NULL,
4582 		in which case the node is created without an entry in the root FS -- it
4583 		will automatically be deleted when the last reference has been released.
4584 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4585 		the target file system will just create the node with its standard
4586 		operations. Depending on the type of the node a subnode might be created
4587 		automatically, though.
4588 	\param mode The type and permissions for the node to be created.
4589 	\param flags Flags to be passed to the creating FS.
4590 	\param kernel \c true, if called in the kernel context (relevant only if
4591 		\a path is not \c NULL and not absolute).
4592 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4593 		file system creating the node, with the private data pointer and
4594 		operations for the super node. Can be \c NULL.
4595 	\param _createdVnode Pointer to pre-allocated storage where to store the
4596 		pointer to the newly created node.
4597 	\return \c B_OK, if everything went fine, another error code otherwise.
4598 */
4599 status_t
4600 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4601 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4602 	struct vnode** _createdVnode)
4603 {
4604 	struct vnode* dirNode;
4605 	char _leaf[B_FILE_NAME_LENGTH];
4606 	char* leaf = NULL;
4607 
4608 	if (path) {
4609 		// We've got a path. Get the dir vnode and the leaf name.
4610 		KPath tmpPathBuffer;
4611 		if (tmpPathBuffer.InitCheck() != B_OK)
4612 			return B_NO_MEMORY;
4613 
4614 		char* tmpPath = tmpPathBuffer.LockBuffer();
4615 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4616 			return B_NAME_TOO_LONG;
4617 
4618 		// get the dir vnode and the leaf name
4619 		leaf = _leaf;
4620 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4621 		if (error != B_OK)
4622 			return error;
4623 	} else {
4624 		// No path. Create the node in the root FS.
4625 		dirNode = sRoot;
4626 		inc_vnode_ref_count(dirNode);
4627 	}
4628 
4629 	VNodePutter _(dirNode);
4630 
4631 	// check support for creating special nodes
4632 	if (!HAS_FS_CALL(dirNode, create_special_node))
4633 		return B_UNSUPPORTED;
4634 
4635 	// create the node
4636 	fs_vnode superVnode;
4637 	ino_t nodeID;
4638 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4639 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4640 	if (status != B_OK)
4641 		return status;
4642 
4643 	// look up the node
4644 	rw_lock_read_lock(&sVnodeLock);
4645 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4646 	rw_lock_read_unlock(&sVnodeLock);
4647 
4648 	if (*_createdVnode == NULL) {
4649 		panic("vfs_create_special_node(): lookup of node failed");
4650 		return B_ERROR;
4651 	}
4652 
4653 	return B_OK;
4654 }
4655 
4656 
4657 extern "C" void
4658 vfs_put_vnode(struct vnode* vnode)
4659 {
4660 	put_vnode(vnode);
4661 }
4662 
4663 
4664 extern "C" status_t
4665 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4666 {
4667 	// Get current working directory from io context
4668 	struct io_context* context = get_current_io_context(false);
4669 	status_t status = B_OK;
4670 
4671 	mutex_lock(&context->io_mutex);
4672 
4673 	if (context->cwd != NULL) {
4674 		*_mountID = context->cwd->device;
4675 		*_vnodeID = context->cwd->id;
4676 	} else
4677 		status = B_ERROR;
4678 
4679 	mutex_unlock(&context->io_mutex);
4680 	return status;
4681 }
4682 
4683 
4684 status_t
4685 vfs_unmount(dev_t mountID, uint32 flags)
4686 {
4687 	return fs_unmount(NULL, mountID, flags, true);
4688 }
4689 
4690 
4691 extern "C" status_t
4692 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4693 {
4694 	struct vnode* vnode;
4695 
4696 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4697 	if (status != B_OK)
4698 		return status;
4699 
4700 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4701 	put_vnode(vnode);
4702 	return B_OK;
4703 }
4704 
4705 
4706 extern "C" void
4707 vfs_free_unused_vnodes(int32 level)
4708 {
4709 	vnode_low_resource_handler(NULL,
4710 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4711 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4712 		level);
4713 }
4714 
4715 
4716 extern "C" bool
4717 vfs_can_page(struct vnode* vnode, void* cookie)
4718 {
4719 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4720 
4721 	if (HAS_FS_CALL(vnode, can_page))
4722 		return FS_CALL(vnode, can_page, cookie);
4723 	return false;
4724 }
4725 
4726 
4727 extern "C" status_t
4728 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4729 	const generic_io_vec* vecs, size_t count, uint32 flags,
4730 	generic_size_t* _numBytes)
4731 {
4732 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4733 		vecs, pos));
4734 
4735 #if VFS_PAGES_IO_TRACING
4736 	generic_size_t bytesRequested = *_numBytes;
4737 #endif
4738 
4739 	IORequest request;
4740 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4741 	if (status == B_OK) {
4742 		status = vfs_vnode_io(vnode, cookie, &request);
4743 		if (status == B_OK)
4744 			status = request.Wait();
4745 		*_numBytes = request.TransferredBytes();
4746 	}
4747 
4748 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4749 		status, *_numBytes));
4750 
4751 	return status;
4752 }
4753 
4754 
4755 extern "C" status_t
4756 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4757 	const generic_io_vec* vecs, size_t count, uint32 flags,
4758 	generic_size_t* _numBytes)
4759 {
4760 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4761 		vecs, pos));
4762 
4763 #if VFS_PAGES_IO_TRACING
4764 	generic_size_t bytesRequested = *_numBytes;
4765 #endif
4766 
4767 	IORequest request;
4768 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4769 	if (status == B_OK) {
4770 		status = vfs_vnode_io(vnode, cookie, &request);
4771 		if (status == B_OK)
4772 			status = request.Wait();
4773 		*_numBytes = request.TransferredBytes();
4774 	}
4775 
4776 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4777 		status, *_numBytes));
4778 
4779 	return status;
4780 }
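

/*	Example (illustrative sketch): reading from a vnode through the page
	I/O path. The cookie would stem from an earlier FS open; flags 0
	means the vector addresses are interpreted as virtual addresses.

		char* buffer = ...;		// a kernel buffer of B_PAGE_SIZE bytes

		generic_io_vec vec;
		vec.base = (generic_addr_t)buffer;
		vec.length = B_PAGE_SIZE;

		generic_size_t bytes = vec.length;
		status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0,
			&bytes);
		// on success, "bytes" holds the number of bytes actually read
*/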
4781 
4782 
4783 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet,
4784 	it will be created if \a allocate is \c true.
4785 	In case it's successful, it will also grab a reference to the cache
4786 	it returns.
4787 */
4788 extern "C" status_t
4789 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4790 {
4791 	if (vnode->cache != NULL) {
4792 		vnode->cache->AcquireRef();
4793 		*_cache = vnode->cache;
4794 		return B_OK;
4795 	}
4796 
4797 	rw_lock_read_lock(&sVnodeLock);
4798 	vnode->Lock();
4799 
4800 	status_t status = B_OK;
4801 
4802 	// The cache could have been created in the meantime
4803 	if (vnode->cache == NULL) {
4804 		if (allocate) {
4805 			// TODO: actually the vnode needs to be busy already here, or
4806 			//	else this won't work...
4807 			bool wasBusy = vnode->IsBusy();
4808 			vnode->SetBusy(true);
4809 
4810 			vnode->Unlock();
4811 			rw_lock_read_unlock(&sVnodeLock);
4812 
4813 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4814 
4815 			rw_lock_read_lock(&sVnodeLock);
4816 			vnode->Lock();
4817 			vnode->SetBusy(wasBusy);
4818 		} else
4819 			status = B_BAD_VALUE;
4820 	}
4821 
4822 	vnode->Unlock();
4823 	rw_lock_read_unlock(&sVnodeLock);
4824 
4825 	if (status == B_OK) {
4826 		vnode->cache->AcquireRef();
4827 		*_cache = vnode->cache;
4828 	}
4829 
4830 	return status;
4831 }
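

/*	Example (illustrative sketch): obtaining (and later releasing) the
	VMCache backing a vnode:

		VMCache* cache;
		if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
			// ... use the cache ...
			cache->ReleaseRef();
		}
*/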
4832 
4833 
4834 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4835 	their own.
4836 	In case it's successful, it also acquires a reference to the given
4837 	cache.
4838 */
4839 extern "C" status_t
4840 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4841 {
4842 	rw_lock_read_lock(&sVnodeLock);
4843 	vnode->Lock();
4844 
4845 	status_t status = B_OK;
4846 	if (vnode->cache != NULL) {
4847 		status = B_NOT_ALLOWED;
4848 	} else {
4849 		vnode->cache = _cache;
4850 		_cache->AcquireRef();
4851 	}
4852 
4853 	vnode->Unlock();
4854 	rw_lock_read_unlock(&sVnodeLock);
4855 	return status;
4856 }
4857 
4858 
4859 status_t
4860 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4861 	file_io_vec* vecs, size_t* _count)
4862 {
4863 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4864 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4865 
4866 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4867 }
4868 
4869 
4870 status_t
4871 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4872 {
4873 	status_t status = FS_CALL(vnode, read_stat, stat);
4874 
4875 	// fill in the st_dev and st_ino fields
4876 	if (status == B_OK) {
4877 		stat->st_dev = vnode->device;
4878 		stat->st_ino = vnode->id;
4879 		// the rdev field must stay unset for non-special files
4880 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4881 			stat->st_rdev = -1;
4882 	}
4883 
4884 	return status;
4885 }
4886 
4887 
4888 status_t
4889 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4890 {
4891 	struct vnode* vnode;
4892 	status_t status = get_vnode(device, inode, &vnode, true, false);
4893 	if (status != B_OK)
4894 		return status;
4895 
4896 	status = vfs_stat_vnode(vnode, stat);
4897 
4898 	put_vnode(vnode);
4899 	return status;
4900 }
4901 
4902 
4903 status_t
4904 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4905 {
4906 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4907 }
4908 
4909 
4910 status_t
4911 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4912 	bool kernel, char* path, size_t pathLength)
4913 {
4914 	struct vnode* vnode;
4915 	status_t status;
4916 
4917 	// filter invalid leaf names
4918 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4919 		return B_BAD_VALUE;
4920 
4921 	// get the vnode matching the dir's node_ref
4922 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4923 		// special cases "." and "..": we can directly get the vnode of the
4924 		// referenced directory
4925 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4926 		leaf = NULL;
4927 	} else
4928 		status = get_vnode(device, inode, &vnode, true, false);
4929 	if (status != B_OK)
4930 		return status;
4931 
4932 	// get the directory path
4933 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4934 	put_vnode(vnode);
4935 		// we don't need the vnode anymore
4936 	if (status != B_OK)
4937 		return status;
4938 
4939 	// append the leaf name
4940 	if (leaf) {
4941 		// insert a directory separator if this is not the file system root
4942 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4943 				>= pathLength)
4944 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4945 			return B_NAME_TOO_LONG;
4946 		}
4947 	}
4948 
4949 	return B_OK;
4950 }
4951 
4952 
4953 /*!	If the given descriptor locked its vnode, that lock will be released. */
4954 void
4955 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4956 {
4957 	struct vnode* vnode = fd_vnode(descriptor);
4958 
4959 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4960 		vnode->mandatory_locked_by = NULL;
4961 }
4962 
4963 
4964 /*!	Releases any POSIX locks on the file descriptor. */
4965 status_t
4966 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4967 {
4968 	struct vnode* vnode = descriptor->u.vnode;
4969 	if (vnode == NULL)
4970 		return B_OK;
4971 
4972 	if (HAS_FS_CALL(vnode, release_lock))
4973 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4974 
4975 	return release_advisory_lock(vnode, context, NULL, NULL);
4976 }
4977 
4978 
4979 /*!	Closes all file descriptors of the specified I/O context that
4980 	have the O_CLOEXEC flag set.
4981 */
4982 void
4983 vfs_exec_io_context(io_context* context)
4984 {
4985 	uint32 i;
4986 
4987 	for (i = 0; i < context->table_size; i++) {
4988 		mutex_lock(&context->io_mutex);
4989 
4990 		struct file_descriptor* descriptor = context->fds[i];
4991 		bool remove = false;
4992 
4993 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4994 			context->fds[i] = NULL;
4995 			context->num_used_fds--;
4996 
4997 			remove = true;
4998 		}
4999 
5000 		mutex_unlock(&context->io_mutex);
5001 
5002 		if (remove) {
5003 			close_fd(context, descriptor);
5004 			put_fd(descriptor);
5005 		}
5006 	}
5007 }
5008 
5009 
5010 /*! Sets up a new io_context structure, and inherits the properties
5011 	of the parent io_context if one is given.
5012 */
5013 io_context*
5014 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
5015 {
5016 	io_context* context = (io_context*)malloc(sizeof(io_context));
5017 	if (context == NULL)
5018 		return NULL;
5019 
5020 	TIOC(NewIOContext(context, parentContext));
5021 
5022 	memset(context, 0, sizeof(io_context));
5023 	context->ref_count = 1;
5024 
5025 	MutexLocker parentLocker;
5026 
5027 	size_t tableSize;
5028 	if (parentContext != NULL) {
5029 		parentLocker.SetTo(parentContext->io_mutex, false);
5030 		tableSize = parentContext->table_size;
5031 	} else
5032 		tableSize = DEFAULT_FD_TABLE_SIZE;
5033 
5034 	// allocate space for FDs, select infos, and the close-on-exec flags
5035 	context->fds = (file_descriptor**)malloc(
5036 		sizeof(struct file_descriptor*) * tableSize
5037 		+ sizeof(struct select_sync*) * tableSize
5038 		+ (tableSize + 7) / 8);
5039 	if (context->fds == NULL) {
5040 		free(context);
5041 		return NULL;
5042 	}
5043 
5044 	context->select_infos = (select_info**)(context->fds + tableSize);
5045 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5046 
5047 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5048 		+ sizeof(struct select_sync*) * tableSize
5049 		+ (tableSize + 7) / 8);
5050 
5051 	mutex_init(&context->io_mutex, "I/O context");
5052 
5053 	// Copy all parent file descriptors
5054 
5055 	if (parentContext != NULL) {
5056 		size_t i;
5057 
5058 		mutex_lock(&sIOContextRootLock);
5059 		context->root = parentContext->root;
5060 		if (context->root)
5061 			inc_vnode_ref_count(context->root);
5062 		mutex_unlock(&sIOContextRootLock);
5063 
5064 		context->cwd = parentContext->cwd;
5065 		if (context->cwd)
5066 			inc_vnode_ref_count(context->cwd);
5067 
5068 		if (parentContext->inherit_fds) {
5069 			for (i = 0; i < tableSize; i++) {
5070 				struct file_descriptor* descriptor = parentContext->fds[i];
5071 
5072 				if (descriptor != NULL
5073 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5074 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5075 					if (closeOnExec && purgeCloseOnExec)
5076 						continue;
5077 
5078 					TFD(InheritFD(context, i, descriptor, parentContext));
5079 
5080 					context->fds[i] = descriptor;
5081 					context->num_used_fds++;
5082 					atomic_add(&descriptor->ref_count, 1);
5083 					atomic_add(&descriptor->open_count, 1);
5084 
5085 					if (closeOnExec)
5086 						fd_set_close_on_exec(context, i, true);
5087 				}
5088 			}
5089 		}
5090 
5091 		parentLocker.Unlock();
5092 	} else {
5093 		context->root = sRoot;
5094 		context->cwd = sRoot;
5095 
5096 		if (context->root)
5097 			inc_vnode_ref_count(context->root);
5098 
5099 		if (context->cwd)
5100 			inc_vnode_ref_count(context->cwd);
5101 	}
5102 
5103 	context->table_size = tableSize;
5104 	context->inherit_fds = parentContext != NULL;
5105 
5106 	list_init(&context->node_monitors);
5107 	context->max_monitors = DEFAULT_NODE_MONITORS;
5108 
5109 	return context;
5110 }
5111 
5112 
5113 void
5114 vfs_get_io_context(io_context* context)
5115 {
5116 	atomic_add(&context->ref_count, 1);
5117 }
5118 
5119 
5120 void
5121 vfs_put_io_context(io_context* context)
5122 {
5123 	if (atomic_add(&context->ref_count, -1) == 1)
5124 		free_io_context(context);
5125 }
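

/*	Example (illustrative sketch): an I/O context is reference counted.
	Team creation would obtain one (inheriting from the parent context,
	if any) and release it again on team teardown:

		io_context* context = vfs_new_io_context(parentContext, false);
		if (context == NULL)
			return B_NO_MEMORY;

		// ... later, when the team goes away:
		vfs_put_io_context(context);
*/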
5126 
5127 
5128 status_t
5129 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5130 {
5131 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5132 		return B_BAD_VALUE;
5133 
5134 	TIOC(ResizeIOContext(context, newSize));
5135 
5136 	MutexLocker _(context->io_mutex);
5137 
5138 	uint32 oldSize = context->table_size;
5139 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5140 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5141 
5142 	// If the tables shrink, make sure none of the fds being dropped are in use.
5143 	if (newSize < oldSize) {
5144 		for (uint32 i = oldSize; i-- > newSize;) {
5145 			if (context->fds[i])
5146 				return B_BUSY;
5147 		}
5148 	}
5149 
5150 	// store pointers to the old tables
5151 	file_descriptor** oldFDs = context->fds;
5152 	select_info** oldSelectInfos = context->select_infos;
5153 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5154 
5155 	// allocate new tables
5156 	file_descriptor** newFDs = (file_descriptor**)malloc(
5157 		sizeof(struct file_descriptor*) * newSize
5158 		+ sizeof(struct select_sync*) * newSize
5159 		+ newCloseOnExecBitmapSize);
5160 	if (newFDs == NULL)
5161 		return B_NO_MEMORY;
5162 
5163 	context->fds = newFDs;
5164 	context->select_infos = (select_info**)(context->fds + newSize);
5165 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5166 	context->table_size = newSize;
5167 
5168 	// copy entries from old tables
5169 	uint32 toCopy = min_c(oldSize, newSize);
5170 
5171 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5172 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5173 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5174 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5175 
5176 	// clear additional entries, if the tables grow
5177 	if (newSize > oldSize) {
5178 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5179 		memset(context->select_infos + oldSize, 0,
5180 			sizeof(void*) * (newSize - oldSize));
5181 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5182 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5183 	}
5184 
5185 	free(oldFDs);
5186 
5187 	return B_OK;
5188 }
5189 
5190 
5191 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5192 
5193 	Given an arbitrary vnode (identified by mount and node ID), the function
5194 	checks whether the vnode is covered by another vnode. If it is, the
5195 	function returns the mount and node ID of the covering vnode. Otherwise
5196 	it simply returns the supplied mount and node ID.
5197 
5198 	In case of error (e.g. the supplied node could not be found) the variables
5199 	for storing the resolved mount and node ID remain untouched and an error
5200 	code is returned.
5201 
5202 	\param mountID The mount ID of the vnode in question.
5203 	\param nodeID The node ID of the vnode in question.
5204 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5205 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5206 	\return
5207 	- \c B_OK, if everything went fine,
5208 	- another error code, if something went wrong.
5209 */
5210 status_t
5211 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5212 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5213 {
5214 	// get the node
5215 	struct vnode* node;
5216 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5217 	if (error != B_OK)
5218 		return error;
5219 
5220 	// resolve the node
5221 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5222 		put_vnode(node);
5223 		node = coveringNode;
5224 	}
5225 
5226 	// set the return values
5227 	*resolvedMountID = node->device;
5228 	*resolvedNodeID = node->id;
5229 
5230 	put_vnode(node);
5231 
5232 	return B_OK;
5233 }
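

/*	Example (illustrative sketch): translating a node_ref that may denote
	a covered mount point into the node a path lookup would actually
	yield:

		dev_t resolvedDevice;
		ino_t resolvedNode;
		if (vfs_resolve_vnode_to_covering_vnode(device, node,
				&resolvedDevice, &resolvedNode) == B_OK) {
			// if (device, node) was covered by a mount, the resolved
			// pair denotes the root of the mounted volume; otherwise
			// it is simply (device, node)
		}
*/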
5234 
5235 
5236 status_t
5237 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5238 	ino_t* _mountPointNodeID)
5239 {
5240 	ReadLocker nodeLocker(sVnodeLock);
5241 	ReadLocker mountLocker(sMountLock);
5242 
5243 	struct fs_mount* mount = find_mount(mountID);
5244 	if (mount == NULL)
5245 		return B_BAD_VALUE;
5246 
5247 	Vnode* mountPoint = mount->covers_vnode;
5248 
5249 	*_mountPointMountID = mountPoint->device;
5250 	*_mountPointNodeID = mountPoint->id;
5251 
5252 	return B_OK;
5253 }
5254 
5255 
5256 status_t
5257 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5258 	ino_t coveredNodeID)
5259 {
5260 	// get the vnodes
5261 	Vnode* vnode;
5262 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5263 	if (error != B_OK)
5264 		return B_BAD_VALUE;
5265 	VNodePutter vnodePutter(vnode);
5266 
5267 	Vnode* coveredVnode;
5268 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5269 		false);
5270 	if (error != B_OK)
5271 		return B_BAD_VALUE;
5272 	VNodePutter coveredVnodePutter(coveredVnode);
5273 
5274 	// establish the covered/covering links
5275 	WriteLocker locker(sVnodeLock);
5276 
5277 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5278 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5279 		return B_BUSY;
5280 	}
5281 
5282 	vnode->covers = coveredVnode;
5283 	vnode->SetCovering(true);
5284 
5285 	coveredVnode->covered_by = vnode;
5286 	coveredVnode->SetCovered(true);
5287 
5288 	// the vnodes do now reference each other
5289 	inc_vnode_ref_count(vnode);
5290 	inc_vnode_ref_count(coveredVnode);
5291 
5292 	return B_OK;
5293 }
5294 
5295 
5296 int
5297 vfs_getrlimit(int resource, struct rlimit* rlp)
5298 {
5299 	if (!rlp)
5300 		return B_BAD_ADDRESS;
5301 
5302 	switch (resource) {
5303 		case RLIMIT_NOFILE:
5304 		{
5305 			struct io_context* context = get_current_io_context(false);
5306 			MutexLocker _(context->io_mutex);
5307 
5308 			rlp->rlim_cur = context->table_size;
5309 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5310 			return 0;
5311 		}
5312 
5313 		case RLIMIT_NOVMON:
5314 		{
5315 			struct io_context* context = get_current_io_context(false);
5316 			MutexLocker _(context->io_mutex);
5317 
5318 			rlp->rlim_cur = context->max_monitors;
5319 			rlp->rlim_max = MAX_NODE_MONITORS;
5320 			return 0;
5321 		}
5322 
5323 		default:
5324 			return B_BAD_VALUE;
5325 	}
5326 }
5327 
5328 
5329 int
5330 vfs_setrlimit(int resource, const struct rlimit* rlp)
5331 {
5332 	if (!rlp)
5333 		return B_BAD_ADDRESS;
5334 
5335 	switch (resource) {
5336 		case RLIMIT_NOFILE:
5337 			/* TODO: check getuid() */
5338 			if (rlp->rlim_max != RLIM_SAVED_MAX
5339 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5340 				return B_NOT_ALLOWED;
5341 
5342 			return vfs_resize_fd_table(get_current_io_context(false),
5343 				rlp->rlim_cur);
5344 
5345 		case RLIMIT_NOVMON:
5346 			/* TODO: check getuid() */
5347 			if (rlp->rlim_max != RLIM_SAVED_MAX
5348 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5349 				return B_NOT_ALLOWED;
5350 
5351 			return resize_monitor_table(get_current_io_context(false),
5352 				rlp->rlim_cur);
5353 
5354 		default:
5355 			return B_BAD_VALUE;
5356 	}
5357 }
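

/*	Example (illustrative sketch): from userland the FD table is resized
	through the POSIX interface, which ends up in vfs_setrlimit():

		struct rlimit rl;
		rl.rlim_cur = 1024;
		rl.rlim_max = RLIM_SAVED_MAX;
		if (setrlimit(RLIMIT_NOFILE, &rl) == 0) {
			// the FD table now holds at least 1024 slots
		}
*/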
5358 
5359 
5360 status_t
5361 vfs_init(kernel_args* args)
5362 {
5363 	vnode::StaticInit();
5364 
5365 	sVnodeTable = new(std::nothrow) VnodeTable();
5366 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5367 		panic("vfs_init: error creating vnode hash table\n");
5368 
5369 	struct vnode dummy_vnode;
5370 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5371 
5372 	struct fs_mount dummyMount;
5373 	sMountsTable = new(std::nothrow) MountTable();
5374 	if (sMountsTable == NULL
5375 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5376 		panic("vfs_init: error creating mounts hash table\n");
5377 
5378 	sPathNameCache = create_object_cache("vfs path names",
5379 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5380 	if (sPathNameCache == NULL)
5381 		panic("vfs_init: error creating path name object_cache\n");
5382 
5383 	sFileDescriptorCache = create_object_cache("vfs fds",
5384 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5385 	if (sFileDescriptorCache == NULL)
5386 		panic("vfs_init: error creating file descriptor object_cache\n");
5387 
5388 	node_monitor_init();
5389 
5390 	sRoot = NULL;
5391 
5392 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5393 
5394 	if (block_cache_init() != B_OK)
5395 		return B_ERROR;
5396 
5397 #ifdef ADD_DEBUGGER_COMMANDS
5398 	// add some debugger commands
5399 	add_debugger_command_etc("vnode", &dump_vnode,
5400 		"Print info about the specified vnode",
5401 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5402 		"Prints information about the vnode specified by address <vnode> or\n"
5403 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5404 		"constructed and printed. It might not be possible to construct a\n"
5405 		"complete path, though.\n",
5406 		0);
5407 	add_debugger_command("vnodes", &dump_vnodes,
5408 		"list all vnodes (from the specified device)");
5409 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5410 		"list all vnode caches");
5411 	add_debugger_command("mount", &dump_mount,
5412 		"info about the specified fs_mount");
5413 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5414 	add_debugger_command("io_context", &dump_io_context,
5415 		"info about the I/O context");
5416 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5417 		"info about vnode usage");
5418 #endif
5419 
5420 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5421 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5422 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5423 		0);
5424 
5425 	fifo_init();
5426 	file_map_init();
5427 
5428 	return file_cache_init();
5429 }
5430 
5431 
5432 //	#pragma mark - fd_ops implementations
5433 
5434 
5435 /*!
5436 	Calls fs_open() on the given vnode and returns a new
5437 	file descriptor for it
5438 */
5439 static int
5440 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5441 {
5442 	void* cookie;
5443 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5444 	if (status != B_OK)
5445 		return status;
5446 
5447 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5448 	if (fd < 0) {
5449 		FS_CALL(vnode, close, cookie);
5450 		FS_CALL(vnode, free_cookie, cookie);
5451 	}
5452 	return fd;
5453 }
5454 
5455 
5456 /*!
5457 	Creates the entry \a name in the given directory (or opens the existing
5458 	entry, unless \c O_EXCL is set) and returns a new file descriptor for it
5459 */
5460 static int
5461 create_vnode(struct vnode* directory, const char* name, int openMode,
5462 	int perms, bool kernel)
5463 {
5464 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5465 	status_t status = B_ERROR;
5466 	struct vnode* vnode;
5467 	void* cookie;
5468 	ino_t newID;
5469 
5470 	// This is somewhat tricky: If the entry already exists, the FS responsible
5471 	// for the directory might not necessarily also be the one responsible for
5472 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5473 	// we can actually never call the create() hook without O_EXCL. Instead we
5474 	// try to look the entry up first. If it already exists, we just open the
5475 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5476 	// introduces a race condition, since someone else might have created the
5477 	// entry in the meantime. We hope the respective FS returns the correct
5478 	// error code, in which case we retry (up to 3 times).
5479 
5480 	for (int i = 0; i < 3 && status != B_OK; i++) {
5481 		// look the node up
5482 		status = lookup_dir_entry(directory, name, &vnode);
5483 		if (status == B_OK) {
5484 			VNodePutter putter(vnode);
5485 
5486 			if ((openMode & O_EXCL) != 0)
5487 				return B_FILE_EXISTS;
5488 
5489 			// If the node is a symlink, we have to follow it, unless
5490 			// O_NOTRAVERSE is set.
5491 			if (S_ISLNK(vnode->Type()) && traverse) {
5492 				putter.Put();
5493 				char clonedName[B_FILE_NAME_LENGTH + 1];
5494 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5495 						>= B_FILE_NAME_LENGTH) {
5496 					return B_NAME_TOO_LONG;
5497 				}
5498 
5499 				inc_vnode_ref_count(directory);
5500 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5501 					kernel, &vnode, NULL);
5502 				if (status != B_OK)
5503 					return status;
5504 
5505 				putter.SetTo(vnode);
5506 			}
5507 
5508 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5509 				return B_LINK_LIMIT;
5510 
5511 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5512 			// on success keep the vnode reference for the FD
5513 			if (fd >= 0)
5514 				putter.Detach();
5515 
5516 			return fd;
5517 		}
5518 
5519 		// it doesn't exist yet -- try to create it
5520 
5521 		if (!HAS_FS_CALL(directory, create))
5522 			return B_READ_ONLY_DEVICE;
5523 
5524 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5525 			&cookie, &newID);
5526 		if (status != B_OK
5527 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5528 			return status;
5529 		}
5530 	}
5531 
5532 	if (status != B_OK)
5533 		return status;
5534 
5535 	// the node has been created successfully
5536 
5537 	rw_lock_read_lock(&sVnodeLock);
5538 	vnode = lookup_vnode(directory->device, newID);
5539 	rw_lock_read_unlock(&sVnodeLock);
5540 
5541 	if (vnode == NULL) {
5542 		panic("vfs: fs_create() returned success but there is no vnode, "
5543 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5544 		return B_BAD_VALUE;
5545 	}
5546 
5547 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5548 	if (fd >= 0)
5549 		return fd;
5550 
5551 	status = fd;
5552 
5553 	// something went wrong, clean up
5554 
5555 	FS_CALL(vnode, close, cookie);
5556 	FS_CALL(vnode, free_cookie, cookie);
5557 	put_vnode(vnode);
5558 
5559 	FS_CALL(directory, unlink, name);
5560 
5561 	return status;
5562 }
5563 
5564 
5565 /*! Calls fs open_dir() on the given vnode and returns a new
5566 	file descriptor for it
5567 */
5568 static int
5569 open_dir_vnode(struct vnode* vnode, bool kernel)
5570 {
5571 	void* cookie;
5572 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5573 	if (status != B_OK)
5574 		return status;
5575 
5576 	// directory is opened, create a fd
5577 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5578 	if (status >= 0)
5579 		return status;
5580 
5581 	FS_CALL(vnode, close_dir, cookie);
5582 	FS_CALL(vnode, free_dir_cookie, cookie);
5583 
5584 	return status;
5585 }
5586 
5587 
5588 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5589 	file descriptor for it.
5590 	Used by attr_dir_open(), and attr_dir_open_fd().
5591 	Used by attr_dir_open() and attr_dir_open_fd().
5592 static int
5593 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5594 {
5595 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5596 		return B_UNSUPPORTED;
5597 
5598 	void* cookie;
5599 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5600 	if (status != B_OK)
5601 		return status;
5602 
5603 	// directory is opened, create a fd
5604 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5605 		kernel);
5606 	if (status >= 0)
5607 		return status;
5608 
5609 	FS_CALL(vnode, close_attr_dir, cookie);
5610 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5611 
5612 	return status;
5613 }
5614 
5615 
5616 static int
5617 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5618 	int openMode, int perms, bool kernel)
5619 {
5620 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5621 		"kernel %d\n", name, openMode, perms, kernel));
5622 
5623 	// get directory to put the new file in
5624 	struct vnode* directory;
5625 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5626 	if (status != B_OK)
5627 		return status;
5628 
5629 	status = create_vnode(directory, name, openMode, perms, kernel);
5630 	put_vnode(directory);
5631 
5632 	return status;
5633 }
5634 
5635 
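/*! Creates and opens the file at the given fd + path location and returns
	a new file descriptor for it. The leaf name is split off the path; the
	remaining directory portion must already exist.
*/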
5636 static int
5637 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5638 {
5639 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5640 		openMode, perms, kernel));
5641 
5642 	// get directory to put the new file in
5643 	char name[B_FILE_NAME_LENGTH];
5644 	struct vnode* directory;
5645 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5646 		kernel);
5647 	if (status < 0)
5648 		return status;
5649 
5650 	status = create_vnode(directory, name, openMode, perms, kernel);
5651 
5652 	put_vnode(directory);
5653 	return status;
5654 }
5655 
5656 
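/*! Opens the node specified by the given entry ref and returns a new file
	descriptor for it. Symlinks are followed unless O_NOTRAVERSE or
	O_NOFOLLOW is set; with O_NOFOLLOW, opening the symlink itself fails
	with B_LINK_LIMIT.
*/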
5657 static int
5658 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5659 	int openMode, bool kernel)
5660 {
5661 	if (name == NULL || *name == '\0')
5662 		return B_BAD_VALUE;
5663 
5664 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5665 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5666 
5667 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5668 
5669 	// get the vnode matching the entry_ref
5670 	struct vnode* vnode;
5671 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5672 		kernel, &vnode);
5673 	if (status != B_OK)
5674 		return status;
5675 
5676 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5677 		put_vnode(vnode);
5678 		return B_LINK_LIMIT;
5679 	}
5680 
5681 	int newFD = open_vnode(vnode, openMode, kernel);
5682 	if (newFD >= 0) {
5683 		// The vnode reference has been transferred to the FD
5684 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5685 			directoryID, vnode->id, name);
5686 	} else
5687 		put_vnode(vnode);
5688 
5689 	return newFD;
5690 }
5691 
5692 
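/*! Opens the node at the given fd + path location and returns a new file
	descriptor for it. Behaves like file_open_entry_ref(), except that the
	node is resolved from a path relative to \a fd.
*/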
5693 static int
5694 file_open(int fd, char* path, int openMode, bool kernel)
5695 {
5696 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5697 
5698 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5699 		fd, path, openMode, kernel));
5700 
5701 	// get the vnode matching the vnode + path combination
5702 	struct vnode* vnode;
5703 	ino_t parentID;
5704 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5705 		&parentID, kernel);
5706 	if (status != B_OK)
5707 		return status;
5708 
5709 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5710 		put_vnode(vnode);
5711 		return B_LINK_LIMIT;
5712 	}
5713 
5714 	// open the vnode
5715 	int newFD = open_vnode(vnode, openMode, kernel);
5716 	if (newFD >= 0) {
5717 		// The vnode reference has been transferred to the FD
5718 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5719 			vnode->device, parentID, vnode->id, NULL);
5720 	} else
5721 		put_vnode(vnode);
5722 
5723 	return newFD;
5724 }
5725 
5726 
5727 static status_t
5728 file_close(struct file_descriptor* descriptor)
5729 {
5730 	struct vnode* vnode = descriptor->u.vnode;
5731 	status_t status = B_OK;
5732 
5733 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5734 
5735 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5736 		vnode->id);
5737 	if (HAS_FS_CALL(vnode, close)) {
5738 		status = FS_CALL(vnode, close, descriptor->cookie);
5739 	}
5740 
5741 	if (status == B_OK) {
5742 		// remove all outstanding locks for this team
5743 		if (HAS_FS_CALL(vnode, release_lock))
5744 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5745 		else
5746 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5747 	}
5748 	return status;
5749 }
5750 
5751 
5752 static void
5753 file_free_fd(struct file_descriptor* descriptor)
5754 {
5755 	struct vnode* vnode = descriptor->u.vnode;
5756 
5757 	if (vnode != NULL) {
5758 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5759 		put_vnode(vnode);
5760 	}
5761 }
5762 
5763 
5764 static status_t
5765 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5766 	size_t* length)
5767 {
5768 	struct vnode* vnode = descriptor->u.vnode;
5769 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5770 		pos, length, *length));
5771 
5772 	if (S_ISDIR(vnode->Type()))
5773 		return B_IS_A_DIRECTORY;
5774 
5775 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5776 }
5777 
5778 
5779 static status_t
5780 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5781 	size_t* length)
5782 {
5783 	struct vnode* vnode = descriptor->u.vnode;
5784 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5785 		length));
5786 
5787 	if (S_ISDIR(vnode->Type()))
5788 		return B_IS_A_DIRECTORY;
5789 	if (!HAS_FS_CALL(vnode, write))
5790 		return B_READ_ONLY_DEVICE;
5791 
5792 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5793 }
5794 
5795 
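/*! Implements lseek() semantics for file descriptors: computes the new
	file position from \a pos and \a seekType and stores it in the
	descriptor. SEEK_END stat()s the node, falling back to B_GET_GEOMETRY
	for devices that report a size of 0; SEEK_DATA and SEEK_HOLE are
	forwarded to the file system as ioctls where possible.
*/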
5796 static off_t
5797 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5798 {
5799 	struct vnode* vnode = descriptor->u.vnode;
5800 	off_t offset;
5801 	bool isDevice = false;
5802 
5803 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5804 		seekType));
5805 
5806 	// some kinds of files are not seekable
5807 	switch (vnode->Type() & S_IFMT) {
5808 		case S_IFIFO:
5809 		case S_IFSOCK:
5810 			return ESPIPE;
5811 
5812 		// drivers publish block devices as character devices, so match both
5813 		case S_IFBLK:
5814 		case S_IFCHR:
5815 			isDevice = true;
5816 			break;
5817 		// The Open Group Base Specs don't single out any file types besides
5818 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5819 		case S_IFREG:
5820 		case S_IFDIR:
5821 		case S_IFLNK:
5822 			break;
5823 	}
5824 
5825 	switch (seekType) {
5826 		case SEEK_SET:
5827 			offset = 0;
5828 			break;
5829 		case SEEK_CUR:
5830 			offset = descriptor->pos;
5831 			break;
5832 		case SEEK_END:
5833 		{
5834 			// stat() the node
5835 			if (!HAS_FS_CALL(vnode, read_stat))
5836 				return B_UNSUPPORTED;
5837 
5838 			struct stat stat;
5839 			status_t status = FS_CALL(vnode, read_stat, &stat);
5840 			if (status != B_OK)
5841 				return status;
5842 
5843 			offset = stat.st_size;
5844 
5845 			if (offset == 0 && isDevice) {
5846 				// stat() on regular drivers doesn't report size
5847 				device_geometry geometry;
5848 
5849 				if (HAS_FS_CALL(vnode, ioctl)) {
5850 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5851 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5852 					if (status == B_OK)
5853 						offset = (off_t)geometry.bytes_per_sector
5854 							* geometry.sectors_per_track
5855 							* geometry.cylinder_count
5856 							* geometry.head_count;
5857 				}
5858 			}
5859 
5860 			break;
5861 		}
5862 		case SEEK_DATA:
5863 		case SEEK_HOLE:
5864 		{
5865 			status_t status = B_BAD_VALUE;
5866 			if (HAS_FS_CALL(vnode, ioctl)) {
5867 				offset = pos;
5868 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5869 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5870 					&offset, sizeof(offset));
5871 				if (status == B_OK) {
5872 					if (offset > pos)
5873 						offset -= pos;
5874 					break;
5875 				}
5876 			}
5877 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5878 				return status;
5879 
5880 			// fall back to a basic implementation that stat()s the node
5881 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5882 				return B_BAD_VALUE;
5883 
5884 			struct stat stat;
5885 			status = FS_CALL(vnode, read_stat, &stat);
5886 			if (status != B_OK)
5887 				return status;
5888 
5889 			off_t end = stat.st_size;
5890 			if (pos >= end)
5891 				return ENXIO;
5892 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5893 			break;
5894 		}
5895 		default:
5896 			return B_BAD_VALUE;
5897 	}
5898 
5899 	// assumes off_t is 64 bits wide
5900 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5901 		return B_BUFFER_OVERFLOW;
5902 
5903 	pos += offset;
5904 	if (pos < 0)
5905 		return B_BAD_VALUE;
5906 
5907 	return descriptor->pos = pos;
5908 }
5909 
5910 
5911 static status_t
5912 file_select(struct file_descriptor* descriptor, uint8 event,
5913 	struct selectsync* sync)
5914 {
5915 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5916 
5917 	struct vnode* vnode = descriptor->u.vnode;
5918 
5919 	// If the FS has no select() hook, notify select() now.
5920 	if (!HAS_FS_CALL(vnode, select)) {
5921 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5922 			return notify_select_event(sync, event);
5923 		else
5924 			return B_OK;
5925 	}
5926 
5927 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5928 }
5929 
5930 
5931 static status_t
5932 file_deselect(struct file_descriptor* descriptor, uint8 event,
5933 	struct selectsync* sync)
5934 {
5935 	struct vnode* vnode = descriptor->u.vnode;
5936 
5937 	if (!HAS_FS_CALL(vnode, deselect))
5938 		return B_OK;
5939 
5940 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5941 }
5942 
5943 
5944 static status_t
5945 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5946 	bool kernel)
5947 {
5948 	struct vnode* vnode;
5949 	status_t status;
5950 
5951 	if (name == NULL || *name == '\0')
5952 		return B_BAD_VALUE;
5953 
5954 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5955 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5956 
5957 	status = get_vnode(mountID, parentID, &vnode, true, false);
5958 	if (status != B_OK)
5959 		return status;
5960 
5961 	if (HAS_FS_CALL(vnode, create_dir))
5962 		status = FS_CALL(vnode, create_dir, name, perms);
5963 	else
5964 		status = B_READ_ONLY_DEVICE;
5965 
5966 	put_vnode(vnode);
5967 	return status;
5968 }
5969 
5970 
5971 static status_t
5972 dir_create(int fd, char* path, int perms, bool kernel)
5973 {
5974 	char filename[B_FILE_NAME_LENGTH];
5975 	struct vnode* vnode;
5976 	status_t status;
5977 
5978 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5979 		kernel));
5980 
5981 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5982 	if (status < 0)
5983 		return status;
5984 
5985 	if (HAS_FS_CALL(vnode, create_dir)) {
5986 		status = FS_CALL(vnode, create_dir, filename, perms);
5987 	} else
5988 		status = B_READ_ONLY_DEVICE;
5989 
5990 	put_vnode(vnode);
5991 	return status;
5992 }
5993 
5994 
5995 static int
5996 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5997 {
5998 	FUNCTION(("dir_open_entry_ref()\n"));
5999 
6000 	if (name && name[0] == '\0')
6001 		return B_BAD_VALUE;
6002 
6003 	// get the vnode matching the entry_ref/node_ref
6004 	struct vnode* vnode;
6005 	status_t status;
6006 	if (name) {
6007 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
6008 			&vnode);
6009 	} else
6010 		status = get_vnode(mountID, parentID, &vnode, true, false);
6011 	if (status != B_OK)
6012 		return status;
6013 
6014 	int newFD = open_dir_vnode(vnode, kernel);
6015 	if (newFD >= 0) {
6016 		// The vnode reference has been transferred to the FD
6017 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
6018 			vnode->id, name);
6019 	} else
6020 		put_vnode(vnode);
6021 
6022 	return newFD;
6023 }
6024 
6025 
6026 static int
6027 dir_open(int fd, char* path, bool kernel)
6028 {
6029 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6030 		kernel));
6031 
6032 	// get the vnode matching the vnode + path combination
6033 	struct vnode* vnode = NULL;
6034 	ino_t parentID;
6035 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6036 		kernel);
6037 	if (status != B_OK)
6038 		return status;
6039 
6040 	// open the dir
6041 	int newFD = open_dir_vnode(vnode, kernel);
6042 	if (newFD >= 0) {
6043 		// The vnode reference has been transferred to the FD
6044 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6045 			parentID, vnode->id, NULL);
6046 	} else
6047 		put_vnode(vnode);
6048 
6049 	return newFD;
6050 }
6051 
6052 
6053 static status_t
6054 dir_close(struct file_descriptor* descriptor)
6055 {
6056 	struct vnode* vnode = descriptor->u.vnode;
6057 
6058 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6059 
6060 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6061 		vnode->id);
6062 	if (HAS_FS_CALL(vnode, close_dir))
6063 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6064 
6065 	return B_OK;
6066 }
6067 
6068 
6069 static void
6070 dir_free_fd(struct file_descriptor* descriptor)
6071 {
6072 	struct vnode* vnode = descriptor->u.vnode;
6073 
6074 	if (vnode != NULL) {
6075 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6076 		put_vnode(vnode);
6077 	}
6078 }
6079 
6080 
6081 static status_t
6082 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6083 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6084 {
6085 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6086 		bufferSize, _count);
6087 }
6088 
6089 
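/*! Post-processes a dirent returned by a file system: fills in the
	parent's device/node IDs (d_pdev/d_pino) and, if the entry refers to
	a vnode that is covered by a mount (or is a ".." crossing a covered
	mount point), replaces d_dev/d_ino with those of the topmost covering
	vnode, so that callers see the mounted volume instead.
*/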
6090 static status_t
6091 fix_dirent(struct vnode* parent, struct dirent* entry,
6092 	struct io_context* ioContext)
6093 {
6094 	// set d_pdev and d_pino
6095 	entry->d_pdev = parent->device;
6096 	entry->d_pino = parent->id;
6097 
6098 	// If this is the ".." entry and the directory is covering another
6099 	// vnode, we need to replace d_dev and d_ino with the actual values.
6100 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6101 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6102 			ioContext);
6103 	}
6104 
6105 	// resolve covered vnodes
6106 	ReadLocker _(&sVnodeLock);
6107 
6108 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6109 	if (vnode != NULL && vnode->covered_by != NULL) {
6110 		do {
6111 			vnode = vnode->covered_by;
6112 		} while (vnode->covered_by != NULL);
6113 
6114 		entry->d_dev = vnode->device;
6115 		entry->d_ino = vnode->id;
6116 	}
6117 
6118 	return B_OK;
6119 }
6120 
6121 
6122 static status_t
6123 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6124 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6125 {
6126 	if (!HAS_FS_CALL(vnode, read_dir))
6127 		return B_UNSUPPORTED;
6128 
6129 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6130 		_count);
6131 	if (error != B_OK)
6132 		return error;
6133 
6134 	// we need to adjust the read dirents
6135 	uint32 count = *_count;
6136 	for (uint32 i = 0; i < count; i++) {
6137 		error = fix_dirent(vnode, buffer, ioContext);
6138 		if (error != B_OK)
6139 			return error;
6140 
6141 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6142 	}
6143 
6144 	return error;
6145 }
6146 
6147 
6148 static status_t
6149 dir_rewind(struct file_descriptor* descriptor)
6150 {
6151 	struct vnode* vnode = descriptor->u.vnode;
6152 
6153 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6154 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6155 	}
6156 
6157 	return B_UNSUPPORTED;
6158 }
6159 
6160 
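/*! Removes the directory at the given fd + path location. Trailing "/"
	and "." components are stripped from the path first; attempts to
	remove "." or ".." are rejected with B_NOT_ALLOWED.
*/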
6161 static status_t
6162 dir_remove(int fd, char* path, bool kernel)
6163 {
6164 	char name[B_FILE_NAME_LENGTH];
6165 	struct vnode* directory;
6166 	status_t status;
6167 
6168 	if (path != NULL) {
6169 		// we need to make sure our path name doesn't end in "/", ".",
6170 		// or ".."
6171 		char* lastSlash;
6172 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6173 			char* leaf = lastSlash + 1;
6174 			if (!strcmp(leaf, ".."))
6175 				return B_NOT_ALLOWED;
6176 
6177 			// omit multiple slashes
6178 			while (lastSlash > path && lastSlash[-1] == '/')
6179 				lastSlash--;
6180 
6181 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6182 				break;
6185 			// "name/" -> "name", or "name/." -> "name"
6186 			lastSlash[0] = '\0';
6187 		}
6188 
6189 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6190 			return B_NOT_ALLOWED;
6191 	}
6192 
6193 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6194 	if (status != B_OK)
6195 		return status;
6196 
6197 	if (HAS_FS_CALL(directory, remove_dir))
6198 		status = FS_CALL(directory, remove_dir, name);
6199 	else
6200 		status = B_READ_ONLY_DEVICE;
6201 
6202 	put_vnode(directory);
6203 	return status;
6204 }
6205 
6206 
6207 static status_t
6208 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6209 	size_t length)
6210 {
6211 	struct vnode* vnode = descriptor->u.vnode;
6212 
6213 	if (HAS_FS_CALL(vnode, ioctl))
6214 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6215 
6216 	return B_DEV_INVALID_IOCTL;
6217 }
6218 
6219 
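/*! Implements fcntl(): descriptor flags (F_SETFD/F_GETFD), open mode
	changes (F_SETFL/F_GETFL, limited to O_APPEND and O_NONBLOCK),
	descriptor duplication (F_DUPFD/F_DUPFD_CLOEXEC), and advisory file
	locking (F_GETLK/F_SETLK/F_SETLKW). Lock requests are forwarded to
	the file system if it implements the lock hooks, and are handled by
	the generic advisory locking code otherwise.
*/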
6220 static status_t
6221 common_fcntl(int fd, int op, size_t argument, bool kernel)
6222 {
6223 	struct flock flock;
6224 
6225 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6226 		fd, op, argument, kernel ? "kernel" : "user"));
6227 
6228 	struct io_context* context = get_current_io_context(kernel);
6229 
6230 	struct file_descriptor* descriptor = get_fd(context, fd);
6231 	if (descriptor == NULL)
6232 		return B_FILE_ERROR;
6233 
6234 	struct vnode* vnode = fd_vnode(descriptor);
6235 
6236 	status_t status = B_OK;
6237 
6238 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6239 		if (descriptor->type != FDTYPE_FILE)
6240 			status = B_BAD_VALUE;
6241 		else if (kernel)
6242 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6243 		else if (user_memcpy(&flock, (struct flock*)argument,
6244 				sizeof(struct flock)) != B_OK)
6245 			status = B_BAD_ADDRESS;
6246 		if (status != B_OK) {
6247 			put_fd(descriptor);
6248 			return status;
6249 		}
6250 	}
6251 
6252 	switch (op) {
6253 		case F_SETFD:
6254 		{
6255 			// Set file descriptor flags
6256 
6257 			// FD_CLOEXEC is the only flag available at this time
6258 			mutex_lock(&context->io_mutex);
6259 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6260 			mutex_unlock(&context->io_mutex);
6261 
6262 			status = B_OK;
6263 			break;
6264 		}
6265 
6266 		case F_GETFD:
6267 		{
6268 			// Get file descriptor flags
6269 			mutex_lock(&context->io_mutex);
6270 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6271 			mutex_unlock(&context->io_mutex);
6272 			break;
6273 		}
6274 
6275 		case F_SETFL:
6276 			// Set file descriptor open mode
6277 
6278 			// we only accept changes to O_APPEND and O_NONBLOCK
6279 			argument &= O_APPEND | O_NONBLOCK;
6280 			if (descriptor->ops->fd_set_flags != NULL) {
6281 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6282 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6283 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6284 					(int)argument);
6285 			} else
6286 				status = B_UNSUPPORTED;
6287 
6288 			if (status == B_OK) {
6289 				// update this descriptor's open_mode field
6290 				descriptor->open_mode = (descriptor->open_mode
6291 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6292 			}
6293 
6294 			break;
6295 
6296 		case F_GETFL:
6297 			// Get file descriptor open mode
6298 			status = descriptor->open_mode;
6299 			break;
6300 
6301 		case F_DUPFD:
6302 		case F_DUPFD_CLOEXEC:
6303 		{
6304 			status = new_fd_etc(context, descriptor, (int)argument);
6305 			if (status >= 0) {
6306 				mutex_lock(&context->io_mutex);
6307 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6308 				mutex_unlock(&context->io_mutex);
6309 
6310 				atomic_add(&descriptor->ref_count, 1);
6311 			}
6312 			break;
6313 		}
6314 
6315 		case F_GETLK:
6316 			if (vnode != NULL) {
6317 				struct flock normalizedLock;
6318 
6319 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6320 				status = normalize_flock(descriptor, &normalizedLock);
6321 				if (status != B_OK)
6322 					break;
6323 
6324 				if (HAS_FS_CALL(vnode, test_lock)) {
6325 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6326 						&normalizedLock);
6327 				} else
6328 					status = test_advisory_lock(vnode, &normalizedLock);
6329 				if (status == B_OK) {
6330 					if (normalizedLock.l_type == F_UNLCK) {
6331 						// no conflicting lock found, copy back the same struct
6332 						// we were given except change type to F_UNLCK
6333 						flock.l_type = F_UNLCK;
6334 						if (kernel) {
6335 							memcpy((struct flock*)argument, &flock,
6336 								sizeof(struct flock));
6337 						} else {
6338 							status = user_memcpy((struct flock*)argument,
6339 								&flock, sizeof(struct flock));
6340 						}
6341 					} else {
6342 						// a conflicting lock was found, copy back its range and
6343 						// type
6344 						if (normalizedLock.l_len == OFF_MAX)
6345 							normalizedLock.l_len = 0;
6346 
6347 						if (kernel) {
6348 							memcpy((struct flock*)argument,
6349 								&normalizedLock, sizeof(struct flock));
6350 						} else {
6351 							status = user_memcpy((struct flock*)argument,
6352 								&normalizedLock, sizeof(struct flock));
6353 						}
6354 					}
6355 				}
6356 			} else
6357 				status = B_BAD_VALUE;
6358 			break;
6359 
6360 		case F_SETLK:
6361 		case F_SETLKW:
6362 			status = normalize_flock(descriptor, &flock);
6363 			if (status != B_OK)
6364 				break;
6365 
6366 			if (vnode == NULL) {
6367 				status = B_BAD_VALUE;
6368 			} else if (flock.l_type == F_UNLCK) {
6369 				if (HAS_FS_CALL(vnode, release_lock)) {
6370 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6371 						&flock);
6372 				} else {
6373 					status = release_advisory_lock(vnode, context, NULL,
6374 						&flock);
6375 				}
6376 			} else {
6377 				// the open mode must match the lock type
6378 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6379 						&& flock.l_type == F_WRLCK)
6380 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6381 						&& flock.l_type == F_RDLCK))
6382 					status = B_FILE_ERROR;
6383 				else {
6384 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6385 						status = FS_CALL(vnode, acquire_lock,
6386 							descriptor->cookie, &flock, op == F_SETLKW);
6387 					} else {
6388 						status = acquire_advisory_lock(vnode, context, NULL,
6389 							&flock, op == F_SETLKW);
6390 					}
6391 				}
6392 			}
6393 			break;
6394 
6395 		// TODO: add support for more ops?
6396 
6397 		default:
6398 			status = B_BAD_VALUE;
6399 	}
6400 
6401 	put_fd(descriptor);
6402 	return status;
6403 }
6404 
6405 
6406 static status_t
6407 common_sync(int fd, bool kernel)
6408 {
6409 	struct file_descriptor* descriptor;
6410 	struct vnode* vnode;
6411 	status_t status;
6412 
6413 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6414 
6415 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6416 	if (descriptor == NULL)
6417 		return B_FILE_ERROR;
6418 
6419 	if (HAS_FS_CALL(vnode, fsync))
6420 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6421 	else
6422 		status = B_UNSUPPORTED;
6423 
6424 	put_fd(descriptor);
6425 	return status;
6426 }
6427 
6428 
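/*! Tries to acquire the exclusive node lock for the node referred to by
	\a fd by atomically setting the vnode's mandatory_locked_by field to
	the descriptor. Fails with B_BUSY if the node is already locked.
*/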
6429 static status_t
6430 common_lock_node(int fd, bool kernel)
6431 {
6432 	struct file_descriptor* descriptor;
6433 	struct vnode* vnode;
6434 
6435 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6436 	if (descriptor == NULL)
6437 		return B_FILE_ERROR;
6438 
6439 	status_t status = B_OK;
6440 
6441 	// We need to set the locking atomically - someone
6442 	// else might set one at the same time
6443 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6444 			(file_descriptor*)NULL) != NULL)
6445 		status = B_BUSY;
6446 
6447 	put_fd(descriptor);
6448 	return status;
6449 }
6450 
6451 
6452 static status_t
6453 common_unlock_node(int fd, bool kernel)
6454 {
6455 	struct file_descriptor* descriptor;
6456 	struct vnode* vnode;
6457 
6458 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6459 	if (descriptor == NULL)
6460 		return B_FILE_ERROR;
6461 
6462 	status_t status = B_OK;
6463 
6464 	// We need to clear the lock atomically - someone
6465 	// else might set one at the same time
6466 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6467 			(file_descriptor*)NULL, descriptor) != descriptor)
6468 		status = B_BAD_VALUE;
6469 
6470 	put_fd(descriptor);
6471 	return status;
6472 }
6473 
6474 
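/*! Preallocates storage for the file referred to by \a fd: validates the
	requested range, rejects anything but regular files, and forwards the
	request to the file system's preallocate() hook, if it has one.
*/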
6475 static status_t
6476 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6477 {
6478 	struct file_descriptor* descriptor;
6479 	struct vnode* vnode;
6480 
6481 	if (offset < 0 || length == 0)
6482 		return B_BAD_VALUE;
6483 	if (offset > OFF_MAX - length)
6484 		return B_FILE_TOO_LARGE;
6485 
6486 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6487 	if (descriptor == NULL || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6488 		return B_FILE_ERROR;
6489 
6490 	switch (vnode->Type() & S_IFMT) {
6491 		case S_IFIFO:
6492 		case S_IFSOCK:
6493 			return ESPIPE;
6494 
6495 		case S_IFBLK:
6496 		case S_IFCHR:
6497 		case S_IFDIR:
6498 		case S_IFLNK:
6499 			return B_DEVICE_NOT_FOUND;
6500 
6501 		case S_IFREG:
6502 			break;
6503 	}
6504 
6505 	status_t status = B_OK;
6506 	if (HAS_FS_CALL(vnode, preallocate)) {
6507 		status = FS_CALL(vnode, preallocate, offset, length);
6508 	} else {
6509 		status = HAS_FS_CALL(vnode, write)
6510 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6511 	}
6512 
6513 	return status;
6514 }
6515 
6516 
6517 static status_t
6518 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6519 	bool kernel)
6520 {
6521 	struct vnode* vnode;
6522 	status_t status;
6523 
6524 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6525 	if (status != B_OK)
6526 		return status;
6527 
6528 	if (HAS_FS_CALL(vnode, read_symlink)) {
6529 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6530 	} else
6531 		status = B_BAD_VALUE;
6532 
6533 	put_vnode(vnode);
6534 	return status;
6535 }
6536 
6537 
6538 static status_t
6539 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6540 	bool kernel)
6541 {
6542 	// path validity checks have to be in the calling function!
6543 	char name[B_FILE_NAME_LENGTH];
6544 	struct vnode* vnode;
6545 	status_t status;
6546 
6547 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6548 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6549 
6550 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6551 	if (status != B_OK)
6552 		return status;
6553 
6554 	if (HAS_FS_CALL(vnode, create_symlink))
6555 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6556 	else {
6557 		status = HAS_FS_CALL(vnode, write)
6558 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6559 	}
6560 
6561 	put_vnode(vnode);
6562 
6563 	return status;
6564 }
6565 
6566 
6567 static status_t
6568 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6569 	bool traverseLeafLink, bool kernel)
6570 {
6571 	// path validity checks have to be in the calling function!
6572 
6573 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6574 		toPath, kernel));
6575 
6576 	char name[B_FILE_NAME_LENGTH];
6577 	struct vnode* directory;
6578 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6579 		kernel);
6580 	if (status != B_OK)
6581 		return status;
6582 
6583 	struct vnode* vnode;
6584 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6585 		kernel);
6586 	if (status != B_OK)
6587 		goto err;
6588 
6589 	if (directory->mount != vnode->mount) {
6590 		status = B_CROSS_DEVICE_LINK;
6591 		goto err1;
6592 	}
6593 
6594 	if (HAS_FS_CALL(directory, link))
6595 		status = FS_CALL(directory, link, name, vnode);
6596 	else
6597 		status = B_READ_ONLY_DEVICE;
6598 
6599 err1:
6600 	put_vnode(vnode);
6601 err:
6602 	put_vnode(directory);
6603 
6604 	return status;
6605 }
6606 
6607 
6608 static status_t
6609 common_unlink(int fd, char* path, bool kernel)
6610 {
6611 	char filename[B_FILE_NAME_LENGTH];
6612 	struct vnode* vnode;
6613 	status_t status;
6614 
6615 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6616 		kernel));
6617 
6618 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6619 	if (status < 0)
6620 		return status;
6621 
6622 	if (HAS_FS_CALL(vnode, unlink))
6623 		status = FS_CALL(vnode, unlink, filename);
6624 	else
6625 		status = B_READ_ONLY_DEVICE;
6626 
6627 	put_vnode(vnode);
6628 
6629 	return status;
6630 }
6631 
6632 
6633 static status_t
6634 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6635 {
6636 	struct vnode* vnode;
6637 	status_t status;
6638 
6639 	// TODO: honor effectiveUserGroup argument
6640 
6641 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6642 	if (status != B_OK)
6643 		return status;
6644 
6645 	if (HAS_FS_CALL(vnode, access))
6646 		status = FS_CALL(vnode, access, mode);
6647 	else
6648 		status = B_OK;
6649 
6650 	put_vnode(vnode);
6651 
6652 	return status;
6653 }
6654 
6655 
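/*! Renames the entry at the first fd + path location to the second one.
	Both entries must reside on the same volume; renames involving "." or
	".." or renaming an entry onto itself are rejected with B_BAD_VALUE.
*/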
6656 static status_t
6657 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6658 {
6659 	struct vnode* fromVnode;
6660 	struct vnode* toVnode;
6661 	char fromName[B_FILE_NAME_LENGTH];
6662 	char toName[B_FILE_NAME_LENGTH];
6663 	status_t status;
6664 
6665 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6666 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6667 
6668 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6669 	if (status != B_OK)
6670 		return status;
6671 
6672 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6673 	if (status != B_OK)
6674 		goto err1;
6675 
6676 	if (fromVnode->device != toVnode->device) {
6677 		status = B_CROSS_DEVICE_LINK;
6678 		goto err2;
6679 	}
6680 
6681 	if (fromName[0] == '\0' || toName[0] == '\0'
6682 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6683 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6684 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6685 		status = B_BAD_VALUE;
6686 		goto err2;
6687 	}
6688 
6689 	if (HAS_FS_CALL(fromVnode, rename))
6690 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6691 	else
6692 		status = B_READ_ONLY_DEVICE;
6693 
6694 err2:
6695 	put_vnode(toVnode);
6696 err1:
6697 	put_vnode(fromVnode);
6698 
6699 	return status;
6700 }
6701 
6702 
6703 static status_t
6704 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6705 {
6706 	struct vnode* vnode = descriptor->u.vnode;
6707 
6708 	FUNCTION(("common_read_stat: stat %p\n", stat));
6709 
6710 	// TODO: remove this once all file systems properly set them!
6711 	stat->st_crtim.tv_nsec = 0;
6712 	stat->st_ctim.tv_nsec = 0;
6713 	stat->st_mtim.tv_nsec = 0;
6714 	stat->st_atim.tv_nsec = 0;
6715 
6716 	return vfs_stat_vnode(vnode, stat);
6717 }
6718 
6719 
6720 static status_t
6721 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6722 	int statMask)
6723 {
6724 	struct vnode* vnode = descriptor->u.vnode;
6725 
6726 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6727 		vnode, stat, statMask));
6728 
6729 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6730 		&& (statMask & B_STAT_SIZE) != 0) {
6731 		return B_BAD_VALUE;
6732 	}
6733 
6734 	if (!HAS_FS_CALL(vnode, write_stat))
6735 		return B_READ_ONLY_DEVICE;
6736 
6737 	return FS_CALL(vnode, write_stat, stat, statMask);
6738 }
6739 
6740 
6741 static status_t
6742 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6743 	struct stat* stat, bool kernel)
6744 {
6745 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd,
6746 		path, stat));
6747 
6748 	struct vnode* vnode;
6749 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6750 		NULL, kernel);
6751 	if (status != B_OK)
6752 		return status;
6753 
6754 	status = vfs_stat_vnode(vnode, stat);
6755 
6756 	put_vnode(vnode);
6757 	return status;
6758 }
6759 
6760 
6761 static status_t
6762 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6763 	const struct stat* stat, int statMask, bool kernel)
6764 {
6765 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, "
6766 		"stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6767 
6768 	struct vnode* vnode;
6769 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6770 		NULL, kernel);
6771 	if (status != B_OK)
6772 		return status;
6773 
6774 	if (HAS_FS_CALL(vnode, write_stat))
6775 		status = FS_CALL(vnode, write_stat, stat, statMask);
6776 	else
6777 		status = B_READ_ONLY_DEVICE;
6778 
6779 	put_vnode(vnode);
6780 
6781 	return status;
6782 }
6783 
6784 
6785 static int
6786 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6787 {
6788 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6789 		kernel));
6790 
6791 	struct vnode* vnode;
6792 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6793 		NULL, kernel);
6794 	if (status != B_OK)
6795 		return status;
6796 
6797 	status = open_attr_dir_vnode(vnode, kernel);
6798 	if (status < 0)
6799 		put_vnode(vnode);
6800 
6801 	return status;
6802 }
6803 
6804 
6805 static status_t
6806 attr_dir_close(struct file_descriptor* descriptor)
6807 {
6808 	struct vnode* vnode = descriptor->u.vnode;
6809 
6810 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6811 
6812 	if (HAS_FS_CALL(vnode, close_attr_dir))
6813 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6814 
6815 	return B_OK;
6816 }
6817 
6818 
6819 static void
6820 attr_dir_free_fd(struct file_descriptor* descriptor)
6821 {
6822 	struct vnode* vnode = descriptor->u.vnode;
6823 
6824 	if (vnode != NULL) {
6825 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6826 		put_vnode(vnode);
6827 	}
6828 }
6829 
6830 
6831 static status_t
6832 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6833 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6834 {
6835 	struct vnode* vnode = descriptor->u.vnode;
6836 
6837 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6838 
6839 	if (HAS_FS_CALL(vnode, read_attr_dir))
6840 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6841 			bufferSize, _count);
6842 
6843 	return B_UNSUPPORTED;
6844 }
6845 
6846 
6847 static status_t
6848 attr_dir_rewind(struct file_descriptor* descriptor)
6849 {
6850 	struct vnode* vnode = descriptor->u.vnode;
6851 
6852 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6853 
6854 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6855 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6856 
6857 	return B_UNSUPPORTED;
6858 }
6859 
6860 
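/*! Creates and opens the attribute \a name of the node at the given
	fd + path location and returns a new attribute file descriptor for
	it. If creating the descriptor fails, the freshly created attribute
	is removed again.
*/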
6861 static int
6862 attr_create(int fd, char* path, const char* name, uint32 type,
6863 	int openMode, bool kernel)
6864 {
6865 	if (name == NULL || *name == '\0')
6866 		return B_BAD_VALUE;
6867 
6868 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6869 	struct vnode* vnode;
6870 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6871 		kernel);
6872 	if (status != B_OK)
6873 		return status;
6874 
6875 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6876 		status = B_LINK_LIMIT;
6877 		goto err;
6878 	}
6879 
6880 	if (!HAS_FS_CALL(vnode, create_attr)) {
6881 		status = B_READ_ONLY_DEVICE;
6882 		goto err;
6883 	}
6884 
6885 	void* cookie;
6886 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6887 	if (status != B_OK)
6888 		goto err;
6889 
6890 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6891 	if (fd >= 0)
6892 		return fd;
6893 
6894 	status = fd;
6895 
6896 	FS_CALL(vnode, close_attr, cookie);
6897 	FS_CALL(vnode, free_attr_cookie, cookie);
6898 
6899 	FS_CALL(vnode, remove_attr, name);
6900 
6901 err:
6902 	put_vnode(vnode);
6903 
6904 	return status;
6905 }
6906 
6907 
6908 static int
6909 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6910 {
6911 	if (name == NULL || *name == '\0')
6912 		return B_BAD_VALUE;
6913 
6914 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6915 	struct vnode* vnode;
6916 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6917 		kernel);
6918 	if (status != B_OK)
6919 		return status;
6920 
6921 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6922 		status = B_LINK_LIMIT;
6923 		goto err;
6924 	}
6925 
6926 	if (!HAS_FS_CALL(vnode, open_attr)) {
6927 		status = B_UNSUPPORTED;
6928 		goto err;
6929 	}
6930 
6931 	void* cookie;
6932 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6933 	if (status != B_OK)
6934 		goto err;
6935 
6936 	// now we only need a file descriptor for this attribute and we're done
6937 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6938 	if (fd >= 0)
6939 		return fd;
6940 
6941 	status = fd;
6942 
6943 	FS_CALL(vnode, close_attr, cookie);
6944 	FS_CALL(vnode, free_attr_cookie, cookie);
6945 
6946 err:
6947 	put_vnode(vnode);
6948 
6949 	return status;
6950 }
6951 
6952 
6953 static status_t
6954 attr_close(struct file_descriptor* descriptor)
6955 {
6956 	struct vnode* vnode = descriptor->u.vnode;
6957 
6958 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6959 
6960 	if (HAS_FS_CALL(vnode, close_attr))
6961 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6962 
6963 	return B_OK;
6964 }
6965 
6966 
6967 static void
6968 attr_free_fd(struct file_descriptor* descriptor)
6969 {
6970 	struct vnode* vnode = descriptor->u.vnode;
6971 
6972 	if (vnode != NULL) {
6973 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6974 		put_vnode(vnode);
6975 	}
6976 }
6977 
6978 
6979 static status_t
6980 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6981 	size_t* length)
6982 {
6983 	struct vnode* vnode = descriptor->u.vnode;
6984 
6985 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6986 		pos, length, *length));
6987 
6988 	if (!HAS_FS_CALL(vnode, read_attr))
6989 		return B_UNSUPPORTED;
6990 
6991 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6992 }
6993 
6994 
6995 static status_t
6996 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6997 	size_t* length)
6998 {
6999 	struct vnode* vnode = descriptor->u.vnode;
7000 
7001 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
7002 		length));
7003 
7004 	if (!HAS_FS_CALL(vnode, write_attr))
7005 		return B_UNSUPPORTED;
7006 
7007 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
7008 }
7009 
7010 
7011 static off_t
7012 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
7013 {
7014 	off_t offset;
7015 
7016 	switch (seekType) {
7017 		case SEEK_SET:
7018 			offset = 0;
7019 			break;
7020 		case SEEK_CUR:
7021 			offset = descriptor->pos;
7022 			break;
7023 		case SEEK_END:
7024 		{
7025 			struct vnode* vnode = descriptor->u.vnode;
7026 			if (!HAS_FS_CALL(vnode, read_attr_stat))
7027 				return B_UNSUPPORTED;
7028 
7029 			struct stat stat;
7030 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
7031 				&stat);
7032 			if (status != B_OK)
7033 				return status;
7034 
7035 			offset = stat.st_size;
7036 			break;
7037 		}
7038 		default:
7039 			return B_BAD_VALUE;
7040 	}
7041 
7042 	// assumes off_t is 64 bits wide
7043 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7044 		return B_BUFFER_OVERFLOW;
7045 
7046 	pos += offset;
7047 	if (pos < 0)
7048 		return B_BAD_VALUE;
7049 
7050 	return descriptor->pos = pos;
7051 }
7052 
7053 
7054 static status_t
7055 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7056 {
7057 	struct vnode* vnode = descriptor->u.vnode;
7058 
7059 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7060 
7061 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7062 		return B_UNSUPPORTED;
7063 
7064 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7065 }
7066 
7067 
7068 static status_t
7069 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7070 	int statMask)
7071 {
7072 	struct vnode* vnode = descriptor->u.vnode;
7073 
7074 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7075 
7076 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7077 		return B_READ_ONLY_DEVICE;
7078 
7079 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7080 }
7081 
7082 
7083 static status_t
7084 attr_remove(int fd, const char* name, bool kernel)
7085 {
7086 	struct file_descriptor* descriptor;
7087 	struct vnode* vnode;
7088 	status_t status;
7089 
7090 	if (name == NULL || *name == '\0')
7091 		return B_BAD_VALUE;
7092 
7093 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7094 		kernel));
7095 
7096 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7097 	if (descriptor == NULL)
7098 		return B_FILE_ERROR;
7099 
7100 	if (HAS_FS_CALL(vnode, remove_attr))
7101 		status = FS_CALL(vnode, remove_attr, name);
7102 	else
7103 		status = B_READ_ONLY_DEVICE;
7104 
7105 	put_fd(descriptor);
7106 
7107 	return status;
7108 }
7109 
7110 
7111 static status_t
7112 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7113 	bool kernel)
7114 {
7115 	struct file_descriptor* fromDescriptor;
7116 	struct file_descriptor* toDescriptor;
7117 	struct vnode* fromVnode;
7118 	struct vnode* toVnode;
7119 	status_t status;
7120 
7121 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7122 		|| *toName == '\0')
7123 		return B_BAD_VALUE;
7124 
7125 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7126 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7127 
7128 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7129 	if (fromDescriptor == NULL)
7130 		return B_FILE_ERROR;
7131 
7132 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7133 	if (toDescriptor == NULL) {
7134 		status = B_FILE_ERROR;
7135 		goto err;
7136 	}
7137 
7138 	// are the files on the same volume?
7139 	if (fromVnode->device != toVnode->device) {
7140 		status = B_CROSS_DEVICE_LINK;
7141 		goto err1;
7142 	}
7143 
7144 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7145 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7146 	} else
7147 		status = B_READ_ONLY_DEVICE;
7148 
7149 err1:
7150 	put_fd(toDescriptor);
7151 err:
7152 	put_fd(fromDescriptor);
7153 
7154 	return status;
7155 }
7156 
7157 
7158 static int
7159 index_dir_open(dev_t mountID, bool kernel)
7160 {
7161 	struct fs_mount* mount;
7162 	void* cookie;
7163 
7164 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7165 		kernel));
7166 
7167 	status_t status = get_mount(mountID, &mount);
7168 	if (status != B_OK)
7169 		return status;
7170 
7171 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7172 		status = B_UNSUPPORTED;
7173 		goto error;
7174 	}
7175 
7176 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7177 	if (status != B_OK)
7178 		goto error;
7179 
7180 	// get fd for the index directory
7181 	int fd;
7182 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7183 	if (fd >= 0)
7184 		return fd;
7185 
7186 	// something went wrong
7187 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7188 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7189 
7190 	status = fd;
7191 
7192 error:
7193 	put_mount(mount);
7194 	return status;
7195 }
7196 
7197 
7198 static status_t
7199 index_dir_close(struct file_descriptor* descriptor)
7200 {
7201 	struct fs_mount* mount = descriptor->u.mount;
7202 
7203 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7204 
7205 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7206 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7207 
7208 	return B_OK;
7209 }
7210 
7211 
7212 static void
7213 index_dir_free_fd(struct file_descriptor* descriptor)
7214 {
7215 	struct fs_mount* mount = descriptor->u.mount;
7216 
7217 	if (mount != NULL) {
7218 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7219 		put_mount(mount);
7220 	}
7221 }
7222 
7223 
7224 static status_t
7225 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7226 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7227 {
7228 	struct fs_mount* mount = descriptor->u.mount;
7229 
7230 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7231 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7232 			bufferSize, _count);
7233 	}
7234 
7235 	return B_UNSUPPORTED;
7236 }
7237 
7238 
7239 static status_t
7240 index_dir_rewind(struct file_descriptor* descriptor)
7241 {
7242 	struct fs_mount* mount = descriptor->u.mount;
7243 
7244 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7245 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7246 
7247 	return B_UNSUPPORTED;
7248 }
7249 
7250 
7251 static status_t
7252 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7253 	bool kernel)
7254 {
7255 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7256 		mountID, name, kernel));
7257 
7258 	struct fs_mount* mount;
7259 	status_t status = get_mount(mountID, &mount);
7260 	if (status != B_OK)
7261 		return status;
7262 
7263 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7264 		status = B_READ_ONLY_DEVICE;
7265 		goto out;
7266 	}
7267 
7268 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7269 
7270 out:
7271 	put_mount(mount);
7272 	return status;
7273 }
7274 
7275 
7276 #if 0
7277 static status_t
7278 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7279 {
7280 	struct vnode* vnode = descriptor->u.vnode;
7281 
7282 	// ToDo: currently unused!
7283 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7284 	if (!HAS_FS_CALL(vnode, read_index_stat))
7285 		return B_UNSUPPORTED;
7286 
7287 	return B_UNSUPPORTED;
7288 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7289 }
7290 
7291 
7292 static void
7293 index_free_fd(struct file_descriptor* descriptor)
7294 {
7295 	struct vnode* vnode = descriptor->u.vnode;
7296 
7297 	if (vnode != NULL) {
7298 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7299 		put_vnode(vnode);
7300 	}
7301 }
7302 #endif
7303 
7304 
7305 static status_t
7306 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7307 	bool kernel)
7308 {
7309 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7310 		"kernel = %d)\n", mountID, name, kernel));
7311 
7312 	struct fs_mount* mount;
7313 	status_t status = get_mount(mountID, &mount);
7314 	if (status != B_OK)
7315 		return status;
7316 
7317 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7318 		status = B_UNSUPPORTED;
7319 		goto out;
7320 	}
7321 
7322 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7323 
7324 out:
7325 	put_mount(mount);
7326 	return status;
7327 }
7328 
7329 
7330 static status_t
7331 index_remove(dev_t mountID, const char* name, bool kernel)
7332 {
7333 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7334 		mountID, name, kernel));
7335 
7336 	struct fs_mount* mount;
7337 	status_t status = get_mount(mountID, &mount);
7338 	if (status != B_OK)
7339 		return status;
7340 
7341 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7342 		status = B_READ_ONLY_DEVICE;
7343 		goto out;
7344 	}
7345 
7346 	status = FS_MOUNT_CALL(mount, remove_index, name);
7347 
7348 out:
7349 	put_mount(mount);
7350 	return status;
7351 }
7352 
7353 
7354 /*!	TODO: the query FS API is still pretty much the same as in R5.
7355 		It would be nice if the file systems got some more kernel support
7356 		for queries.
7357 		For example, query parsing should be moved into the kernel.
7358 */
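/*! Opens a query on the volume specified by \a device and returns a new
	file descriptor for reading the matching entries. \a flags, \a port,
	and \a token are passed on to the file system's open_query() hook;
	the port/token pair is used to deliver live query update messages.
*/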
7359 static int
7360 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7361 	int32 token, bool kernel)
7362 {
7363 	struct fs_mount* mount;
7364 	void* cookie;
7365 
7366 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7367 		device, query, kernel));
7368 
7369 	status_t status = get_mount(device, &mount);
7370 	if (status != B_OK)
7371 		return status;
7372 
7373 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7374 		status = B_UNSUPPORTED;
7375 		goto error;
7376 	}
7377 
7378 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7379 		&cookie);
7380 	if (status != B_OK)
7381 		goto error;
7382 
7383 	// get fd for the query
7384 	int fd;
7385 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7386 	if (fd >= 0)
7387 		return fd;
7388 
7389 	status = fd;
7390 
7391 	// something went wrong
7392 	FS_MOUNT_CALL(mount, close_query, cookie);
7393 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7394 
7395 error:
7396 	put_mount(mount);
7397 	return status;
7398 }
7399 
7400 
7401 static status_t
7402 query_close(struct file_descriptor* descriptor)
7403 {
7404 	struct fs_mount* mount = descriptor->u.mount;
7405 
7406 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7407 
7408 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7409 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7410 
7411 	return B_OK;
7412 }
7413 
7414 
7415 static void
7416 query_free_fd(struct file_descriptor* descriptor)
7417 {
7418 	struct fs_mount* mount = descriptor->u.mount;
7419 
7420 	if (mount != NULL) {
7421 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7422 		put_mount(mount);
7423 	}
7424 }
7425 
7426 
7427 static status_t
7428 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7429 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7430 {
7431 	struct fs_mount* mount = descriptor->u.mount;
7432 
7433 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7434 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7435 			bufferSize, _count);
7436 	}
7437 
7438 	return B_UNSUPPORTED;
7439 }
7440 
7441 
7442 static status_t
7443 query_rewind(struct file_descriptor* descriptor)
7444 {
7445 	struct fs_mount* mount = descriptor->u.mount;
7446 
7447 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7448 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7449 
7450 	return B_UNSUPPORTED;
7451 }
7452 
7453 
7454 //	#pragma mark - General File System functions
7455 
7456 
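/*! Mounts the file system (stack) specified by \a fsName onto the
	directory at \a path, optionally backed by \a device. Resolves the
	device via the disk device manager (creating a file device for image
	files when necessary), builds the chain of fs_volume layers, calls
	each layer's mount() hook, and finally links the new root vnode with
	the vnode it covers.
	\return the ID of the new mount, or an error code.
*/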
7457 static dev_t
7458 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7459 	const char* args, bool kernel)
7460 {
7461 	struct ::fs_mount* mount;
7462 	status_t status = B_OK;
7463 	fs_volume* volume = NULL;
7464 	int32 layer = 0;
7465 	Vnode* coveredNode = NULL;
7466 
7467 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7468 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7469 
7470 	// The path is always safe, we just have to make sure that fsName is
7471 	// at least mostly valid -- we can't make any assumptions about args,
7472 	// though. A NULL fsName is OK if a device was given and the FS is not
7473 	// virtual; we'll get the name from the DDM later.
7474 	if (fsName == NULL) {
7475 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7476 			return B_BAD_VALUE;
7477 	} else if (fsName[0] == '\0')
7478 		return B_BAD_VALUE;
7479 
7480 	RecursiveLocker mountOpLocker(sMountOpLock);
7481 
7482 	// Helper to delete a newly created file device on failure.
7483 	// Not exactly beautiful, but helps to keep the code below cleaner.
7484 	struct FileDeviceDeleter {
7485 		FileDeviceDeleter() : id(-1) {}
7486 		~FileDeviceDeleter()
7487 		{
7488 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7489 		}
7490 
7491 		partition_id id;
7492 	} fileDeviceDeleter;
7493 
7494 	// If the file system is not a "virtual" one, the device argument should
7495 	// point to a real file/device (if given at all).
7496 	// get the partition
7497 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7498 	KPartition* partition = NULL;
7499 	KPath normalizedDevice;
7500 	bool newlyCreatedFileDevice = false;
7501 
7502 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7503 		// normalize the device path
7504 		status = normalizedDevice.SetTo(device, true);
7505 		if (status != B_OK)
7506 			return status;
7507 
7508 		// get a corresponding partition from the DDM
7509 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7510 		if (partition == NULL) {
7511 			// Partition not found: This either means the user supplied
7512 			// an invalid path, or the path refers to an image file. We try
7513 			// to let the DDM create a file device for the path.
7514 			partition_id deviceID = ddm->CreateFileDevice(
7515 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7516 			if (deviceID >= 0) {
7517 				partition = ddm->RegisterPartition(deviceID);
7518 				if (newlyCreatedFileDevice)
7519 					fileDeviceDeleter.id = deviceID;
7520 			}
7521 		}
7522 
7523 		if (!partition) {
7524 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7525 				normalizedDevice.Path()));
7526 			return B_ENTRY_NOT_FOUND;
7527 		}
7528 
7529 		device = normalizedDevice.Path();
7530 			// correct path to file device
7531 	}
7532 	PartitionRegistrar partitionRegistrar(partition, true);
7533 
7534 	// Write lock the partition's device. For the time being, we keep the lock
7535 	// until we're done mounting -- not nice, but it ensures that no one
7536 	// interferes.
7537 	// TODO: Just mark the partition busy while mounting!
7538 	KDiskDevice* diskDevice = NULL;
7539 	if (partition) {
7540 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7541 		if (!diskDevice) {
7542 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7543 			return B_ERROR;
7544 		}
7545 	}
7546 
7547 	DeviceWriteLocker writeLocker(diskDevice, true);
7548 		// this takes over the write lock acquired before
7549 
7550 	if (partition != NULL) {
7551 		// make sure that the partition is not busy
7552 		if (partition->IsBusy()) {
7553 			TRACE(("fs_mount(): Partition is busy.\n"));
7554 			return B_BUSY;
7555 		}
7556 
7557 		// if no FS name had been supplied, we get it from the partition
7558 		if (fsName == NULL) {
7559 			KDiskSystem* diskSystem = partition->DiskSystem();
7560 			if (!diskSystem) {
7561 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7562 					"recognize it.\n"));
7563 				return B_BAD_VALUE;
7564 			}
7565 
7566 			if (!diskSystem->IsFileSystem()) {
7567 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7568 					"partitioning system.\n"));
7569 				return B_BAD_VALUE;
7570 			}
7571 
7572 			// The disk system name will not change, and the KDiskSystem
7573 			// object will not go away while the disk device is locked (and
7574 			// the partition has a reference to it), so this is safe.
7575 			fsName = diskSystem->Name();
7576 		}
7577 	}
7578 
7579 	mount = new(std::nothrow) (struct ::fs_mount);
7580 	if (mount == NULL)
7581 		return B_NO_MEMORY;
7582 
7583 	mount->device_name = strdup(device);
7584 		// "device" can be NULL
7585 
7586 	status = mount->entry_cache.Init();
7587 	if (status != B_OK)
7588 		goto err1;
7589 
7590 	// initialize structure
7591 	mount->id = sNextMountID++;
7592 	mount->partition = NULL;
7593 	mount->root_vnode = NULL;
7594 	mount->covers_vnode = NULL;
7595 	mount->unmounting = false;
7596 	mount->owns_file_device = false;
7597 	mount->volume = NULL;
7598 
7599 	// build up the volume(s)
7600 	while (true) {
7601 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7602 		if (layerFSName == NULL) {
7603 			if (layer == 0) {
7604 				status = B_NO_MEMORY;
7605 				goto err1;
7606 			}
7607 
7608 			break;
7609 		}
7610 		MemoryDeleter layerFSNameDeleter(layerFSName);
7611 
7612 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7613 		if (volume == NULL) {
7614 			status = B_NO_MEMORY;
7615 			goto err1;
7616 		}
7617 
7618 		volume->id = mount->id;
7619 		volume->partition = partition != NULL ? partition->ID() : -1;
7620 		volume->layer = layer++;
7621 		volume->private_volume = NULL;
7622 		volume->ops = NULL;
7623 		volume->sub_volume = NULL;
7624 		volume->super_volume = NULL;
7625 		volume->file_system = NULL;
7626 		volume->file_system_name = NULL;
7627 
7628 		volume->file_system_name = get_file_system_name(layerFSName);
7629 		if (volume->file_system_name == NULL) {
7630 			status = B_NO_MEMORY;
7631 			free(volume);
7632 			goto err1;
7633 		}
7634 
7635 		volume->file_system = get_file_system(layerFSName);
7636 		if (volume->file_system == NULL) {
7637 			status = B_DEVICE_NOT_FOUND;
7638 			free(volume->file_system_name);
7639 			free(volume);
7640 			goto err1;
7641 		}
7642 
7643 		if (mount->volume == NULL)
7644 			mount->volume = volume;
7645 		else {
7646 			volume->super_volume = mount->volume;
7647 			mount->volume->sub_volume = volume;
7648 			mount->volume = volume;
7649 		}
7650 	}
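
	// mount->volume now refers to the most recently created layer, and each
	// layer's super_volume link points at the layer created before it; the
	// mounting code below follows these links back to layer 0.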
7651 
7652 	// insert mount struct into list before we call FS's mount() function
7653 	// so that vnodes can be created for this mount
7654 	rw_lock_write_lock(&sMountLock);
7655 	sMountsTable->Insert(mount);
7656 	rw_lock_write_unlock(&sMountLock);
7657 
7658 	ino_t rootID;
7659 
7660 	if (!sRoot) {
7661 		// we haven't mounted anything yet
7662 		if (strcmp(path, "/") != 0) {
7663 			status = B_ERROR;
7664 			goto err2;
7665 		}
7666 
7667 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7668 			args, &rootID);
7669 		if (status != B_OK || mount->volume->ops == NULL)
7670 			goto err2;
7671 	} else {
7672 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7673 		if (status != B_OK)
7674 			goto err2;
7675 
7676 		mount->covers_vnode = coveredNode;
7677 
7678 		// make sure coveredNode is a directory
7679 		if (!S_ISDIR(coveredNode->Type())) {
7680 			status = B_NOT_A_DIRECTORY;
7681 			goto err3;
7682 		}
7683 
7684 		if (coveredNode->IsCovered()) {
7685 			// this is already a covered vnode
7686 			status = B_BUSY;
7687 			goto err3;
7688 		}
7689 
7690 		// mount it/them
7691 		fs_volume* volume = mount->volume;
7692 		while (volume) {
7693 			status = volume->file_system->mount(volume, device, flags, args,
7694 				&rootID);
7695 			if (status != B_OK || volume->ops == NULL) {
7696 				if (status == B_OK && volume->ops == NULL)
7697 					panic("fs_mount: mount() succeeded but ops is NULL!");
7698 				if (volume->sub_volume)
7699 					goto err4;
7700 				goto err3;
7701 			}
7702 
7703 			volume = volume->super_volume;
7704 		}
7705 
7706 		volume = mount->volume;
7707 		while (volume) {
7708 			if (volume->ops->all_layers_mounted != NULL)
7709 				volume->ops->all_layers_mounted(volume);
7710 			volume = volume->super_volume;
7711 		}
7712 	}
7713 
7714 	// the root node is supposed to be owned by the file system - it must
7715 	// exist at this point
7716 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7717 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7718 		panic("fs_mount: file system does not own its root node!\n");
7719 		status = B_ERROR;
7720 		goto err4;
7721 	}
7722 
7723 	// set up the links between the root vnode and the vnode it covers
7724 	rw_lock_write_lock(&sVnodeLock);
7725 	if (coveredNode != NULL) {
7726 		if (coveredNode->IsCovered()) {
7727 			// the vnode is covered now
7728 			status = B_BUSY;
7729 			rw_lock_write_unlock(&sVnodeLock);
7730 			goto err4;
7731 		}
7732 
7733 		mount->root_vnode->covers = coveredNode;
7734 		mount->root_vnode->SetCovering(true);
7735 
7736 		coveredNode->covered_by = mount->root_vnode;
7737 		coveredNode->SetCovered(true);
7738 	}
7739 	rw_lock_write_unlock(&sVnodeLock);
7740 
7741 	if (!sRoot) {
7742 		sRoot = mount->root_vnode;
7743 		mutex_lock(&sIOContextRootLock);
7744 		get_current_io_context(true)->root = sRoot;
7745 		mutex_unlock(&sIOContextRootLock);
7746 		inc_vnode_ref_count(sRoot);
7747 	}
7748 
7749 	// supply the partition (if any) with the mount cookie and mark it mounted
7750 	if (partition) {
7751 		partition->SetMountCookie(mount->volume->private_volume);
7752 		partition->SetVolumeID(mount->id);
7753 
7754 		// keep a partition reference as long as the partition is mounted
7755 		partitionRegistrar.Detach();
7756 		mount->partition = partition;
7757 		mount->owns_file_device = newlyCreatedFileDevice;
7758 		fileDeviceDeleter.id = -1;
7759 	}
7760 
7761 	notify_mount(mount->id,
7762 		coveredNode != NULL ? coveredNode->device : -1,
7763 		coveredNode ? coveredNode->id : -1);
7764 
7765 	return mount->id;
7766 
7767 err4:
7768 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7769 err3:
7770 	if (coveredNode != NULL)
7771 		put_vnode(coveredNode);
7772 err2:
7773 	rw_lock_write_lock(&sMountLock);
7774 	sMountsTable->Remove(mount);
7775 	rw_lock_write_unlock(&sMountLock);
7776 err1:
7777 	delete mount;
7778 
7779 	return status;
7780 }
7781 
7782 
7783 static status_t
7784 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7785 {
7786 	struct fs_mount* mount;
7787 	status_t err;
7788 
7789 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7790 		mountID, kernel));
7791 
7792 	struct vnode* pathVnode = NULL;
7793 	if (path != NULL) {
7794 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7795 		if (err != B_OK)
7796 			return B_ENTRY_NOT_FOUND;
7797 	}
7798 
7799 	RecursiveLocker mountOpLocker(sMountOpLock);
7800 	ReadLocker mountLocker(sMountLock);
7801 
7802 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7803 	if (mount == NULL) {
7804 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7805 			pathVnode);
7806 	}
7807 
7808 	mountLocker.Unlock();
7809 
7810 	if (path != NULL) {
7811 		put_vnode(pathVnode);
7812 
7813 		if (mount->root_vnode != pathVnode) {
7814 			// not a mount point
7815 			return B_BAD_VALUE;
7816 		}
7817 	}
7818 
7819 	// if the volume is associated with a partition, lock the device of the
7820 	// partition as long as we are unmounting
7821 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7822 	KPartition* partition = mount->partition;
7823 	KDiskDevice* diskDevice = NULL;
7824 	if (partition != NULL) {
7825 		if (partition->Device() == NULL) {
7826 			dprintf("fs_unmount(): There is no device!\n");
7827 			return B_ERROR;
7828 		}
7829 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7830 		if (!diskDevice) {
7831 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7832 			return B_ERROR;
7833 		}
7834 	}
7835 	DeviceWriteLocker writeLocker(diskDevice, true);
7836 
7837 	// make sure that the partition is not busy
7838 	if (partition != NULL) {
7839 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7840 			TRACE(("fs_unmount(): Partition is busy.\n"));
7841 			return B_BUSY;
7842 		}
7843 	}
7844 
7845 	// grab the vnode write lock to keep someone from creating
7846 	// a vnode while we're figuring out if we can continue
7847 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7848 
7849 	bool disconnectedDescriptors = false;
7850 
7851 	while (true) {
7852 		bool busy = false;
7853 
7854 		// cycle through the list of vnodes associated with this mount and
7855 		// make sure none of them is busy or still has references
7856 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7857 		while (struct vnode* vnode = iterator.Next()) {
7858 			if (vnode->IsBusy()) {
7859 				busy = true;
7860 				break;
7861 			}
7862 
7863 			// check the vnode's ref count -- subtract additional references for
7864 			// covering
7865 			int32 refCount = vnode->ref_count;
7866 			if (vnode->covers != NULL)
7867 				refCount--;
7868 			if (vnode->covered_by != NULL)
7869 				refCount--;
7870 
7871 			if (refCount != 0) {
7872 				// there are still vnodes in use on this mount, so we cannot
7873 				// unmount yet
7874 				busy = true;
7875 				break;
7876 			}
7877 		}
7878 
7879 		if (!busy)
7880 			break;
7881 
7882 		if ((flags & B_FORCE_UNMOUNT) == 0)
7883 			return B_BUSY;
7884 
7885 		if (disconnectedDescriptors) {
7886 			// wait a bit until the last access is finished, and then try again
7887 			vnodesWriteLocker.Unlock();
7888 			snooze(100000);
7889 			// TODO: if there is some kind of bug that prevents the ref counts
7890 			// from getting back to zero, this will fall into an endless loop...
7891 			vnodesWriteLocker.Lock();
7892 			continue;
7893 		}
7894 
7895 		// the file system is still busy - but we're forced to unmount it,
7896 		// so let's disconnect all open file descriptors
7897 
7898 		mount->unmounting = true;
7899 			// prevent new vnodes from being created
7900 
7901 		vnodesWriteLocker.Unlock();
7902 
7903 		disconnect_mount_or_vnode_fds(mount, NULL);
7904 		disconnectedDescriptors = true;
7905 
7906 		vnodesWriteLocker.Lock();
7907 	}
7908 
7909 	// We can safely continue. Mark all of the vnodes busy and put this mount
7910 	// structure into unmounting state. Also undo the vnode covers/covered_by
7911 	// links.
7912 	mount->unmounting = true;
7913 
7914 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7915 	while (struct vnode* vnode = iterator.Next()) {
7916 		// Remove all covers/covered_by links from other mounts' nodes to this
7917 		// vnode and adjust the node ref count accordingly. We will release the
7918 		// references to the external vnodes below.
7919 		if (Vnode* coveredNode = vnode->covers) {
7920 			if (Vnode* coveringNode = vnode->covered_by) {
7921 				// We have both covered and covering vnodes, so just remove us
7922 				// from the chain.
7923 				coveredNode->covered_by = coveringNode;
7924 				coveringNode->covers = coveredNode;
7925 				vnode->ref_count -= 2;
7926 
7927 				vnode->covered_by = NULL;
7928 				vnode->covers = NULL;
7929 				vnode->SetCovering(false);
7930 				vnode->SetCovered(false);
7931 			} else {
7932 				// We only have a covered vnode. Remove its link to us.
7933 				coveredNode->covered_by = NULL;
7934 				coveredNode->SetCovered(false);
7935 				vnode->ref_count--;
7936 
7937 				// If the other node is an external vnode, we keep its link
7938 				// around so we can put the reference later on. Otherwise
7939 				// we get rid of it right now.
7940 				if (coveredNode->mount == mount) {
7941 					vnode->covers = NULL;
7942 					coveredNode->ref_count--;
7943 				}
7944 			}
7945 		} else if (Vnode* coveringNode = vnode->covered_by) {
7946 			// We only have a covering vnode. Remove its link to us.
7947 			coveringNode->covers = NULL;
7948 			coveringNode->SetCovering(false);
7949 			vnode->ref_count--;
7950 
7951 			// If the other node is an external vnode, we keep its link
7952 			// around so we can put the reference later on. Otherwise
7953 			// we get rid of it right now.
7954 			if (coveringNode->mount == mount) {
7955 				vnode->covered_by = NULL;
7956 				coveringNode->ref_count--;
7957 			}
7958 		}
7959 
7960 		vnode->SetBusy(true);
7961 		vnode_to_be_freed(vnode);
7962 	}
7963 
7964 	vnodesWriteLocker.Unlock();
7965 
7966 	// Free all vnodes associated with this mount.
7967 	// They will be removed from the mount list by free_vnode(), so
7968 	// we don't have to do this.
7969 	while (struct vnode* vnode = mount->vnodes.Head()) {
7970 		// Put the references to external covered/covering vnodes we kept above.
7971 		if (Vnode* coveredNode = vnode->covers)
7972 			put_vnode(coveredNode);
7973 		if (Vnode* coveringNode = vnode->covered_by)
7974 			put_vnode(coveringNode);
7975 
7976 		free_vnode(vnode, false);
7977 	}
7978 
7979 	// remove the mount structure from the hash table
7980 	rw_lock_write_lock(&sMountLock);
7981 	sMountsTable->Remove(mount);
7982 	rw_lock_write_unlock(&sMountLock);
7983 
7984 	mountOpLocker.Unlock();
7985 
7986 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7987 	notify_unmount(mount->id);
7988 
7989 	// dereference the partition and mark it unmounted
7990 	if (partition) {
7991 		partition->SetVolumeID(-1);
7992 		partition->SetMountCookie(NULL);
7993 
7994 		if (mount->owns_file_device)
7995 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7996 		partition->Unregister();
7997 	}
7998 
7999 	delete mount;
8000 	return B_OK;
8001 }
8002 
8003 
8004 static status_t
8005 fs_sync(dev_t device)
8006 {
8007 	struct fs_mount* mount;
8008 	status_t status = get_mount(device, &mount);
8009 	if (status != B_OK)
8010 		return status;
8011 
8012 	struct vnode marker;
8013 	memset(&marker, 0, sizeof(marker));
8014 	marker.SetBusy(true);
8015 	marker.SetRemoved(true);
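	// The marker is a dummy vnode that is temporarily linked into the mount's
	// vnode list, so that the loop below can drop all locks while syncing a
	// node and afterwards resume the iteration right after the marker.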
8016 
8017 	// First, synchronize all file caches
8018 
8019 	while (true) {
8020 		WriteLocker locker(sVnodeLock);
8021 			// Note: That's the easy way, which is probably OK for sync(),
8022 			// since it's a relatively rare call and doesn't need to allow for
8023 			// a lot of concurrency. Using a read lock would be possible, but
8024 			// also more involved, since we'd have to lock the individual nodes
8025 			// and take care of the locking order, which we might not want to
8026 			// do while holding fs_mount::lock.
8027 
8028 		// synchronize access to vnode list
8029 		mutex_lock(&mount->lock);
8030 
8031 		struct vnode* vnode;
8032 		if (!marker.IsRemoved()) {
8033 			vnode = mount->vnodes.GetNext(&marker);
8034 			mount->vnodes.Remove(&marker);
8035 			marker.SetRemoved(true);
8036 		} else
8037 			vnode = mount->vnodes.First();
8038 
8039 		while (vnode != NULL && (vnode->cache == NULL
8040 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8041 			// TODO: we could track writes (and writable mapped vnodes)
8042 			//	and have a simple flag that we could test for here
8043 			vnode = mount->vnodes.GetNext(vnode);
8044 		}
8045 
8046 		if (vnode != NULL) {
8047 			// insert marker vnode again
8048 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8049 			marker.SetRemoved(false);
8050 		}
8051 
8052 		mutex_unlock(&mount->lock);
8053 
8054 		if (vnode == NULL)
8055 			break;
8056 
8057 		vnode = lookup_vnode(mount->id, vnode->id);
8058 		if (vnode == NULL || vnode->IsBusy())
8059 			continue;
8060 
8061 		if (vnode->ref_count == 0) {
8062 			// this vnode has been unused before
8063 			vnode_used(vnode);
8064 		}
8065 		inc_vnode_ref_count(vnode);
8066 
8067 		locker.Unlock();
8068 
8069 		if (vnode->cache != NULL && !vnode->IsRemoved())
8070 			vnode->cache->WriteModified();
8071 
8072 		put_vnode(vnode);
8073 	}
8074 
8075 	// Let the file systems do their synchronizing work
8076 	if (HAS_FS_MOUNT_CALL(mount, sync))
8077 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8078 
8079 	// Finally, flush the underlying device's write cache (if possible).
8080 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8081 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8082 
8083 	put_mount(mount);
8084 	return status;
8085 }
8086 
8087 
8088 static status_t
8089 fs_read_info(dev_t device, struct fs_info* info)
8090 {
8091 	struct fs_mount* mount;
8092 	status_t status = get_mount(device, &mount);
8093 	if (status != B_OK)
8094 		return status;
8095 
8096 	memset(info, 0, sizeof(struct fs_info));
8097 
8098 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8099 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8100 
8101 	// fill in info the file system doesn't (have to) know about
8102 	if (status == B_OK) {
8103 		info->dev = mount->id;
8104 		info->root = mount->root_vnode->id;
8105 
8106 		fs_volume* volume = mount->volume;
8107 		while (volume->super_volume != NULL)
8108 			volume = volume->super_volume;
8109 
8110 		strlcpy(info->fsh_name, volume->file_system_name,
8111 			sizeof(info->fsh_name));
8112 		if (mount->device_name != NULL) {
8113 			strlcpy(info->device_name, mount->device_name,
8114 				sizeof(info->device_name));
8115 		}
8116 	}
8117 
8118 	// even if the call is not supported by the file system, the caller
8119 	// still gets the parts that we filled out ourselves
8120 
8121 	put_mount(mount);
8122 	return status;
8123 }
8124 
8125 
8126 static status_t
8127 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8128 {
8129 	struct fs_mount* mount;
8130 	status_t status = get_mount(device, &mount);
8131 	if (status != B_OK)
8132 		return status;
8133 
8134 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8135 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8136 	else
8137 		status = B_READ_ONLY_DEVICE;
8138 
8139 	put_mount(mount);
8140 	return status;
8141 }
8142 
8143 
8144 static dev_t
8145 fs_next_device(int32* _cookie)
8146 {
8147 	struct fs_mount* mount = NULL;
8148 	dev_t device = *_cookie;
8149 
8150 	rw_lock_read_lock(&sMountLock);
8151 
8152 	// Since device IDs are assigned sequentially, this algorithm
8153 	// works well enough. It makes sure that the device list
8154 	// returned is sorted, and that no device is skipped when an
8155 	// already visited device gets unmounted.
8156 
8157 	while (device < sNextMountID) {
8158 		mount = find_mount(device++);
8159 		if (mount != NULL && mount->volume->private_volume != NULL)
8160 			break;
8161 	}
8162 
8163 	*_cookie = device;
8164 
8165 	if (mount != NULL)
8166 		device = mount->id;
8167 	else
8168 		device = B_BAD_VALUE;
8169 
8170 	rw_lock_read_unlock(&sMountLock);
8171 
8172 	return device;
8173 }
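
// Example use of fs_next_device() (illustrative sketch), iterating over all
// mounted volumes in ascending dev_t order:
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0) {
//		// ... use "device" ...
//	}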
8174 
8175 
8176 ssize_t
8177 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8178 	void *buffer, size_t readBytes)
8179 {
8180 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8181 	if (attrFD < 0)
8182 		return attrFD;
8183 
8184 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8185 
8186 	_kern_close(attrFD);
8187 
8188 	return bytesRead;
8189 }
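
// Example use of fs_read_attr() (illustrative sketch): reading a node's MIME
// type from its "BEOS:TYPE" attribute, where "fd" stands for any open file
// descriptor:
//
//	char type[B_MIME_TYPE_LENGTH];
//	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
//		type, sizeof(type));
//	if (bytesRead < 0)
//		; // bytesRead holds the error code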
8190 
8191 
8192 static status_t
8193 get_cwd(char* buffer, size_t size, bool kernel)
8194 {
8195 	// Get current working directory from io context
8196 	struct io_context* context = get_current_io_context(kernel);
8197 	status_t status;
8198 
8199 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8200 
8201 	mutex_lock(&context->io_mutex);
8202 
8203 	struct vnode* vnode = context->cwd;
8204 	if (vnode)
8205 		inc_vnode_ref_count(vnode);
8206 
8207 	mutex_unlock(&context->io_mutex);
8208 
8209 	if (vnode) {
8210 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8211 		put_vnode(vnode);
8212 	} else
8213 		status = B_ERROR;
8214 
8215 	return status;
8216 }
8217 
8218 
8219 static status_t
8220 set_cwd(int fd, char* path, bool kernel)
8221 {
8222 	struct io_context* context;
8223 	struct vnode* vnode = NULL;
8224 	struct vnode* oldDirectory;
8225 	status_t status;
8226 
8227 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8228 
8229 	// Get vnode for passed path, and bail if it failed
8230 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8231 	if (status < 0)
8232 		return status;
8233 
8234 	if (!S_ISDIR(vnode->Type())) {
8235 		// nope, can't cwd to here
8236 		status = B_NOT_A_DIRECTORY;
8237 		goto err;
8238 	}
8239 
8240 	// We need to have the permission to enter the directory, too
8241 	if (HAS_FS_CALL(vnode, access)) {
8242 		status = FS_CALL(vnode, access, X_OK);
8243 		if (status != B_OK)
8244 			goto err;
8245 	}
8246 
8247 	// Get current io context and lock
8248 	context = get_current_io_context(kernel);
8249 	mutex_lock(&context->io_mutex);
8250 
8251 	// save the old current working directory first
8252 	oldDirectory = context->cwd;
8253 	context->cwd = vnode;
8254 
8255 	mutex_unlock(&context->io_mutex);
8256 
8257 	if (oldDirectory)
8258 		put_vnode(oldDirectory);
8259 
8260 	return B_NO_ERROR;
8261 
8262 err:
8263 	put_vnode(vnode);
8264 	return status;
8265 }
8266 
8267 
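/*!	Copies a \0-terminated name or path string from userland. Returns \c B_OK
	on success, \c B_NAME_TOO_LONG if the string does not fit into \a length
	bytes, or a (negative) error code if the copy itself fails.
*/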
8268 static status_t
8269 user_copy_name(char* to, const char* from, size_t length)
8270 {
8271 	ssize_t len = user_strlcpy(to, from, length);
8272 	if (len < 0)
8273 		return len;
8274 	if (len >= (ssize_t)length)
8275 		return B_NAME_TOO_LONG;
8276 	return B_OK;
8277 }
8278 
8279 
8280 //	#pragma mark - kernel mirrored syscalls
8281 
8282 
8283 dev_t
8284 _kern_mount(const char* path, const char* device, const char* fsName,
8285 	uint32 flags, const char* args, size_t argsLength)
8286 {
8287 	KPath pathBuffer(path);
8288 	if (pathBuffer.InitCheck() != B_OK)
8289 		return B_NO_MEMORY;
8290 
8291 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8292 }
8293 
8294 
8295 status_t
8296 _kern_unmount(const char* path, uint32 flags)
8297 {
8298 	KPath pathBuffer(path);
8299 	if (pathBuffer.InitCheck() != B_OK)
8300 		return B_NO_MEMORY;
8301 
8302 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8303 }
8304 
8305 
8306 status_t
8307 _kern_read_fs_info(dev_t device, struct fs_info* info)
8308 {
8309 	if (info == NULL)
8310 		return B_BAD_VALUE;
8311 
8312 	return fs_read_info(device, info);
8313 }
8314 
8315 
8316 status_t
8317 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8318 {
8319 	if (info == NULL)
8320 		return B_BAD_VALUE;
8321 
8322 	return fs_write_info(device, info, mask);
8323 }
8324 
8325 
8326 status_t
8327 _kern_sync(void)
8328 {
8329 	// Note: _kern_sync() is also called from _user_sync()
8330 	int32 cookie = 0;
8331 	dev_t device;
8332 	while ((device = next_dev(&cookie)) >= 0) {
8333 		status_t status = fs_sync(device);
8334 		if (status != B_OK && status != B_BAD_VALUE) {
8335 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8336 				strerror(status));
8337 		}
8338 	}
8339 
8340 	return B_OK;
8341 }
8342 
8343 
8344 dev_t
8345 _kern_next_device(int32* _cookie)
8346 {
8347 	return fs_next_device(_cookie);
8348 }
8349 
8350 
8351 status_t
8352 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8353 	size_t infoSize)
8354 {
8355 	if (infoSize != sizeof(fd_info))
8356 		return B_BAD_VALUE;
8357 
8358 	// get the team
8359 	Team* team = Team::Get(teamID);
8360 	if (team == NULL)
8361 		return B_BAD_TEAM_ID;
8362 	BReference<Team> teamReference(team, true);
8363 
8364 	// now that we have a team reference, its I/O context won't go away
8365 	io_context* context = team->io_context;
8366 	MutexLocker contextLocker(context->io_mutex);
8367 
8368 	uint32 slot = *_cookie;
8369 
8370 	struct file_descriptor* descriptor;
8371 	while (slot < context->table_size
8372 		&& (descriptor = context->fds[slot]) == NULL) {
8373 		slot++;
8374 	}
8375 
8376 	if (slot >= context->table_size)
8377 		return B_ENTRY_NOT_FOUND;
8378 
8379 	info->number = slot;
8380 	info->open_mode = descriptor->open_mode;
8381 
8382 	struct vnode* vnode = fd_vnode(descriptor);
8383 	if (vnode != NULL) {
8384 		info->device = vnode->device;
8385 		info->node = vnode->id;
8386 	} else if (descriptor->u.mount != NULL) {
8387 		info->device = descriptor->u.mount->id;
8388 		info->node = -1;
8389 	}
8390 
8391 	*_cookie = slot + 1;
8392 	return B_OK;
8393 }
8394 
8395 
8396 int
8397 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8398 	int perms)
8399 {
8400 	if ((openMode & O_CREAT) != 0) {
8401 		return file_create_entry_ref(device, inode, name, openMode, perms,
8402 			true);
8403 	}
8404 
8405 	return file_open_entry_ref(device, inode, name, openMode, true);
8406 }
8407 
8408 
8409 /*!	\brief Opens a node specified by a FD + path pair.
8410 
8411 	At least one of \a fd and \a path must be specified.
8412 	If only \a fd is given, the function opens the node identified by this
8413 	FD. If only a path is given, this path is opened. If both are given and
8414 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8415 	of the directory (!) identified by \a fd.
8416 
8417 	\param fd The FD. May be < 0.
8418 	\param path The absolute or relative path. May be \c NULL.
8419 	\param openMode The open mode.
8420 	\return A FD referring to the newly opened node, or an error code,
8421 			if an error occurs.
8422 */
8423 int
8424 _kern_open(int fd, const char* path, int openMode, int perms)
8425 {
8426 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8427 	if (pathBuffer.InitCheck() != B_OK)
8428 		return B_NO_MEMORY;
8429 
8430 	if ((openMode & O_CREAT) != 0)
8431 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8432 
8433 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8434 }
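
// Example use of _kern_open() (illustrative sketch; "dirFD" stands for a
// hypothetical descriptor referring to an open directory):
//
//	int fd = _kern_open(dirFD, "data/settings", O_RDWR | O_CREAT, 0644);
//	if (fd >= 0)
//		_kern_close(fd);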
8435 
8436 
8437 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8438 
8439 	The supplied name may be \c NULL, in which case the directory identified
8440 	by \a device and \a inode will be opened. Otherwise \a device and
8441 	\a inode identify the parent directory of the directory to be opened
8442 	and \a name its entry name.
8443 
8444 	\param device If \a name is specified the ID of the device the parent
8445 		   directory of the directory to be opened resides on, otherwise
8446 		   the device of the directory itself.
8447 	\param inode If \a name is specified the node ID of the parent
8448 		   directory of the directory to be opened, otherwise the node ID of
8449 		   the directory itself.
8450 	\param name The entry name of the directory to be opened. If \c NULL,
8451 		   the \a device + \a inode pair identify the node to be opened.
8452 	\return The FD of the newly opened directory or an error code, if
8453 			something went wrong.
8454 */
8455 int
8456 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8457 {
8458 	return dir_open_entry_ref(device, inode, name, true);
8459 }
8460 
8461 
8462 /*!	\brief Opens a directory specified by a FD + path pair.
8463 
8464 	At least one of \a fd and \a path must be specified.
8465 	If only \a fd is given, the function opens the directory identified by this
8466 	FD. If only a path is given, this path is opened. If both are given and
8467 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8468 	of the directory (!) identified by \a fd.
8469 
8470 	\param fd The FD. May be < 0.
8471 	\param path The absolute or relative path. May be \c NULL.
8472 	\return A FD referring to the newly opened directory, or an error code,
8473 			if an error occurs.
8474 */
8475 int
8476 _kern_open_dir(int fd, const char* path)
8477 {
8478 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8479 	if (pathBuffer.InitCheck() != B_OK)
8480 		return B_NO_MEMORY;
8481 
8482 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8483 }
8484 
8485 
8486 status_t
8487 _kern_fcntl(int fd, int op, size_t argument)
8488 {
8489 	return common_fcntl(fd, op, argument, true);
8490 }
8491 
8492 
8493 status_t
8494 _kern_fsync(int fd)
8495 {
8496 	return common_sync(fd, true);
8497 }
8498 
8499 
8500 status_t
8501 _kern_lock_node(int fd)
8502 {
8503 	return common_lock_node(fd, true);
8504 }
8505 
8506 
8507 status_t
8508 _kern_unlock_node(int fd)
8509 {
8510 	return common_unlock_node(fd, true);
8511 }
8512 
8513 
8514 status_t
8515 _kern_preallocate(int fd, off_t offset, off_t length)
8516 {
8517 	return common_preallocate(fd, offset, length, true);
8518 }
8519 
8520 
8521 status_t
8522 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8523 	int perms)
8524 {
8525 	return dir_create_entry_ref(device, inode, name, perms, true);
8526 }
8527 
8528 
8529 /*!	\brief Creates a directory specified by a FD + path pair.
8530 
8531 	\a path must always be specified (it contains the name of the new directory
8532 	at least). If only a path is given, this path identifies the location at
8533 	which the directory shall be created. If both \a fd and \a path are given
8534 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8535 	of the directory (!) identified by \a fd.
8536 
8537 	\param fd The FD. May be < 0.
8538 	\param path The absolute or relative path. Must not be \c NULL.
8539 	\param perms The access permissions the new directory shall have.
8540 	\return \c B_OK, if the directory has been created successfully, another
8541 			error code otherwise.
8542 */
8543 status_t
8544 _kern_create_dir(int fd, const char* path, int perms)
8545 {
8546 	KPath pathBuffer(path, KPath::DEFAULT);
8547 	if (pathBuffer.InitCheck() != B_OK)
8548 		return B_NO_MEMORY;
8549 
8550 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8551 }
8552 
8553 
8554 status_t
8555 _kern_remove_dir(int fd, const char* path)
8556 {
8557 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8558 	if (pathBuffer.InitCheck() != B_OK)
8559 		return B_NO_MEMORY;
8560 
8561 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8562 }
8563 
8564 
8565 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8566 
8567 	At least one of \a fd and \a path must be specified.
8568 	If only \a fd is given, the symlink to be read is the node
8569 	identified by this FD. If only a path is given, this path identifies the
8570 	symlink to be read. If both are given and the path is absolute, \a fd is
8571 	ignored; a relative path is reckoned off of the directory (!) identified
8572 	by \a fd.
8573 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8574 	will still be updated to reflect the required buffer size.
8575 
8576 	\param fd The FD. May be < 0.
8577 	\param path The absolute or relative path. May be \c NULL.
8578 	\param buffer The buffer into which the contents of the symlink shall be
8579 		   written.
8580 	\param _bufferSize A pointer to the size of the supplied buffer.
8581 	\return The length of the link on success or an appropriate error code.
8582 */
8583 status_t
8584 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8585 {
8586 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8587 	if (pathBuffer.InitCheck() != B_OK)
8588 		return B_NO_MEMORY;
8589 
8590 	return common_read_link(fd, pathBuffer.LockBuffer(),
8591 		buffer, _bufferSize, true);
8592 }
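
// Example use of _kern_read_link() (illustrative sketch): since on
// B_BUFFER_OVERFLOW the size is updated to the required length, a caller can
// retry with a suitably enlarged buffer:
//
//	char buffer[64];
//	size_t size = sizeof(buffer);
//	status_t status = _kern_read_link(-1, "/bin/sh", buffer, &size);
//	// on B_BUFFER_OVERFLOW, "size" now holds the link's actual length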
8593 
8594 
8595 /*!	\brief Creates a symlink specified by a FD + path pair.
8596 
8597 	\a path must always be specified (it contains the name of the new symlink
8598 	at least). If only a path is given, this path identifies the location at
8599 	which the symlink shall be created. If both \a fd and \a path are given and
8600 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8601 	of the directory (!) identified by \a fd.
8602 
8603 	\param fd The FD. May be < 0.
8604 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the symlink shall point to. Must not be \c NULL.
8605 	\param mode The access permissions the new symlink shall have.
8606 	\return \c B_OK, if the symlink has been created successfully, another
8607 			error code otherwise.
8608 */
8609 status_t
8610 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8611 {
8612 	KPath pathBuffer(path);
8613 	if (pathBuffer.InitCheck() != B_OK)
8614 		return B_NO_MEMORY;
8615 
8616 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8617 		toPath, mode, true);
8618 }
8619 
8620 
8621 status_t
8622 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8623 	bool traverseLeafLink)
8624 {
8625 	KPath pathBuffer(path);
8626 	KPath toPathBuffer(toPath);
8627 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8628 		return B_NO_MEMORY;
8629 
8630 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8631 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8632 }
8633 
8634 
8635 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8636 
8637 	\a path must always be specified (it contains at least the name of the entry
8638 	to be deleted). If only a path is given, this path identifies the entry
8639 	directly. If both \a fd and \a path are given and the path is absolute,
8640 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8641 	identified by \a fd.
8642 
8643 	\param fd The FD. May be < 0.
8644 	\param path The absolute or relative path. Must not be \c NULL.
8645 	\return \c B_OK, if the entry has been removed successfully, another
8646 			error code otherwise.
8647 */
8648 status_t
8649 _kern_unlink(int fd, const char* path)
8650 {
8651 	KPath pathBuffer(path);
8652 	if (pathBuffer.InitCheck() != B_OK)
8653 		return B_NO_MEMORY;
8654 
8655 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8656 }
8657 
8658 
8659 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8660 		   by another FD + path pair.
8661 
8662 	\a oldPath and \a newPath must always be specified (they contain at least
8663 	the name of the entry). If only a path is given, this path identifies the
8664 	entry directly. If both a FD and a path are given and the path is absolute,
8665 	the FD is ignored; a relative path is reckoned off of the directory (!)
8666 	identified by the respective FD.
8667 
8668 	\param oldFD The FD of the old location. May be < 0.
8669 	\param oldPath The absolute or relative path of the old location. Must not
8670 		   be \c NULL.
8671 	\param newFD The FD of the new location. May be < 0.
8672 	\param newPath The absolute or relative path of the new location. Must not
8673 		   be \c NULL.
8674 	\return \c B_OK, if the entry has been moved successfully, another
8675 			error code otherwise.
8676 */
8677 status_t
8678 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8679 {
8680 	KPath oldPathBuffer(oldPath);
8681 	KPath newPathBuffer(newPath);
8682 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8683 		return B_NO_MEMORY;
8684 
8685 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8686 		newFD, newPathBuffer.LockBuffer(), true);
8687 }
8688 
8689 
8690 status_t
8691 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8692 {
8693 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8694 	if (pathBuffer.InitCheck() != B_OK)
8695 		return B_NO_MEMORY;
8696 
8697 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8698 		true);
8699 }
8700 
8701 
8702 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8703 
8704 	If only \a fd is given, the stat operation associated with the type
8705 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8706 	given, this path identifies the entry for whose node to retrieve the
8707 	stat data. If both \a fd and \a path are given and the path is absolute,
8708 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8709 	identified by \a fd and specifies the entry whose stat data shall be
8710 	retrieved.
8711 
8712 	\param fd The FD. May be < 0.
8713 	\param path The absolute or relative path. May be \c NULL.
8714 	\param traverseLeafLink If \a path is given, \c true specifies that the
8715 		   function shall not stick to symlinks, but traverse them.
8716 	\param stat The buffer the stat data shall be written into.
8717 	\param statSize The size of the supplied stat buffer.
8718 	\return \c B_OK, if the stat data have been read successfully, another
8719 			error code otherwise.
8720 */
8721 status_t
8722 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8723 	struct stat* stat, size_t statSize)
8724 {
8725 	struct stat completeStat;
8726 	struct stat* originalStat = NULL;
8727 	status_t status;
8728 
8729 	if (statSize > sizeof(struct stat))
8730 		return B_BAD_VALUE;
8731 
8732 	// this supports different stat extensions
8733 	if (statSize < sizeof(struct stat)) {
8734 		originalStat = stat;
8735 		stat = &completeStat;
8736 	}
8737 
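	// vfs_read_stat() always fills in a complete struct stat; on success,
	// only the first statSize bytes are copied back to the caller below.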
8738 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8739 
8740 	if (status == B_OK && originalStat != NULL)
8741 		memcpy(originalStat, stat, statSize);
8742 
8743 	return status;
8744 }
8745 
8746 
8747 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8748 
8749 	If only \a fd is given, the stat operation associated with the type
8750 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8751 	given, this path identifies the entry for whose node to write the
8752 	stat data. If both \a fd and \a path are given and the path is absolute,
8753 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8754 	identified by \a fd and specifies the entry whose stat data shall be
8755 	written.
8756 
8757 	\param fd The FD. May be < 0.
8758 	\param path The absolute or relative path. May be \c NULL.
8759 	\param traverseLeafLink If \a path is given, \c true specifies that the
8760 		   function shall not stick to symlinks, but traverse them.
8761 	\param stat The buffer containing the stat data to be written.
8762 	\param statSize The size of the supplied stat buffer.
8763 	\param statMask A mask specifying which parts of the stat data shall be
8764 		   written.
8765 	\return \c B_OK, if the stat data have been written successfully,
8766 			another error code otherwise.
8767 */
8768 status_t
8769 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8770 	const struct stat* stat, size_t statSize, int statMask)
8771 {
8772 	struct stat completeStat;
8773 
8774 	if (statSize > sizeof(struct stat))
8775 		return B_BAD_VALUE;
8776 
8777 	// this supports different stat extensions
8778 	if (statSize < sizeof(struct stat)) {
8779 		memset((uint8*)&completeStat + statSize, 0,
8780 			sizeof(struct stat) - statSize);
8781 		memcpy(&completeStat, stat, statSize);
8782 		stat = &completeStat;
8783 	}
8784 
8785 	status_t status;
8786 
8787 	if (path != NULL) {
8788 		// path given: write the stat of the node referred to by (fd, path)
8789 		KPath pathBuffer(path);
8790 		if (pathBuffer.InitCheck() != B_OK)
8791 			return B_NO_MEMORY;
8792 
8793 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8794 			traverseLeafLink, stat, statMask, true);
8795 	} else {
8796 		// no path given: get the FD and use the FD operation
8797 		struct file_descriptor* descriptor
8798 			= get_fd(get_current_io_context(true), fd);
8799 		if (descriptor == NULL)
8800 			return B_FILE_ERROR;
8801 
8802 		if (descriptor->ops->fd_write_stat)
8803 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8804 		else
8805 			status = B_UNSUPPORTED;
8806 
8807 		put_fd(descriptor);
8808 	}
8809 
8810 	return status;
8811 }
8812 
8813 
8814 int
8815 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8816 {
8817 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8818 	if (pathBuffer.InitCheck() != B_OK)
8819 		return B_NO_MEMORY;
8820 
8821 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8822 }
8823 
8824 
8825 int
8826 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8827 	int openMode)
8828 {
8829 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8830 	if (pathBuffer.InitCheck() != B_OK)
8831 		return B_NO_MEMORY;
8832 
8833 	if ((openMode & O_CREAT) != 0) {
8834 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8835 			true);
8836 	}
8837 
8838 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8839 }
8840 
8841 
8842 status_t
8843 _kern_remove_attr(int fd, const char* name)
8844 {
8845 	return attr_remove(fd, name, true);
8846 }
8847 
8848 
8849 status_t
8850 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8851 	const char* toName)
8852 {
8853 	return attr_rename(fromFile, fromName, toFile, toName, true);
8854 }
8855 
8856 
8857 int
8858 _kern_open_index_dir(dev_t device)
8859 {
8860 	return index_dir_open(device, true);
8861 }
8862 
8863 
8864 status_t
8865 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8866 {
8867 	return index_create(device, name, type, flags, true);
8868 }
8869 
8870 
8871 status_t
8872 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8873 {
8874 	return index_name_read_stat(device, name, stat, true);
8875 }
8876 
8877 
8878 status_t
8879 _kern_remove_index(dev_t device, const char* name)
8880 {
8881 	return index_remove(device, name, true);
8882 }
8883 
8884 
8885 status_t
8886 _kern_getcwd(char* buffer, size_t size)
8887 {
8888 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8889 
8890 	// Call vfs to get current working directory
8891 	return get_cwd(buffer, size, true);
8892 }
8893 
8894 
8895 status_t
8896 _kern_setcwd(int fd, const char* path)
8897 {
8898 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8899 	if (pathBuffer.InitCheck() != B_OK)
8900 		return B_NO_MEMORY;
8901 
8902 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8903 }
8904 
8905 
8906 //	#pragma mark - userland syscalls
8907 
8908 
8909 dev_t
8910 _user_mount(const char* userPath, const char* userDevice,
8911 	const char* userFileSystem, uint32 flags, const char* userArgs,
8912 	size_t argsLength)
8913 {
8914 	char fileSystem[B_FILE_NAME_LENGTH];
8915 	KPath path, device;
8916 	char* args = NULL;
8917 	status_t status;
8918 
8919 	if (!IS_USER_ADDRESS(userPath))
8920 		return B_BAD_ADDRESS;
8921 
8922 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8923 		return B_NO_MEMORY;
8924 
8925 	status = user_copy_name(path.LockBuffer(), userPath,
8926 		B_PATH_NAME_LENGTH);
8927 	if (status != B_OK)
8928 		return status;
8929 	path.UnlockBuffer();
8930 
8931 	if (userFileSystem != NULL) {
8932 		if (!IS_USER_ADDRESS(userFileSystem))
8933 			return B_BAD_ADDRESS;
8934 
8935 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8936 		if (status != B_OK)
8937 			return status;
8938 	}
8939 
8940 	if (userDevice != NULL) {
8941 		if (!IS_USER_ADDRESS(userDevice))
8942 			return B_BAD_ADDRESS;
8943 
8944 		status = user_copy_name(device.LockBuffer(), userDevice,
8945 			B_PATH_NAME_LENGTH);
8946 		if (status != B_OK)
8947 			return status;
8948 		device.UnlockBuffer();
8949 	}
8950 
8951 	if (userArgs != NULL && argsLength > 0) {
8952 		if (!IS_USER_ADDRESS(userArgs))
8953 			return B_BAD_ADDRESS;
8954 
8955 		// this is a safety restriction
8956 		if (argsLength >= 65536)
8957 			return B_NAME_TOO_LONG;
8958 
8959 		args = (char*)malloc(argsLength + 1);
8960 		if (args == NULL)
8961 			return B_NO_MEMORY;
8962 
8963 		status = user_copy_name(args, userArgs, argsLength + 1);
8964 		if (status != B_OK) {
8965 			free(args);
8966 			return status;
8967 		}
8968 	}
8969 
8970 	status = fs_mount(path.LockBuffer(),
8971 		userDevice != NULL ? device.Path() : NULL,
8972 		userFileSystem ? fileSystem : NULL, flags, args, false);
8973 
8974 	free(args);
8975 	return status;
8976 }
8977 
8978 
8979 status_t
8980 _user_unmount(const char* userPath, uint32 flags)
8981 {
8982 	if (!IS_USER_ADDRESS(userPath))
8983 		return B_BAD_ADDRESS;
8984 
8985 	KPath pathBuffer;
8986 	if (pathBuffer.InitCheck() != B_OK)
8987 		return B_NO_MEMORY;
8988 
8989 	char* path = pathBuffer.LockBuffer();
8990 
8991 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8992 	if (status != B_OK)
8993 		return status;
8994 
8995 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8996 }
8997 
8998 
8999 status_t
9000 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
9001 {
9002 	struct fs_info info;
9003 	status_t status;
9004 
9005 	if (userInfo == NULL)
9006 		return B_BAD_VALUE;
9007 
9008 	if (!IS_USER_ADDRESS(userInfo))
9009 		return B_BAD_ADDRESS;
9010 
9011 	status = fs_read_info(device, &info);
9012 	if (status != B_OK)
9013 		return status;
9014 
9015 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
9016 		return B_BAD_ADDRESS;
9017 
9018 	return B_OK;
9019 }
9020 
9021 
9022 status_t
9023 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
9024 {
9025 	struct fs_info info;
9026 
9027 	if (userInfo == NULL)
9028 		return B_BAD_VALUE;
9029 
9030 	if (!IS_USER_ADDRESS(userInfo)
9031 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
9032 		return B_BAD_ADDRESS;
9033 
9034 	return fs_write_info(device, &info, mask);
9035 }
9036 
9037 
9038 dev_t
9039 _user_next_device(int32* _userCookie)
9040 {
9041 	int32 cookie;
9042 	dev_t device;
9043 
9044 	if (!IS_USER_ADDRESS(_userCookie)
9045 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9046 		return B_BAD_ADDRESS;
9047 
9048 	device = fs_next_device(&cookie);
9049 
9050 	if (device >= B_OK) {
9051 		// update user cookie
9052 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9053 			return B_BAD_ADDRESS;
9054 	}
9055 
9056 	return device;
9057 }
9058 
9059 
9060 status_t
9061 _user_sync(void)
9062 {
9063 	return _kern_sync();
9064 }
9065 
9066 
9067 status_t
9068 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9069 	size_t infoSize)
9070 {
9071 	struct fd_info info;
9072 	uint32 cookie;
9073 
9074 	// only root can do this
9075 	if (geteuid() != 0)
9076 		return B_NOT_ALLOWED;
9077 
9078 	if (infoSize != sizeof(fd_info))
9079 		return B_BAD_VALUE;
9080 
9081 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9082 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9083 		return B_BAD_ADDRESS;
9084 
9085 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9086 	if (status != B_OK)
9087 		return status;
9088 
9089 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9090 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9091 		return B_BAD_ADDRESS;
9092 
9093 	return status;
9094 }
9095 
9096 
9097 status_t
9098 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9099 	char* userPath, size_t pathLength)
9100 {
9101 	if (!IS_USER_ADDRESS(userPath))
9102 		return B_BAD_ADDRESS;
9103 
9104 	KPath path;
9105 	if (path.InitCheck() != B_OK)
9106 		return B_NO_MEMORY;
9107 
9108 	// copy the leaf name onto the stack
9109 	char stackLeaf[B_FILE_NAME_LENGTH];
9110 	if (leaf != NULL) {
9111 		if (!IS_USER_ADDRESS(leaf))
9112 			return B_BAD_ADDRESS;
9113 
9114 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9115 		if (status != B_OK)
9116 			return status;
9117 
9118 		leaf = stackLeaf;
9119 	}
9120 
9121 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9122 		false, path.LockBuffer(), path.BufferSize());
9123 	if (status != B_OK)
9124 		return status;
9125 
9126 	path.UnlockBuffer();
9127 
9128 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9129 	if (length < 0)
9130 		return length;
9131 	if (length >= (int)pathLength)
9132 		return B_BUFFER_OVERFLOW;
9133 
9134 	return B_OK;
9135 }
9136 
9137 
9138 status_t
9139 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9140 {
9141 	if (userPath == NULL || buffer == NULL)
9142 		return B_BAD_VALUE;
9143 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9144 		return B_BAD_ADDRESS;
9145 
9146 	// copy path from userland
9147 	KPath pathBuffer;
9148 	if (pathBuffer.InitCheck() != B_OK)
9149 		return B_NO_MEMORY;
9150 	char* path = pathBuffer.LockBuffer();
9151 
9152 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9153 	if (status != B_OK)
9154 		return status;
9155 
9156 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9157 		false);
9158 	if (error != B_OK)
9159 		return error;
9160 
9161 	// copy back to userland
9162 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9163 	if (len < 0)
9164 		return len;
9165 	if (len >= B_PATH_NAME_LENGTH)
9166 		return B_BUFFER_OVERFLOW;
9167 
9168 	return B_OK;
9169 }
9170 
9171 
9172 int
9173 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9174 	int openMode, int perms)
9175 {
9176 	char name[B_FILE_NAME_LENGTH];
9177 
9178 	if (userName == NULL || device < 0 || inode < 0)
9179 		return B_BAD_VALUE;
9180 	if (!IS_USER_ADDRESS(userName))
9181 		return B_BAD_ADDRESS;
9182 	status_t status = user_copy_name(name, userName, sizeof(name));
9183 	if (status != B_OK)
9184 		return status;
9185 
9186 	if ((openMode & O_CREAT) != 0) {
9187 		return file_create_entry_ref(device, inode, name, openMode, perms,
9188 			false);
9189 	}
9190 
9191 	return file_open_entry_ref(device, inode, name, openMode, false);
9192 }
9193 
9194 
9195 int
9196 _user_open(int fd, const char* userPath, int openMode, int perms)
9197 {
9198 	KPath path;
9199 	if (path.InitCheck() != B_OK)
9200 		return B_NO_MEMORY;
9201 
9202 	char* buffer = path.LockBuffer();
9203 
9204 	if (!IS_USER_ADDRESS(userPath))
9205 		return B_BAD_ADDRESS;
9206 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9207 	if (status != B_OK)
9208 		return status;
9209 
9210 	if ((openMode & O_CREAT) != 0)
9211 		return file_create(fd, buffer, openMode, perms, false);
9212 
9213 	return file_open(fd, buffer, openMode, false);
9214 }
9215 
9216 
9217 int
9218 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9219 {
9220 	if (userName != NULL) {
9221 		char name[B_FILE_NAME_LENGTH];
9222 
9223 		if (!IS_USER_ADDRESS(userName))
9224 			return B_BAD_ADDRESS;
9225 		status_t status = user_copy_name(name, userName, sizeof(name));
9226 		if (status != B_OK)
9227 			return status;
9228 
9229 		return dir_open_entry_ref(device, inode, name, false);
9230 	}
9231 	return dir_open_entry_ref(device, inode, NULL, false);
9232 }
9233 
9234 
9235 int
9236 _user_open_dir(int fd, const char* userPath)
9237 {
9238 	if (userPath == NULL)
9239 		return dir_open(fd, NULL, false);
9240 
9241 	KPath path;
9242 	if (path.InitCheck() != B_OK)
9243 		return B_NO_MEMORY;
9244 
9245 	char* buffer = path.LockBuffer();
9246 
9247 	if (!IS_USER_ADDRESS(userPath))
9248 		return B_BAD_ADDRESS;
9249 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9250 	if (status != B_OK)
9251 		return status;
9252 
9253 	return dir_open(fd, buffer, false);
9254 }
9255 
9256 
9257 /*!	\brief Opens a directory's parent directory and returns the entry name
9258 		   of the former.
9259 
9260 	Aside from the fact that it returns the directory's entry name, this
9261 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It is
9262 	exactly equivalent if \a userName is \c NULL.
9263 
9264 	If a name buffer is supplied and the name does not fit the buffer, the
9265 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9266 
9267 	\param fd A FD referring to a directory.
9268 	\param userName Buffer the directory's entry name shall be written into.
9269 		   May be \c NULL.
9270 	\param nameLength Size of the name buffer.
9271 	\return The file descriptor of the opened parent directory, if everything
9272 			went fine, an error code otherwise.
9273 */
9274 int
9275 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9276 {
9277 	bool kernel = false;
9278 
9279 	if (userName && !IS_USER_ADDRESS(userName))
9280 		return B_BAD_ADDRESS;
9281 
9282 	// open the parent dir
9283 	int parentFD = dir_open(fd, (char*)"..", kernel);
9284 	if (parentFD < 0)
9285 		return parentFD;
9286 	FDCloser fdCloser(parentFD, kernel);
9287 
9288 	if (userName) {
9289 		// get the vnodes
9290 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9291 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9292 		VNodePutter parentVNodePutter(parentVNode);
9293 		VNodePutter dirVNodePutter(dirVNode);
9294 		if (!parentVNode || !dirVNode)
9295 			return B_FILE_ERROR;
9296 
9297 		// get the vnode name
9298 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9299 		struct dirent* buffer = (struct dirent*)_buffer;
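		// (get_vnode_name() returns the name in dirent format, hence the
		// oversized on-stack buffer above)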
9300 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9301 			sizeof(_buffer), get_current_io_context(false));
9302 		if (status != B_OK)
9303 			return status;
9304 
9305 		// copy the name to the userland buffer
9306 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9307 		if (len < 0)
9308 			return len;
9309 		if (len >= (int)nameLength)
9310 			return B_BUFFER_OVERFLOW;
9311 	}
9312 
9313 	return fdCloser.Detach();
9314 }
9315 
9316 
9317 status_t
9318 _user_fcntl(int fd, int op, size_t argument)
9319 {
9320 	status_t status = common_fcntl(fd, op, argument, false);
9321 	if (op == F_SETLKW)
9322 		syscall_restart_handle_post(status);
9323 
9324 	return status;
9325 }
9326 
9327 
9328 status_t
9329 _user_fsync(int fd)
9330 {
9331 	return common_sync(fd, false);
9332 }
9333 
9334 
9335 status_t
9336 _user_flock(int fd, int operation)
9337 {
9338 	FUNCTION(("_user_flock(fd = %d, operation = %d)\n", fd, operation));
9339 
9340 	// Check if the operation is valid
9341 	switch (operation & ~LOCK_NB) {
9342 		case LOCK_UN:
9343 		case LOCK_SH:
9344 		case LOCK_EX:
9345 			break;
9346 
9347 		default:
9348 			return B_BAD_VALUE;
9349 	}
9350 
9351 	struct file_descriptor* descriptor;
9352 	struct vnode* vnode;
9353 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9354 	if (descriptor == NULL)
9355 		return B_FILE_ERROR;
9356 
9357 	if (descriptor->type != FDTYPE_FILE) {
9358 		put_fd(descriptor);
9359 		return B_BAD_VALUE;
9360 	}
9361 
9362 	struct flock flock;
9363 	flock.l_start = 0;
9364 	flock.l_len = OFF_MAX;
9365 	flock.l_whence = 0;
9366 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
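	// Note: a flock() style lock always covers the whole file, hence the
	// maximal l_start/l_len range set above.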
9367 
9368 	status_t status;
9369 	if ((operation & LOCK_UN) != 0) {
9370 		if (HAS_FS_CALL(vnode, release_lock))
9371 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9372 		else
9373 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9374 	} else {
9375 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9376 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9377 				(operation & LOCK_NB) == 0);
9378 		} else {
9379 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9380 				(operation & LOCK_NB) == 0);
9381 		}
9382 	}
9383 
9384 	syscall_restart_handle_post(status);
9385 
9386 	put_fd(descriptor);
9387 	return status;
9388 }
9389 
9390 
9391 status_t
9392 _user_lock_node(int fd)
9393 {
9394 	return common_lock_node(fd, false);
9395 }
9396 
9397 
9398 status_t
9399 _user_unlock_node(int fd)
9400 {
9401 	return common_unlock_node(fd, false);
9402 }
9403 
9404 
9405 status_t
9406 _user_preallocate(int fd, off_t offset, off_t length)
9407 {
9408 	return common_preallocate(fd, offset, length, false);
9409 }
9410 
9411 
9412 status_t
9413 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9414 	int perms)
9415 {
9416 	char name[B_FILE_NAME_LENGTH];
9417 	status_t status;
9418 
9419 	if (!IS_USER_ADDRESS(userName))
9420 		return B_BAD_ADDRESS;
9421 
9422 	status = user_copy_name(name, userName, sizeof(name));
9423 	if (status != B_OK)
9424 		return status;
9425 
9426 	return dir_create_entry_ref(device, inode, name, perms, false);
9427 }
9428 
9429 
9430 status_t
9431 _user_create_dir(int fd, const char* userPath, int perms)
9432 {
9433 	KPath pathBuffer;
9434 	if (pathBuffer.InitCheck() != B_OK)
9435 		return B_NO_MEMORY;
9436 
9437 	char* path = pathBuffer.LockBuffer();
9438 
9439 	if (!IS_USER_ADDRESS(userPath))
9440 		return B_BAD_ADDRESS;
9441 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9442 	if (status != B_OK)
9443 		return status;
9444 
9445 	return dir_create(fd, path, perms, false);
9446 }
9447 
9448 
9449 status_t
9450 _user_remove_dir(int fd, const char* userPath)
9451 {
9452 	KPath pathBuffer;
9453 	if (pathBuffer.InitCheck() != B_OK)
9454 		return B_NO_MEMORY;
9455 
9456 	char* path = pathBuffer.LockBuffer();
9457 
9458 	if (userPath != NULL) {
9459 		if (!IS_USER_ADDRESS(userPath))
9460 			return B_BAD_ADDRESS;
9461 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9462 		if (status != B_OK)
9463 			return status;
9464 	}
9465 
9466 	return dir_remove(fd, userPath ? path : NULL, false);
9467 }
9468 
9469 
9470 status_t
9471 _user_read_link(int fd, const char* userPath, char* userBuffer,
9472 	size_t* userBufferSize)
9473 {
9474 	KPath pathBuffer, linkBuffer;
9475 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9476 		return B_NO_MEMORY;
9477 
9478 	size_t bufferSize;
9479 
9480 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9481 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9482 		return B_BAD_ADDRESS;
9483 
9484 	char* path = pathBuffer.LockBuffer();
9485 	char* buffer = linkBuffer.LockBuffer();
9486 
9487 	if (userPath) {
9488 		if (!IS_USER_ADDRESS(userPath))
9489 			return B_BAD_ADDRESS;
9490 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9491 		if (status != B_OK)
9492 			return status;
9493 
9494 		if (bufferSize > B_PATH_NAME_LENGTH)
9495 			bufferSize = B_PATH_NAME_LENGTH;
9496 	}
9497 
9498 	size_t newBufferSize = bufferSize;
9499 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9500 		&newBufferSize, false);
9501 
9502 	// we also update the bufferSize in case of errors
9503 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9504 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9505 		return B_BAD_ADDRESS;
9506 
9507 	if (status != B_OK)
9508 		return status;
9509 
9510 	bufferSize = min_c(newBufferSize, bufferSize);
9511 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9512 		return B_BAD_ADDRESS;
9513 
9514 	return B_OK;
9515 }
9516 
9517 
9518 status_t
9519 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9520 	int mode)
9521 {
9522 	KPath pathBuffer;
9523 	KPath toPathBuffer;
9524 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9525 		return B_NO_MEMORY;
9526 
9527 	char* path = pathBuffer.LockBuffer();
9528 	char* toPath = toPathBuffer.LockBuffer();
9529 
9530 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9531 		return B_BAD_ADDRESS;
9532 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9533 	if (status != B_OK)
9534 		return status;
9535 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9536 	if (status != B_OK)
9537 		return status;
9538 
9539 	return common_create_symlink(fd, path, toPath, mode, false);
9540 }
9541 
9542 
9543 status_t
9544 _user_create_link(int pathFD, const char* userPath, int toFD,
9545 	const char* userToPath, bool traverseLeafLink)
9546 {
9547 	KPath pathBuffer;
9548 	KPath toPathBuffer;
9549 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9550 		return B_NO_MEMORY;
9551 
9552 	char* path = pathBuffer.LockBuffer();
9553 	char* toPath = toPathBuffer.LockBuffer();
9554 
9555 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9556 		return B_BAD_ADDRESS;
9557 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9558 	if (status != B_OK)
9559 		return status;
9560 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9561 	if (status != B_OK)
9562 		return status;
9563 
9564 	status = check_path(toPath);
9565 	if (status != B_OK)
9566 		return status;
9567 
9568 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9569 		false);
9570 }
9571 
9572 
9573 status_t
9574 _user_unlink(int fd, const char* userPath)
9575 {
9576 	KPath pathBuffer;
9577 	if (pathBuffer.InitCheck() != B_OK)
9578 		return B_NO_MEMORY;
9579 
9580 	char* path = pathBuffer.LockBuffer();
9581 
9582 	if (!IS_USER_ADDRESS(userPath))
9583 		return B_BAD_ADDRESS;
9584 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9585 	if (status != B_OK)
9586 		return status;
9587 
9588 	return common_unlink(fd, path, false);
9589 }
9590 
9591 
9592 status_t
9593 _user_rename(int oldFD, const char* userOldPath, int newFD,
9594 	const char* userNewPath)
9595 {
9596 	KPath oldPathBuffer;
9597 	KPath newPathBuffer;
9598 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9599 		return B_NO_MEMORY;
9600 
9601 	char* oldPath = oldPathBuffer.LockBuffer();
9602 	char* newPath = newPathBuffer.LockBuffer();
9603 
9604 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9605 		return B_BAD_ADDRESS;
9606 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9607 	if (status != B_OK)
9608 		return status;
9609 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9610 	if (status != B_OK)
9611 		return status;
9612 
9613 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9614 }
9615 
9616 
9617 status_t
9618 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9619 {
9620 	KPath pathBuffer;
9621 	if (pathBuffer.InitCheck() != B_OK)
9622 		return B_NO_MEMORY;
9623 
9624 	char* path = pathBuffer.LockBuffer();
9625 
9626 	if (!IS_USER_ADDRESS(userPath))
9627 		return B_BAD_ADDRESS;
9628 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9629 	if (status != B_OK)
9630 		return status;
9631 
9632 	// split into directory vnode and filename path
9633 	char filename[B_FILE_NAME_LENGTH];
9634 	struct vnode* dir;
9635 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9636 	if (status != B_OK)
9637 		return status;
9638 
9639 	VNodePutter _(dir);
9640 
9641 	// the underlying FS needs to support creating FIFOs
9642 	if (!HAS_FS_CALL(dir, create_special_node))
9643 		return B_UNSUPPORTED;
9644 
	// create the entry -- the FIFO sub node is set up automatically
9646 	fs_vnode superVnode;
9647 	ino_t nodeID;
9648 	status = FS_CALL(dir, create_special_node, filename, NULL,
9649 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9650 
9651 	// create_special_node() acquired a reference for us that we don't need.
9652 	if (status == B_OK)
9653 		put_vnode(dir->mount->volume, nodeID);
9654 
9655 	return status;
9656 }
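

/*!	Userland view (sketch): mkfifo() presumably funnels into this syscall.
	Note that only the bits covered by S_IUMSK survive from the caller's
	`perms`; the S_IFIFO type bit is forced by the kernel.

		if (mkfifo("/tmp/my_fifo", 0644) != 0)
			perror("mkfifo");
*/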
9657 
9658 
9659 status_t
9660 _user_create_pipe(int* userFDs)
9661 {
9662 	// rootfs should support creating FIFOs, but let's be sure
9663 	if (!HAS_FS_CALL(sRoot, create_special_node))
9664 		return B_UNSUPPORTED;
9665 
	// create the node -- the FIFO sub node is set up automatically
9667 	fs_vnode superVnode;
9668 	ino_t nodeID;
9669 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9670 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9671 	if (status != B_OK)
9672 		return status;
9673 
9674 	// We've got one reference to the node and need another one.
9675 	struct vnode* vnode;
9676 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9677 	if (status != B_OK) {
		// this should not happen
		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9681 		return status;
9682 	}
9683 
	// Everything looks good so far. Open one FD for reading and one for
	// writing.
9686 	int fds[2];
9687 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9688 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9689 
9690 	FDCloser closer0(fds[0], false);
9691 	FDCloser closer1(fds[1], false);
9692 
9693 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9694 
9695 	// copy FDs to userland
9696 	if (status == B_OK) {
9697 		if (!IS_USER_ADDRESS(userFDs)
9698 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9699 			status = B_BAD_ADDRESS;
9700 		}
9701 	}
9702 
	// keep the FDs if everything went fine
9704 	if (status == B_OK) {
9705 		closer0.Detach();
9706 		closer1.Detach();
9707 	}
9708 
9709 	return status;
9710 }
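

/*!	Userland view (sketch): pipe() presumably wraps this syscall via a
	`_kern_create_pipe` stub. The two FDs refer to the same FIFO node,
	fds[0] opened for reading and fds[1] for writing:

		int fds[2];
		if (pipe(fds) == 0) {
			write(fds[1], "ping", 4);
			char reply[4];
			read(fds[0], reply, sizeof(reply));
		}
*/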
9711 
9712 
9713 status_t
9714 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9715 {
9716 	KPath pathBuffer;
9717 	if (pathBuffer.InitCheck() != B_OK)
9718 		return B_NO_MEMORY;
9719 
9720 	char* path = pathBuffer.LockBuffer();
9721 
9722 	if (!IS_USER_ADDRESS(userPath))
9723 		return B_BAD_ADDRESS;
9724 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9725 	if (status != B_OK)
9726 		return status;
9727 
9728 	return common_access(fd, path, mode, effectiveUserGroup, false);
9729 }
9730 
9731 
9732 status_t
9733 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9734 	struct stat* userStat, size_t statSize)
9735 {
9736 	struct stat stat = {0};
9737 	status_t status;
9738 
9739 	if (statSize > sizeof(struct stat))
9740 		return B_BAD_VALUE;
9741 
9742 	if (!IS_USER_ADDRESS(userStat))
9743 		return B_BAD_ADDRESS;
9744 
9745 	if (userPath != NULL) {
9746 		// path given: get the stat of the node referred to by (fd, path)
9747 		if (!IS_USER_ADDRESS(userPath))
9748 			return B_BAD_ADDRESS;
9749 
9750 		KPath pathBuffer;
9751 		if (pathBuffer.InitCheck() != B_OK)
9752 			return B_NO_MEMORY;
9753 
9754 		char* path = pathBuffer.LockBuffer();
9755 
9756 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9757 		if (status != B_OK)
9758 			return status;
9759 
9760 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9761 	} else {
9762 		// no path given: get the FD and use the FD operation
9763 		struct file_descriptor* descriptor
9764 			= get_fd(get_current_io_context(false), fd);
9765 		if (descriptor == NULL)
9766 			return B_FILE_ERROR;
9767 
9768 		if (descriptor->ops->fd_read_stat)
9769 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9770 		else
9771 			status = B_UNSUPPORTED;
9772 
9773 		put_fd(descriptor);
9774 	}
9775 
9776 	if (status != B_OK)
9777 		return status;
9778 
9779 	return user_memcpy(userStat, &stat, statSize);
9780 }
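

/*!	The statSize parameter keeps this syscall compatible with userland
	binaries built against an older, smaller struct stat: only statSize
	bytes are copied back. A libroot stat() would thus look roughly like
	this (sketch; the `_kern_read_stat` stub name is assumed):

		struct stat st;
		status_t status = _kern_read_stat(-1, path, true, &st, sizeof(st));
*/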
9781 
9782 
9783 status_t
9784 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9785 	const struct stat* userStat, size_t statSize, int statMask)
9786 {
9787 	if (statSize > sizeof(struct stat))
9788 		return B_BAD_VALUE;
9789 
9790 	struct stat stat;
9791 
9792 	if (!IS_USER_ADDRESS(userStat)
9793 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9794 		return B_BAD_ADDRESS;
9795 
9796 	// clear additional stat fields
9797 	if (statSize < sizeof(struct stat))
9798 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9799 
9800 	status_t status;
9801 
9802 	if (userPath != NULL) {
9803 		// path given: write the stat of the node referred to by (fd, path)
9804 		if (!IS_USER_ADDRESS(userPath))
9805 			return B_BAD_ADDRESS;
9806 
9807 		KPath pathBuffer;
9808 		if (pathBuffer.InitCheck() != B_OK)
9809 			return B_NO_MEMORY;
9810 
9811 		char* path = pathBuffer.LockBuffer();
9812 
9813 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9814 		if (status != B_OK)
9815 			return status;
9816 
9817 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9818 			statMask, false);
9819 	} else {
9820 		// no path given: get the FD and use the FD operation
9821 		struct file_descriptor* descriptor
9822 			= get_fd(get_current_io_context(false), fd);
9823 		if (descriptor == NULL)
9824 			return B_FILE_ERROR;
9825 
9826 		if (descriptor->ops->fd_write_stat) {
9827 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9828 				statMask);
9829 		} else
9830 			status = B_UNSUPPORTED;
9831 
9832 		put_fd(descriptor);
9833 	}
9834 
9835 	return status;
9836 }
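

/*!	Only the fields selected by statMask are written. chmod(), for example,
	can be built on this syscall roughly as follows (sketch; B_STAT_MODE is
	the field mask from NodeMonitor.h, the stub name is assumed):

		struct stat st;
		st.st_mode = newMode;
		status_t status = _kern_write_stat(-1, path, true, &st, sizeof(st),
			B_STAT_MODE);
*/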
9837 
9838 
9839 int
9840 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9841 {
9842 	KPath pathBuffer;
9843 	if (pathBuffer.InitCheck() != B_OK)
9844 		return B_NO_MEMORY;
9845 
9846 	char* path = pathBuffer.LockBuffer();
9847 
9848 	if (userPath != NULL) {
9849 		if (!IS_USER_ADDRESS(userPath))
9850 			return B_BAD_ADDRESS;
9851 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9852 		if (status != B_OK)
9853 			return status;
9854 	}
9855 
9856 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9857 }
9858 
9859 
9860 ssize_t
9861 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9862 	size_t readBytes)
9863 {
9864 	char attribute[B_FILE_NAME_LENGTH];
9865 
9866 	if (userAttribute == NULL)
9867 		return B_BAD_VALUE;
9868 	if (!IS_USER_ADDRESS(userAttribute))
9869 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9871 	if (status != B_OK)
9872 		return status;
9873 
9874 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9875 	if (attr < 0)
9876 		return attr;
9877 
9878 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9879 	_user_close(attr);
9880 
9881 	return bytes;
9882 }
9883 
9884 
9885 ssize_t
9886 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9887 	const void* buffer, size_t writeBytes)
9888 {
9889 	char attribute[B_FILE_NAME_LENGTH];
9890 
9891 	if (userAttribute == NULL)
9892 		return B_BAD_VALUE;
9893 	if (!IS_USER_ADDRESS(userAttribute))
9894 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9896 	if (status != B_OK)
9897 		return status;
9898 
	// Try to support the BeOS-typical truncation semantics as well as the
	// position argument: we only truncate when writing at offset 0.
9901 	int attr = attr_create(fd, NULL, attribute, type,
9902 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9903 	if (attr < 0)
9904 		return attr;
9905 
9906 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9907 	_user_close(attr);
9908 
9909 	return bytes;
9910 }
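

/*!	Userland view (sketch): this function and _user_read_attr() above back
	the one-shot attribute API in fs_attr.h. Note the truncation rule
	implemented here: a write at position 0 replaces the attribute's
	contents, a write at a non-zero position preserves them.

		const char* subject = "Re: VFS";
		ssize_t written = fs_write_attr(fd, "MAIL:subject", B_STRING_TYPE,
			0, subject, strlen(subject) + 1);
*/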
9911 
9912 
9913 status_t
9914 _user_stat_attr(int fd, const char* userAttribute,
9915 	struct attr_info* userAttrInfo)
9916 {
9917 	char attribute[B_FILE_NAME_LENGTH];
9918 
9919 	if (userAttribute == NULL || userAttrInfo == NULL)
9920 		return B_BAD_VALUE;
9921 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9922 		return B_BAD_ADDRESS;
9923 	status_t status = user_copy_name(attribute, userAttribute,
9924 		sizeof(attribute));
9925 	if (status != B_OK)
9926 		return status;
9927 
9928 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9929 	if (attr < 0)
9930 		return attr;
9931 
9932 	struct file_descriptor* descriptor
9933 		= get_fd(get_current_io_context(false), attr);
9934 	if (descriptor == NULL) {
9935 		_user_close(attr);
9936 		return B_FILE_ERROR;
9937 	}
9938 
9939 	struct stat stat;
9940 	if (descriptor->ops->fd_read_stat)
9941 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9942 	else
9943 		status = B_UNSUPPORTED;
9944 
9945 	put_fd(descriptor);
9946 	_user_close(attr);
9947 
9948 	if (status == B_OK) {
9949 		attr_info info;
9950 		info.type = stat.st_type;
9951 		info.size = stat.st_size;
9952 
9953 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9954 			return B_BAD_ADDRESS;
9955 	}
9956 
9957 	return status;
9958 }
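

/*!	Userland view (sketch): fs_stat_attr() from fs_attr.h reports the type
	and size gathered above.

		attr_info info;
		if (fs_stat_attr(fd, "BEOS:TYPE", &info) == 0)
			printf("type %#" B_PRIx32 ", size %" B_PRIdOFF "\n", info.type,
				info.size);
*/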
9959 
9960 
9961 int
9962 _user_open_attr(int fd, const char* userPath, const char* userName,
9963 	uint32 type, int openMode)
9964 {
9965 	char name[B_FILE_NAME_LENGTH];
9966 
9967 	if (!IS_USER_ADDRESS(userName))
9968 		return B_BAD_ADDRESS;
9969 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9970 	if (status != B_OK)
9971 		return status;
9972 
9973 	KPath pathBuffer;
9974 	if (pathBuffer.InitCheck() != B_OK)
9975 		return B_NO_MEMORY;
9976 
9977 	char* path = pathBuffer.LockBuffer();
9978 
9979 	if (userPath != NULL) {
9980 		if (!IS_USER_ADDRESS(userPath))
9981 			return B_BAD_ADDRESS;
9982 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9983 		if (status != B_OK)
9984 			return status;
9985 	}
9986 
9987 	if ((openMode & O_CREAT) != 0) {
9988 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9989 			false);
9990 	}
9991 
9992 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9993 }
9994 
9995 
9996 status_t
9997 _user_remove_attr(int fd, const char* userName)
9998 {
9999 	char name[B_FILE_NAME_LENGTH];
10000 
10001 	if (!IS_USER_ADDRESS(userName))
10002 		return B_BAD_ADDRESS;
10003 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10004 	if (status != B_OK)
10005 		return status;
10006 
10007 	return attr_remove(fd, name, false);
10008 }
10009 
10010 
10011 status_t
10012 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
10013 	const char* userToName)
10014 {
10015 	if (!IS_USER_ADDRESS(userFromName)
10016 		|| !IS_USER_ADDRESS(userToName))
10017 		return B_BAD_ADDRESS;
10018 
10019 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
10020 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
10021 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
10022 		return B_NO_MEMORY;
10023 
10024 	char* fromName = fromNameBuffer.LockBuffer();
10025 	char* toName = toNameBuffer.LockBuffer();
10026 
10027 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
10028 	if (status != B_OK)
10029 		return status;
10030 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
10031 	if (status != B_OK)
10032 		return status;
10033 
10034 	return attr_rename(fromFile, fromName, toFile, toName, false);
10035 }
10036 
10037 
10038 int
10039 _user_open_index_dir(dev_t device)
10040 {
10041 	return index_dir_open(device, false);
10042 }
10043 
10044 
10045 status_t
10046 _user_create_index(dev_t device, const char* userName, uint32 type,
10047 	uint32 flags)
10048 {
10049 	char name[B_FILE_NAME_LENGTH];
10050 
10051 	if (!IS_USER_ADDRESS(userName))
10052 		return B_BAD_ADDRESS;
10053 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10054 	if (status != B_OK)
10055 		return status;
10056 
10057 	return index_create(device, name, type, flags, false);
10058 }
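

/*!	Userland view (sketch): fs_create_index() from fs_index.h maps directly
	onto this syscall. Creating a string index on the volume holding the
	home directory, for example:

		dev_t device = dev_for_path("/boot/home");
		if (fs_create_index(device, "MAIL:subject", B_STRING_TYPE, 0) != 0)
			perror("fs_create_index");
*/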
10059 
10060 
10061 status_t
10062 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
10063 {
10064 	char name[B_FILE_NAME_LENGTH];
10065 	struct stat stat = {0};
10066 	status_t status;
10067 
10068 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
10069 		return B_BAD_ADDRESS;
10070 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10071 	if (status != B_OK)
10072 		return status;
10073 
10074 	status = index_name_read_stat(device, name, &stat, false);
10075 	if (status == B_OK) {
10076 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
10077 			return B_BAD_ADDRESS;
10078 	}
10079 
10080 	return status;
10081 }
10082 
10083 
10084 status_t
10085 _user_remove_index(dev_t device, const char* userName)
10086 {
10087 	char name[B_FILE_NAME_LENGTH];
10088 
10089 	if (!IS_USER_ADDRESS(userName))
10090 		return B_BAD_ADDRESS;
10091 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10092 	if (status != B_OK)
10093 		return status;
10094 
10095 	return index_remove(device, name, false);
10096 }
10097 
10098 
10099 status_t
10100 _user_getcwd(char* userBuffer, size_t size)
10101 {
10102 	if (size == 0)
10103 		return B_BAD_VALUE;
10104 	if (!IS_USER_ADDRESS(userBuffer))
10105 		return B_BAD_ADDRESS;
10106 
10107 	if (size > kMaxPathLength)
10108 		size = kMaxPathLength;
10109 
10110 	KPath pathBuffer(size);
10111 	if (pathBuffer.InitCheck() != B_OK)
10112 		return B_NO_MEMORY;
10113 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10115 
10116 	char* path = pathBuffer.LockBuffer();
10117 
10118 	status_t status = get_cwd(path, size, false);
10119 	if (status != B_OK)
10120 		return status;
10121 
10122 	// Copy back the result
10123 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10124 		return B_BAD_ADDRESS;
10125 
10126 	return status;
10127 }
10128 
10129 
10130 status_t
10131 _user_setcwd(int fd, const char* userPath)
10132 {
10133 	TRACE(("user_setcwd: path = %p\n", userPath));
10134 
10135 	KPath pathBuffer;
10136 	if (pathBuffer.InitCheck() != B_OK)
10137 		return B_NO_MEMORY;
10138 
10139 	char* path = pathBuffer.LockBuffer();
10140 
10141 	if (userPath != NULL) {
10142 		if (!IS_USER_ADDRESS(userPath))
10143 			return B_BAD_ADDRESS;
10144 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10145 		if (status != B_OK)
10146 			return status;
10147 	}
10148 
10149 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10150 }
10151 
10152 
10153 status_t
10154 _user_change_root(const char* userPath)
10155 {
10156 	// only root is allowed to chroot()
10157 	if (geteuid() != 0)
10158 		return B_NOT_ALLOWED;
10159 
10160 	// alloc path buffer
10161 	KPath pathBuffer;
10162 	if (pathBuffer.InitCheck() != B_OK)
10163 		return B_NO_MEMORY;
10164 
	// copy userland path to kernel; there are no sensible NULL semantics
	// here, and skipping the copy would leave the buffer uninitialized
	char* path = pathBuffer.LockBuffer();
	if (userPath == NULL || !IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	// get the vnode
	struct vnode* vnode;
	status = path_to_vnode(path, true, &vnode, NULL, false);
10178 	if (status != B_OK)
10179 		return status;
10180 
10181 	// set the new root
10182 	struct io_context* context = get_current_io_context(false);
10183 	mutex_lock(&sIOContextRootLock);
10184 	struct vnode* oldRoot = context->root;
10185 	context->root = vnode;
10186 	mutex_unlock(&sIOContextRootLock);
10187 
10188 	put_vnode(oldRoot);
10189 
10190 	return B_OK;
10191 }
10192 
10193 
10194 int
10195 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10196 	uint32 flags, port_id port, int32 token)
10197 {
10198 	if (device < 0 || userQuery == NULL || queryLength == 0)
10199 		return B_BAD_VALUE;
10200 
10201 	if (!IS_USER_ADDRESS(userQuery))
10202 		return B_BAD_ADDRESS;
10203 
	// this is a safety restriction against unreasonably large queries
10205 	if (queryLength >= 65536)
10206 		return B_NAME_TOO_LONG;
10207 
10208 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10209 	if (!query.IsValid())
10210 		return B_NO_MEMORY;
10211 
10212 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10213 		return B_BAD_ADDRESS;
10214 
10215 	return query_open(device, query, flags, port, token, false);
10216 }
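

/*!	Userland view (sketch): the port/token pair is what makes live queries
	work -- the kernel sends entry update notifications there, much like
	node monitor messages. The C API in fs_query.h hides all of this behind
	fs_open_query() and fs_open_live_query():

		DIR* query = fs_open_query(device, "name==*.cpp", 0);
		if (query != NULL) {
			while (struct dirent* entry = fs_read_query(query))
				puts(entry->d_name);
			fs_close_query(query);
		}
*/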
10217 
10218 
10219 #include "vfs_request_io.cpp"
10220