xref: /haiku/src/system/kernel/fs/vfs.cpp (revision e7e3b6c14af93058fc5aab68ffa695bbcdd77053)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <block_cache.h>
36 #include <boot/kernel_args.h>
37 #include <debug_heap.h>
38 #include <disk_device_manager/KDiskDevice.h>
39 #include <disk_device_manager/KDiskDeviceManager.h>
40 #include <disk_device_manager/KDiskDeviceUtils.h>
41 #include <disk_device_manager/KDiskSystem.h>
42 #include <fd.h>
43 #include <file_cache.h>
44 #include <fs/node_monitor.h>
45 #include <KPath.h>
46 #include <lock.h>
47 #include <low_resource_manager.h>
48 #include <slab/Slab.h>
49 #include <StackOrHeapArray.h>
50 #include <syscalls.h>
51 #include <syscall_restart.h>
52 #include <tracing.h>
53 #include <util/atomic.h>
54 #include <util/AutoLock.h>
55 #include <util/DoublyLinkedList.h>
56 #include <vfs.h>
57 #include <vm/vm.h>
58 #include <vm/VMCache.h>
59 #include <wait_for_objects.h>
60 
61 #include "EntryCache.h"
62 #include "fifo.h"
63 #include "IORequest.h"
64 #include "unused_vnodes.h"
65 #include "vfs_tracing.h"
66 #include "Vnode.h"
67 #include "../cache/vnode_store.h"
68 
69 
70 //#define TRACE_VFS
71 #ifdef TRACE_VFS
72 #	define TRACE(x) dprintf x
73 #	define FUNCTION(x) dprintf x
74 #else
75 #	define TRACE(x) ;
76 #	define FUNCTION(x) ;
77 #endif
78 
79 #define ADD_DEBUGGER_COMMANDS
80 
81 
82 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
83 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
84 
85 #if KDEBUG
86 #	define FS_CALL(vnode, op, params...) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode, params) \
89 			: (panic("FS_CALL op " #op " is NULL"), 0))
90 #	define FS_CALL_NO_PARAMS(vnode, op) \
91 		( HAS_FS_CALL(vnode, op) ? \
92 			vnode->ops->op(vnode->mount->volume, vnode) \
93 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL(mount, op, params...) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume, params) \
97 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
98 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
99 		( HAS_FS_MOUNT_CALL(mount, op) ? \
100 			mount->volume->ops->op(mount->volume) \
101 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
102 #else
103 #	define FS_CALL(vnode, op, params...) \
104 			vnode->ops->op(vnode->mount->volume, vnode, params)
105 #	define FS_CALL_NO_PARAMS(vnode, op) \
106 			vnode->ops->op(vnode->mount->volume, vnode)
107 #	define FS_MOUNT_CALL(mount, op, params...) \
108 			mount->volume->ops->op(mount->volume, params)
109 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
110 			mount->volume->ops->op(mount->volume)
111 #endif
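

// Illustrative sketch (not part of the original source): how the dispatch
// macros above are meant to be used. The read_stat hook below is just an
// example; callers test HAS_FS_CALL() first for optional hooks, since with
// KDEBUG enabled FS_CALL() panics on a NULL hook.
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;

	// expands (in the non-KDEBUG case) to
	// vnode->ops->read_stat(vnode->mount->volume, vnode, stat)
	return FS_CALL(vnode, read_stat, stat);
}
#endif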
112 
113 
114 const static size_t kMaxPathLength = 65536;
115 	// The absolute maximum path length (for getcwd() - this does not depend
116 	// on PATH_MAX)
117 
118 
119 typedef DoublyLinkedList<vnode> VnodeList;
120 
121 /*!	\brief Structure to manage a mounted file system
122 
123 	Note: The root_vnode and root_vnode->covers fields (what others?) are
124 	initialized in fs_mount() and not changed afterwards. That is, as soon
125 	as the mount is mounted and it is made sure it won't be unmounted
126 	(e.g. by holding a reference to a vnode of that mount), (read) access
127 	to those fields is always safe, even without additional locking. Moreover,
128 	while mounted the mount holds a reference to the root_vnode->covers vnode,
129 	thus making the access path vnode->mount->root_vnode->covers->mount->...
130 	safe if a reference to vnode is held (note that for the root mount
131 	root_vnode->covers is NULL, though).
132 */
133 struct fs_mount {
134 	fs_mount()
135 		:
136 		volume(NULL),
137 		device_name(NULL)
138 	{
139 		mutex_init(&lock, "mount lock");
140 	}
141 
142 	~fs_mount()
143 	{
144 		mutex_destroy(&lock);
145 		free(device_name);
146 
147 		while (volume) {
148 			fs_volume* superVolume = volume->super_volume;
149 
150 			if (volume->file_system != NULL)
151 				put_module(volume->file_system->info.name);
152 
153 			free(volume->file_system_name);
154 			free(volume);
155 			volume = superVolume;
156 		}
157 	}
158 
159 	struct fs_mount* next;
160 	dev_t			id;
161 	fs_volume*		volume;
162 	char*			device_name;
163 	mutex			lock;	// guards the vnodes list
164 	struct vnode*	root_vnode;
165 	struct vnode*	covers_vnode;	// immutable
166 	KPartition*		partition;
167 	VnodeList		vnodes;
168 	EntryCache		entry_cache;
169 	bool			unmounting;
170 	bool			owns_file_device;
171 };
172 
173 
174 namespace {
175 
176 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
177 	list_link		link;
178 	void*			bound_to;
179 	team_id			team;
180 	pid_t			session;
181 	off_t			start;
182 	off_t			end;
183 	bool			shared;
184 };
185 
186 typedef DoublyLinkedList<advisory_lock> LockList;
187 
188 } // namespace
189 
190 
191 struct advisory_locking {
192 	sem_id			lock;
193 	sem_id			wait_sem;
194 	LockList		locks;
195 
196 	advisory_locking()
197 		:
198 		lock(-1),
199 		wait_sem(-1)
200 	{
201 	}
202 
203 	~advisory_locking()
204 	{
205 		if (lock >= 0)
206 			delete_sem(lock);
207 		if (wait_sem >= 0)
208 			delete_sem(wait_sem);
209 	}
210 };
211 
212 /*!	\brief Guards sMountsTable.
213 
214 	The holder is allowed read/write access to sMountsTable.
215 	Manipulation of the fs_mount structures themselves
216 	(and their destruction) requires different locks though.
217 */
218 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
219 
220 /*!	\brief Guards mount/unmount operations.
221 
222 	fs_mount() and fs_unmount() hold the lock during their whole operation.
223 	That is, locking the lock ensures that no FS is mounted/unmounted. In
224 	particular this means that
225 	- sMountsTable will not be modified,
226 	- the fields immutable after initialization of the fs_mount structures in
227 	  sMountsTable will not be modified.
228 
229 	The thread trying to lock the lock must not hold sVnodeLock or
230 	sMountLock.
231 */
232 static recursive_lock sMountOpLock;
233 
234 /*!	\brief Guards sVnodeTable.
235 
236 	The holder is allowed read/write access to sVnodeTable and to
237 	any unbusy vnode in that table, save for the immutable fields (device, id,
238 	private_node, mount) to which only read-only access is allowed.
239 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
240 	well as the busy, removed, unused flags, and the vnode's type can also be
241 	modified when holding a read lock to sVnodeLock *and* having the vnode
242 	locked. Write access to covered_by and covers requires write locking
243 	sVnodeLock.
244 
245 	The thread trying to acquire the lock must not hold sMountLock.
246 	You must not hold this lock when calling create_sem(), as this might call
247 	vfs_free_unused_vnodes() and thus cause a deadlock.
248 */
249 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
250 
251 /*!	\brief Guards io_context::root.
252 
253 	Must be held when setting or getting the io_context::root field.
254 	The only operation allowed while holding this lock besides getting or
255 	setting the field is inc_vnode_ref_count() on io_context::root.
256 */
257 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
258 
259 
260 namespace {
261 
262 struct vnode_hash_key {
263 	dev_t	device;
264 	ino_t	vnode;
265 };
266 
267 struct VnodeHash {
268 	typedef vnode_hash_key	KeyType;
269 	typedef	struct vnode	ValueType;
270 
271 #define VHASH(mountid, vnodeid) \
272 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
273 
274 	size_t HashKey(KeyType key) const
275 	{
276 		return VHASH(key.device, key.vnode);
277 	}
278 
279 	size_t Hash(ValueType* vnode) const
280 	{
281 		return VHASH(vnode->device, vnode->id);
282 	}
283 
284 #undef VHASH
285 
286 	bool Compare(KeyType key, ValueType* vnode) const
287 	{
288 		return vnode->device == key.device && vnode->id == key.vnode;
289 	}
290 
291 	ValueType*& GetLink(ValueType* value) const
292 	{
293 		return value->next;
294 	}
295 };
296 
297 typedef BOpenHashTable<VnodeHash> VnodeTable;
298 
299 
300 struct MountHash {
301 	typedef dev_t			KeyType;
302 	typedef	struct fs_mount	ValueType;
303 
304 	size_t HashKey(KeyType key) const
305 	{
306 		return key;
307 	}
308 
309 	size_t Hash(ValueType* mount) const
310 	{
311 		return mount->id;
312 	}
313 
314 	bool Compare(KeyType key, ValueType* mount) const
315 	{
316 		return mount->id == key;
317 	}
318 
319 	ValueType*& GetLink(ValueType* value) const
320 	{
321 		return value->next;
322 	}
323 };
324 
325 typedef BOpenHashTable<MountHash> MountTable;
326 
327 } // namespace
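

// Illustrative sketch (not part of the original source): how BOpenHashTable
// uses the policy structs above. A lookup hashes the key via HashKey(), then
// walks the bucket chain (linked through GetLink(), i.e. vnode::next),
// calling Compare() until a match is found. sVnodeTable is defined just
// below; the IDs are hypothetical.
#if 0
struct vnode_hash_key key;
key.device = mountID;
key.vnode = vnodeID;
struct vnode* vnode = sVnodeTable->Lookup(key);
	// NULL if no vnode with that (device, id) pair is in the table
#endif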
328 
329 
330 object_cache* sPathNameCache;
331 object_cache* sFileDescriptorCache;
332 
333 #define VNODE_HASH_TABLE_SIZE 1024
334 static VnodeTable* sVnodeTable;
335 static struct vnode* sRoot;
336 
337 #define MOUNTS_HASH_TABLE_SIZE 16
338 static MountTable* sMountsTable;
339 static dev_t sNextMountID = 1;
340 
341 #define MAX_TEMP_IO_VECS 8
342 
343 // How long to wait for busy vnodes: 2000 retries of 5000 µs each (10s total)
344 #define BUSY_VNODE_RETRIES 2000
345 #define BUSY_VNODE_DELAY 5000
346 
347 mode_t __gUmask = 022;
348 
349 /* function declarations */
350 
351 static void free_unused_vnodes();
352 
353 // file descriptor operation prototypes
354 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
355 	void* buffer, size_t* _bytes);
356 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
357 	const void* buffer, size_t* _bytes);
358 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
359 	int seekType);
360 static void file_free_fd(struct file_descriptor* descriptor);
361 static status_t file_close(struct file_descriptor* descriptor);
362 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
363 	struct selectsync* sync);
364 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
365 	struct selectsync* sync);
366 static status_t dir_read(struct io_context* context,
367 	struct file_descriptor* descriptor, struct dirent* buffer,
368 	size_t bufferSize, uint32* _count);
369 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
370 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
371 static status_t dir_rewind(struct file_descriptor* descriptor);
372 static void dir_free_fd(struct file_descriptor* descriptor);
373 static status_t dir_close(struct file_descriptor* descriptor);
374 static status_t attr_dir_read(struct io_context* context,
375 	struct file_descriptor* descriptor, struct dirent* buffer,
376 	size_t bufferSize, uint32* _count);
377 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
378 static void attr_dir_free_fd(struct file_descriptor* descriptor);
379 static status_t attr_dir_close(struct file_descriptor* descriptor);
380 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
381 	void* buffer, size_t* _bytes);
382 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
383 	const void* buffer, size_t* _bytes);
384 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
385 	int seekType);
386 static void attr_free_fd(struct file_descriptor* descriptor);
387 static status_t attr_close(struct file_descriptor* descriptor);
388 static status_t attr_read_stat(struct file_descriptor* descriptor,
389 	struct stat* statData);
390 static status_t attr_write_stat(struct file_descriptor* descriptor,
391 	const struct stat* stat, int statMask);
392 static status_t index_dir_read(struct io_context* context,
393 	struct file_descriptor* descriptor, struct dirent* buffer,
394 	size_t bufferSize, uint32* _count);
395 static status_t index_dir_rewind(struct file_descriptor* descriptor);
396 static void index_dir_free_fd(struct file_descriptor* descriptor);
397 static status_t index_dir_close(struct file_descriptor* descriptor);
398 static status_t query_read(struct io_context* context,
399 	struct file_descriptor* descriptor, struct dirent* buffer,
400 	size_t bufferSize, uint32* _count);
401 static status_t query_rewind(struct file_descriptor* descriptor);
402 static void query_free_fd(struct file_descriptor* descriptor);
403 static status_t query_close(struct file_descriptor* descriptor);
404 
405 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
406 	void* buffer, size_t length);
407 static status_t common_read_stat(struct file_descriptor* descriptor,
408 	struct stat* statData);
409 static status_t common_write_stat(struct file_descriptor* descriptor,
410 	const struct stat* statData, int statMask);
411 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
412 	struct stat* stat, bool kernel);
413 
414 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
415 	bool traverseLeafLink, int count, bool kernel,
416 	struct vnode** _vnode, ino_t* _parentID);
417 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
418 	size_t bufferSize, bool kernel);
419 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
420 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
421 static void inc_vnode_ref_count(struct vnode* vnode);
422 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
423 	bool reenter);
424 static inline void put_vnode(struct vnode* vnode);
425 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
426 	bool kernel);
427 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
428 
429 
430 static struct fd_ops sFileOps = {
431 	file_read,
432 	file_write,
433 	file_seek,
434 	common_ioctl,
435 	NULL,		// set_flags
436 	file_select,
437 	file_deselect,
438 	NULL,		// read_dir()
439 	NULL,		// rewind_dir()
440 	common_read_stat,
441 	common_write_stat,
442 	file_close,
443 	file_free_fd
444 };
445 
446 static struct fd_ops sDirectoryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	common_ioctl,
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	dir_read,
455 	dir_rewind,
456 	common_read_stat,
457 	common_write_stat,
458 	dir_close,
459 	dir_free_fd
460 };
461 
462 static struct fd_ops sAttributeDirectoryOps = {
463 	NULL,		// read()
464 	NULL,		// write()
465 	NULL,		// seek()
466 	common_ioctl,
467 	NULL,		// set_flags
468 	NULL,		// select()
469 	NULL,		// deselect()
470 	attr_dir_read,
471 	attr_dir_rewind,
472 	common_read_stat,
473 	common_write_stat,
474 	attr_dir_close,
475 	attr_dir_free_fd
476 };
477 
478 static struct fd_ops sAttributeOps = {
479 	attr_read,
480 	attr_write,
481 	attr_seek,
482 	common_ioctl,
483 	NULL,		// set_flags
484 	NULL,		// select()
485 	NULL,		// deselect()
486 	NULL,		// read_dir()
487 	NULL,		// rewind_dir()
488 	attr_read_stat,
489 	attr_write_stat,
490 	attr_close,
491 	attr_free_fd
492 };
493 
494 static struct fd_ops sIndexDirectoryOps = {
495 	NULL,		// read()
496 	NULL,		// write()
497 	NULL,		// seek()
498 	NULL,		// ioctl()
499 	NULL,		// set_flags
500 	NULL,		// select()
501 	NULL,		// deselect()
502 	index_dir_read,
503 	index_dir_rewind,
504 	NULL,		// read_stat()
505 	NULL,		// write_stat()
506 	index_dir_close,
507 	index_dir_free_fd
508 };
509 
510 #if 0
511 static struct fd_ops sIndexOps = {
512 	NULL,		// read()
513 	NULL,		// write()
514 	NULL,		// seek()
515 	NULL,		// ioctl()
516 	NULL,		// set_flags
517 	NULL,		// select()
518 	NULL,		// deselect()
519 	NULL,		// dir_read()
520 	NULL,		// dir_rewind()
521 	index_read_stat,	// read_stat()
522 	NULL,		// write_stat()
523 	NULL,		// dir_close()
524 	NULL		// free_fd()
525 };
526 #endif
527 
528 static struct fd_ops sQueryOps = {
529 	NULL,		// read()
530 	NULL,		// write()
531 	NULL,		// seek()
532 	NULL,		// ioctl()
533 	NULL,		// set_flags
534 	NULL,		// select()
535 	NULL,		// deselect()
536 	query_read,
537 	query_rewind,
538 	NULL,		// read_stat()
539 	NULL,		// write_stat()
540 	query_close,
541 	query_free_fd
542 };
543 
544 
545 namespace {
546 
547 class VNodePutter {
548 public:
549 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
550 
551 	~VNodePutter()
552 	{
553 		Put();
554 	}
555 
556 	void SetTo(struct vnode* vnode)
557 	{
558 		Put();
559 		fVNode = vnode;
560 	}
561 
562 	void Put()
563 	{
564 		if (fVNode) {
565 			put_vnode(fVNode);
566 			fVNode = NULL;
567 		}
568 	}
569 
570 	struct vnode* Detach()
571 	{
572 		struct vnode* vnode = fVNode;
573 		fVNode = NULL;
574 		return vnode;
575 	}
576 
577 private:
578 	struct vnode* fVNode;
579 };
580 
581 
582 class FDCloser {
583 public:
584 	FDCloser() : fFD(-1), fKernel(true) {}
585 
586 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
587 
588 	~FDCloser()
589 	{
590 		Close();
591 	}
592 
593 	void SetTo(int fd, bool kernel)
594 	{
595 		Close();
596 		fFD = fd;
597 		fKernel = kernel;
598 	}
599 
600 	void Close()
601 	{
602 		if (fFD >= 0) {
603 			if (fKernel)
604 				_kern_close(fFD);
605 			else
606 				_user_close(fFD);
607 			fFD = -1;
608 		}
609 	}
610 
611 	int Detach()
612 	{
613 		int fd = fFD;
614 		fFD = -1;
615 		return fd;
616 	}
617 
618 private:
619 	int		fFD;
620 	bool	fKernel;
621 };
622 
623 } // namespace
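

// Illustrative sketch (not part of the original source): the intended RAII
// usage of the two helpers above. some_setup_fails() is hypothetical; the
// pattern matches how the syscall implementations later in this file use
// these classes.
#if 0
status_t
example_open_operation(struct vnode* vnode, int fd, bool kernel)
{
	VNodePutter vnodePutter(vnode);	// puts the vnode on early return
	FDCloser fdCloser(fd, kernel);	// closes the descriptor on early return

	if (some_setup_fails())
		return B_ERROR;	// both helpers clean up automatically

	fdCloser.Detach();	// success: keep the descriptor open ...
	vnodePutter.Detach();	// ... and keep the vnode reference
	return B_OK;
}
#endif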
624 
625 
626 #if VFS_PAGES_IO_TRACING
627 
628 namespace VFSPagesIOTracing {
629 
630 class PagesIOTraceEntry : public AbstractTraceEntry {
631 protected:
632 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
633 		const generic_io_vec* vecs, uint32 count, uint32 flags,
634 		generic_size_t bytesRequested, status_t status,
635 		generic_size_t bytesTransferred)
636 		:
637 		fVnode(vnode),
638 		fMountID(vnode->mount->id),
639 		fNodeID(vnode->id),
640 		fCookie(cookie),
641 		fPos(pos),
642 		fCount(count),
643 		fFlags(flags),
644 		fBytesRequested(bytesRequested),
645 		fStatus(status),
646 		fBytesTransferred(bytesTransferred)
647 	{
648 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
649 			sizeof(generic_io_vec) * count, false);
650 	}
651 
652 	void AddDump(TraceOutput& out, const char* mode)
653 	{
654 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
655 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
656 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
657 			(uint64)fBytesRequested);
658 
659 		if (fVecs != NULL) {
660 			for (uint32 i = 0; i < fCount; i++) {
661 				if (i > 0)
662 					out.Print(", ");
663 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
664 					(uint64)fVecs[i].length);
665 			}
666 		}
667 
668 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
669 			"transferred: %" B_PRIu64, fFlags, fStatus,
670 			(uint64)fBytesTransferred);
671 	}
672 
673 protected:
674 	struct vnode*	fVnode;
675 	dev_t			fMountID;
676 	ino_t			fNodeID;
677 	void*			fCookie;
678 	off_t			fPos;
679 	generic_io_vec*	fVecs;
680 	uint32			fCount;
681 	uint32			fFlags;
682 	generic_size_t	fBytesRequested;
683 	status_t		fStatus;
684 	generic_size_t	fBytesTransferred;
685 };
686 
687 
688 class ReadPages : public PagesIOTraceEntry {
689 public:
690 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
691 		const generic_io_vec* vecs, uint32 count, uint32 flags,
692 		generic_size_t bytesRequested, status_t status,
693 		generic_size_t bytesTransferred)
694 		:
695 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
696 			bytesRequested, status, bytesTransferred)
697 	{
698 		Initialized();
699 	}
700 
701 	virtual void AddDump(TraceOutput& out)
702 	{
703 		PagesIOTraceEntry::AddDump(out, "read");
704 	}
705 };
706 
707 
708 class WritePages : public PagesIOTraceEntry {
709 public:
710 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
711 		const generic_io_vec* vecs, uint32 count, uint32 flags,
712 		generic_size_t bytesRequested, status_t status,
713 		generic_size_t bytesTransferred)
714 		:
715 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
716 			bytesRequested, status, bytesTransferred)
717 	{
718 		Initialized();
719 	}
720 
721 	virtual void AddDump(TraceOutput& out)
722 	{
723 		PagesIOTraceEntry::AddDump(out, "write");
724 	}
725 };
726 
727 }	// namespace VFSPagesIOTracing
728 
729 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
730 #else
731 #	define TPIO(x) ;
732 #endif	// VFS_PAGES_IO_TRACING
733 
734 
735 /*! Finds the mounted device (the fs_mount structure) with the given ID.
736 	Note, you must hold sMountLock when you call this function.
737 */
738 static struct fs_mount*
739 find_mount(dev_t id)
740 {
741 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
742 
743 	return sMountsTable->Lookup(id);
744 }
745 
746 
747 static status_t
748 get_mount(dev_t id, struct fs_mount** _mount)
749 {
750 	struct fs_mount* mount;
751 
752 	ReadLocker nodeLocker(sVnodeLock);
753 	ReadLocker mountLocker(sMountLock);
754 
755 	mount = find_mount(id);
756 	if (mount == NULL)
757 		return B_BAD_VALUE;
758 
759 	struct vnode* rootNode = mount->root_vnode;
760 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
761 		|| rootNode->ref_count == 0) {
762 		// might have been called during a mount/unmount operation
763 		return B_BUSY;
764 	}
765 
766 	inc_vnode_ref_count(rootNode);
767 	*_mount = mount;
768 	return B_OK;
769 }
770 
771 
772 static void
773 put_mount(struct fs_mount* mount)
774 {
775 	if (mount)
776 		put_vnode(mount->root_vnode);
777 }
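

// Illustrative sketch (not part of the original source): get_mount() and
// put_mount() pair up as follows. get_mount() pins the mount by referencing
// its root vnode, so the file system cannot be unmounted while the caller
// works with it. The mount ID is hypothetical.
#if 0
struct fs_mount* mount;
status_t status = get_mount(id, &mount);
if (status != B_OK)
	return status;

// ... safely use mount->volume, mount->root_vnode, etc. ...

put_mount(mount);	// releases the root vnode reference again
#endif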
778 
779 
780 /*!	Tries to open the specified file system module.
781 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
782 	Returns a pointer to the file system module interface, or NULL if it
783 	could not open the module.
784 */
785 static file_system_module_info*
786 get_file_system(const char* fsName)
787 {
788 	char name[B_FILE_NAME_LENGTH];
789 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
790 		// construct module name if we didn't get one
791 		// (we currently support only one API)
792 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
793 		fsName = NULL;
794 	}
795 
796 	file_system_module_info* info;
797 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
798 		return NULL;
799 
800 	return info;
801 }
802 
803 
804 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
805 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
806 	The name is allocated for you, and you have to free() it when you're
807 	done with it.
808 	Returns NULL if the required memory is not available.
809 */
810 static char*
811 get_file_system_name(const char* fsName)
812 {
813 	const size_t length = strlen("file_systems/");
814 
815 	if (strncmp(fsName, "file_systems/", length)) {
816 		// the name already seems to be the module's file name
817 		return strdup(fsName);
818 	}
819 
820 	fsName += length;
821 	const char* end = strchr(fsName, '/');
822 	if (end == NULL) {
823 		// this doesn't seem to be a valid name, but well...
824 		return strdup(fsName);
825 	}
826 
827 	// cut off the trailing /v1
828 
829 	char* name = (char*)malloc(end + 1 - fsName);
830 	if (name == NULL)
831 		return NULL;
832 
833 	strlcpy(name, fsName, end + 1 - fsName);
834 	return name;
835 }
836 
837 
838 /*!	Accepts a list of file system names separated by a colon, one for each
839 	layer, and returns the file system name for the specified layer.
840 	The name is allocated for you, and you have to free() it when you're
841 	done with it.
842 	Returns NULL if the required memory is not available or if there is no
843 	name for the specified layer.
844 */
845 static char*
846 get_file_system_name_for_layer(const char* fsNames, int32 layer)
847 {
848 	while (layer >= 0) {
849 		const char* end = strchr(fsNames, ':');
850 		if (end == NULL) {
851 			if (layer == 0)
852 				return strdup(fsNames);
853 			return NULL;
854 		}
855 
856 		if (layer == 0) {
857 			size_t length = end - fsNames + 1;
858 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
859 			strlcpy(result, fsNames, length);
860 			return result;
861 		}
862 
863 		fsNames = end + 1;
864 		layer--;
865 	}
866 
867 	return NULL;
868 }
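

// Illustrative examples (not part of the original source) for the two name
// helpers above, with hypothetical inputs:
#if 0
char* name = get_file_system_name("file_systems/bfs/v1");	// -> "bfs"
free(name);
name = get_file_system_name_for_layer("bfs:write_overlay", 0);	// -> "bfs"
free(name);
name = get_file_system_name_for_layer("bfs:write_overlay", 1);
	// -> "write_overlay"
free(name);
name = get_file_system_name_for_layer("bfs:write_overlay", 2);	// -> NULL
#endif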
869 
870 
871 static void
872 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
873 {
874 	MutexLocker _(mount->lock);
875 	mount->vnodes.Add(vnode);
876 }
877 
878 
879 static void
880 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
881 {
882 	MutexLocker _(mount->lock);
883 	mount->vnodes.Remove(vnode);
884 }
885 
886 
887 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
888 
889 	The caller must hold the sVnodeLock (read lock at least).
890 
891 	\param mountID the mount ID.
892 	\param vnodeID the node ID.
893 
894 	\return The vnode structure, if it was found in the hash table, \c NULL
895 			otherwise.
896 */
897 static struct vnode*
898 lookup_vnode(dev_t mountID, ino_t vnodeID)
899 {
900 	struct vnode_hash_key key;
901 
902 	key.device = mountID;
903 	key.vnode = vnodeID;
904 
905 	return sVnodeTable->Lookup(key);
906 }
907 
908 
909 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
910 
911 	This will also wait for BUSY_VNODE_DELAY before returning if one should
912 	still wait for the vnode becoming unbusy.
913 
914 	\return \c true if one should retry, \c false if not.
915 */
916 static bool
917 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
918 {
919 	if (--tries < 0) {
920 		// vnode doesn't seem to become unbusy
921 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
922 			" is not becoming unbusy!\n", mountID, vnodeID);
923 		return false;
924 	}
925 	snooze(BUSY_VNODE_DELAY);
926 	return true;
927 }
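

// Illustrative sketch (not part of the original source): the canonical retry
// loop around retry_busy_vnode(), as used by get_vnode() below. The counter
// starts at BUSY_VNODE_RETRIES, and the helper snoozes BUSY_VNODE_DELAY
// microseconds per round. The IDs are hypothetical, and the sVnodeLock
// handling of the real code is elided here.
#if 0
int32 tries = BUSY_VNODE_RETRIES;
while (true) {
	// lookup_vnode() requires sVnodeLock read-locked; the real code drops
	// the lock again before snoozing
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	if (vnode == NULL || !vnode->IsBusy())
		break;
	if (!retry_busy_vnode(tries, mountID, vnodeID))
		return B_BUSY;	// gave up after roughly ten seconds
}
#endif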
928 
929 
930 /*!	Creates a new vnode with the given mount and node ID.
931 	If the node already exists, it is returned instead and no new node is
932 	created. In either case -- but not if an error occurs -- the function write
933 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
934 	error the lock is not held on return.
935 
936 	\param mountID The mount ID.
937 	\param vnodeID The vnode ID.
938 	\param _vnode Will be set to the new vnode on success.
939 	\param _nodeCreated Will be set to \c true when the returned vnode has
940 		been newly created, \c false when it already existed. Will not be
941 		changed on error.
942 	\return \c B_OK, when the vnode was successfully created and inserted or
943 		a node with the given ID was found, \c B_NO_MEMORY or
944 		\c B_ENTRY_NOT_FOUND on error.
945 */
946 static status_t
947 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
948 	bool& _nodeCreated)
949 {
950 	FUNCTION(("create_new_vnode_and_lock()\n"));
951 
952 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
953 	if (vnode == NULL)
954 		return B_NO_MEMORY;
955 
956 	// initialize basic values
957 	memset(vnode, 0, sizeof(struct vnode));
958 	vnode->device = mountID;
959 	vnode->id = vnodeID;
960 	vnode->ref_count = 1;
961 	vnode->SetBusy(true);
962 
963 	// look up the node -- it might have been added by someone else in the
964 	// meantime
965 	rw_lock_write_lock(&sVnodeLock);
966 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
967 	if (existingVnode != NULL) {
968 		free(vnode);
969 		_vnode = existingVnode;
970 		_nodeCreated = false;
971 		return B_OK;
972 	}
973 
974 	// get the mount structure
975 	rw_lock_read_lock(&sMountLock);
976 	vnode->mount = find_mount(mountID);
977 	if (!vnode->mount || vnode->mount->unmounting) {
978 		rw_lock_read_unlock(&sMountLock);
979 		rw_lock_write_unlock(&sVnodeLock);
980 		free(vnode);
981 		return B_ENTRY_NOT_FOUND;
982 	}
983 
984 	// add the vnode to the mount's node list and the hash table
985 	sVnodeTable->Insert(vnode);
986 	add_vnode_to_mount_list(vnode, vnode->mount);
987 
988 	rw_lock_read_unlock(&sMountLock);
989 
990 	_vnode = vnode;
991 	_nodeCreated = true;
992 
993 	// keep the vnode lock locked
994 	return B_OK;
995 }
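

// Illustrative sketch (not part of the original source) of the locking
// contract documented above: on success sVnodeLock is write-locked and must
// be released by the caller, on error it is not held. The IDs are
// hypothetical.
#if 0
struct vnode* vnode;
bool nodeCreated;
status_t status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
	nodeCreated);
if (status != B_OK)
	return status;	// sVnodeLock is not held here

// sVnodeLock is write-locked here; a newly created node is busy and
// already inserted into sVnodeTable and the mount's vnode list
rw_lock_write_unlock(&sVnodeLock);
#endif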
996 
997 
998 /*!	Frees the vnode and all resources it has acquired, and removes
999 	it from the vnode hash as well as from its mount structure.
1000 	Will also make sure that any cache modifications are written back.
1001 */
1002 static void
1003 free_vnode(struct vnode* vnode, bool reenter)
1004 {
1005 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1006 		vnode);
1007 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1008 
1009 	// write back any changes in this vnode's cache -- but only
1010 	// if the vnode won't be deleted, in which case the changes
1011 	// will be discarded
1012 
1013 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1014 		FS_CALL_NO_PARAMS(vnode, fsync);
1015 
1016 	// Note: If this vnode has a cache attached, there will still be two
1017 	// references to that cache at this point. The last one belongs to the vnode
1018 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1019 	// cache. Each but the last reference to a cache also includes a reference
1020 	// to the vnode. The file cache, however, released its reference (cf.
1021 	// file_cache_create()), so that this vnode's ref count has the chance to
1022 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
1023 	// cache reference to be released, which will also release a (no longer
1024 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1025 	// count, so that it will neither become negative nor 0.
1026 	vnode->ref_count = 2;
1027 
1028 	if (!vnode->IsUnpublished()) {
1029 		if (vnode->IsRemoved())
1030 			FS_CALL(vnode, remove_vnode, reenter);
1031 		else
1032 			FS_CALL(vnode, put_vnode, reenter);
1033 	}
1034 
1035 	// If the vnode has a VMCache attached, make sure that it won't try to get
1036 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1037 	// long as the vnode is busy and in the hash, that won't happen, but as
1038 	// soon as we've removed it from the hash, it could reload the vnode -- with
1039 	// a new cache attached!
1040 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1041 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1042 
1043 	// The file system has removed the resources of the vnode now, so we can
1044 	// make it available again (by removing the busy vnode from the hash).
1045 	rw_lock_write_lock(&sVnodeLock);
1046 	sVnodeTable->Remove(vnode);
1047 	rw_lock_write_unlock(&sVnodeLock);
1048 
1049 	// if we have a VMCache attached, remove it
1050 	if (vnode->cache)
1051 		vnode->cache->ReleaseRef();
1052 
1053 	vnode->cache = NULL;
1054 
1055 	remove_vnode_from_mount_list(vnode, vnode->mount);
1056 
1057 	free(vnode);
1058 }
1059 
1060 
1061 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1062 	if the counter dropped to 0.
1063 
1064 	The caller must, of course, own a reference to the vnode to call this
1065 	function.
1066 	The caller must not hold the sVnodeLock or the sMountLock.
1067 
1068 	\param vnode the vnode.
1069 	\param alwaysFree don't move this vnode into the unused list, but really
1070 		   delete it if possible.
1071 	\param reenter \c true, if this function is called (indirectly) from within
1072 		   a file system. This will be passed to file system hooks only.
1073 	\return \c B_OK, if everything went fine, an error code otherwise.
1074 */
1075 static status_t
1076 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1077 {
1078 	ReadLocker locker(sVnodeLock);
1079 	AutoLocker<Vnode> nodeLocker(vnode);
1080 
1081 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1082 
1083 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1084 
1085 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1086 		vnode->ref_count));
1087 
1088 	if (oldRefCount != 1)
1089 		return B_OK;
1090 
1091 	if (vnode->IsBusy())
1092 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1093 
1094 	bool freeNode = false;
1095 	bool freeUnusedNodes = false;
1096 
1097 	// Just insert the vnode into an unused list if we don't need
1098 	// to delete it
1099 	if (vnode->IsRemoved() || alwaysFree) {
1100 		vnode_to_be_freed(vnode);
1101 		vnode->SetBusy(true);
1102 		freeNode = true;
1103 	} else
1104 		freeUnusedNodes = vnode_unused(vnode);
1105 
1106 	nodeLocker.Unlock();
1107 	locker.Unlock();
1108 
1109 	if (freeNode)
1110 		free_vnode(vnode, reenter);
1111 	else if (freeUnusedNodes)
1112 		free_unused_vnodes();
1113 
1114 	return B_OK;
1115 }
1116 
1117 
1118 /*!	\brief Increments the reference counter of the given vnode.
1119 
1120 	The caller must make sure that the node isn't deleted while this function
1121 	is called. This can be done either:
1122 	- by ensuring that a reference to the node exists and remains in existence,
1123 	  or
1124 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1125 	  or by holding sVnodeLock write locked.
1126 
1127 	In the second case the caller is responsible for dealing with the ref count
1128 	0 -> 1 transition. That is 1. this function must not be invoked when the
1129 	node is busy in the first place and 2. vnode_used() must be called for the
1130 	node.
1131 
1132 	\param vnode the vnode.
1133 */
1134 static void
1135 inc_vnode_ref_count(struct vnode* vnode)
1136 {
1137 	atomic_add(&vnode->ref_count, 1);
1138 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1139 		vnode->ref_count));
1140 }
1141 
1142 
1143 static bool
1144 is_special_node_type(int type)
1145 {
1146 	// at the moment only FIFOs are supported
1147 	return S_ISFIFO(type);
1148 }
1149 
1150 
1151 static status_t
1152 create_special_sub_node(struct vnode* vnode, uint32 flags)
1153 {
1154 	if (S_ISFIFO(vnode->Type()))
1155 		return create_fifo_vnode(vnode->mount->volume, vnode);
1156 
1157 	return B_BAD_VALUE;
1158 }
1159 
1160 
1161 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1162 
1163 	If the node is not yet in memory, it will be loaded.
1164 
1165 	The caller must not hold the sVnodeLock or the sMountLock.
1166 
1167 	\param mountID the mount ID.
1168 	\param vnodeID the node ID.
1169 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1170 		   retrieved vnode structure shall be written.
1171 	\param reenter \c true, if this function is called (indirectly) from within
1172 		   a file system.
1173 	\return \c B_OK, if everything went fine, an error code otherwise.
1174 */
1175 static status_t
1176 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1177 	int reenter)
1178 {
1179 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1180 		mountID, vnodeID, _vnode));
1181 
1182 	rw_lock_read_lock(&sVnodeLock);
1183 
1184 	int32 tries = BUSY_VNODE_RETRIES;
1185 restart:
1186 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1187 	AutoLocker<Vnode> nodeLocker(vnode);
1188 
1189 	if (vnode && vnode->IsBusy()) {
1190 		nodeLocker.Unlock();
1191 		rw_lock_read_unlock(&sVnodeLock);
1192 		if (!canWait) {
1193 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1194 				mountID, vnodeID);
1195 			return B_BUSY;
1196 		}
1197 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1198 			return B_BUSY;
1199 
1200 		rw_lock_read_lock(&sVnodeLock);
1201 		goto restart;
1202 	}
1203 
1204 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1205 
1206 	status_t status;
1207 
1208 	if (vnode) {
1209 		if (vnode->ref_count == 0) {
1210 			// this vnode has been unused before
1211 			vnode_used(vnode);
1212 		}
1213 		inc_vnode_ref_count(vnode);
1214 
1215 		nodeLocker.Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	} else {
1218 		// we need to create a new vnode and read it in
1219 		rw_lock_read_unlock(&sVnodeLock);
1220 			// unlock -- create_new_vnode_and_lock() write-locks on success
1221 		bool nodeCreated;
1222 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1223 			nodeCreated);
1224 		if (status != B_OK)
1225 			return status;
1226 
1227 		if (!nodeCreated) {
1228 			rw_lock_read_lock(&sVnodeLock);
1229 			rw_lock_write_unlock(&sVnodeLock);
1230 			goto restart;
1231 		}
1232 
1233 		rw_lock_write_unlock(&sVnodeLock);
1234 
1235 		int type;
1236 		uint32 flags;
1237 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1238 			&flags, reenter);
1239 		if (status == B_OK && vnode->private_node == NULL)
1240 			status = B_BAD_VALUE;
1241 
1242 		bool gotNode = status == B_OK;
1243 		bool publishSpecialSubNode = false;
1244 		if (gotNode) {
1245 			vnode->SetType(type);
1246 			publishSpecialSubNode = is_special_node_type(type)
1247 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1248 		}
1249 
1250 		if (gotNode && publishSpecialSubNode)
1251 			status = create_special_sub_node(vnode, flags);
1252 
1253 		if (status != B_OK) {
1254 			if (gotNode)
1255 				FS_CALL(vnode, put_vnode, reenter);
1256 
1257 			rw_lock_write_lock(&sVnodeLock);
1258 			sVnodeTable->Remove(vnode);
1259 			remove_vnode_from_mount_list(vnode, vnode->mount);
1260 			rw_lock_write_unlock(&sVnodeLock);
1261 
1262 			free(vnode);
1263 			return status;
1264 		}
1265 
1266 		rw_lock_read_lock(&sVnodeLock);
1267 		vnode->Lock();
1268 
1269 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1270 		vnode->SetBusy(false);
1271 
1272 		vnode->Unlock();
1273 		rw_lock_read_unlock(&sVnodeLock);
1274 	}
1275 
1276 	TRACE(("get_vnode: returning %p\n", vnode));
1277 
1278 	*_vnode = vnode;
1279 	return B_OK;
1280 }
1281 
1282 
1283 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1284 	if the counter dropped to 0.
1285 
1286 	The caller must, of course, own a reference to the vnode to call this
1287 	function.
1288 	The caller must not hold the sVnodeLock or the sMountLock.
1289 
1290 	\param vnode the vnode.
1291 */
1292 static inline void
1293 put_vnode(struct vnode* vnode)
1294 {
1295 	dec_vnode_ref_count(vnode, false, false);
1296 }
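

// Illustrative sketch (not part of the original source): every successful
// get_vnode() must be balanced by a put_vnode(), or the node's reference
// count never drops back and the vnode can never be freed. The IDs are
// hypothetical.
#if 0
struct vnode* vnode;
status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
	// canWait == true, reenter == 0 (not called from within a FS)
if (status != B_OK)
	return status;

// ... work with the vnode ...

put_vnode(vnode);	// drops the reference acquired by get_vnode()
#endif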
1297 
1298 
1299 static void
1300 free_unused_vnodes(int32 level)
1301 {
1302 	unused_vnodes_check_started();
1303 
1304 	if (level == B_NO_LOW_RESOURCE) {
1305 		unused_vnodes_check_done();
1306 		return;
1307 	}
1308 
1309 	flush_hot_vnodes();
1310 
1311 	// determine how many nodes to free
1312 	uint32 count = 1;
1313 	{
1314 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1315 
1316 		switch (level) {
1317 			case B_LOW_RESOURCE_NOTE:
1318 				count = sUnusedVnodes / 100;
1319 				break;
1320 			case B_LOW_RESOURCE_WARNING:
1321 				count = sUnusedVnodes / 10;
1322 				break;
1323 			case B_LOW_RESOURCE_CRITICAL:
1324 				count = sUnusedVnodes;
1325 				break;
1326 		}
1327 
1328 		if (count > sUnusedVnodes)
1329 			count = sUnusedVnodes;
1330 	}
1331 
1332 	// Write back the modified pages of some unused vnodes and free them.
1333 
1334 	for (uint32 i = 0; i < count; i++) {
1335 		ReadLocker vnodesReadLocker(sVnodeLock);
1336 
1337 		// get the first node
1338 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1339 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1340 			&sUnusedVnodeList);
1341 		unusedVnodesLocker.Unlock();
1342 
1343 		if (vnode == NULL)
1344 			break;
1345 
1346 		// lock the node
1347 		AutoLocker<Vnode> nodeLocker(vnode);
1348 
1349 		// Check whether the node is still unused -- since we only append to the
1350 		// tail of the unused queue, the vnode should still be at its head.
1351 		// Alternatively we could check its ref count for 0 and its busy flag,
1352 		// but if the node is no longer at the head of the queue, it means it
1353 		// has been touched in the meantime, i.e. it is no longer the least
1354 		// recently used unused vnode and we rather don't free it.
1355 		unusedVnodesLocker.Lock();
1356 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1357 			continue;
1358 		unusedVnodesLocker.Unlock();
1359 
1360 		ASSERT(!vnode->IsBusy());
1361 
1362 		// grab a reference
1363 		inc_vnode_ref_count(vnode);
1364 		vnode_used(vnode);
1365 
1366 		// write back changes and free the node
1367 		nodeLocker.Unlock();
1368 		vnodesReadLocker.Unlock();
1369 
1370 		if (vnode->cache != NULL)
1371 			vnode->cache->WriteModified();
1372 
1373 		dec_vnode_ref_count(vnode, true, false);
1374 			// this should free the vnode when it's still unused
1375 	}
1376 
1377 	unused_vnodes_check_done();
1378 }
1379 
1380 
1381 /*!	Gets the vnode the given vnode is covering.
1382 
1383 	The caller must have \c sVnodeLock read-locked at least.
1384 
1385 	The function returns a reference to the retrieved vnode (if any), which the
1386 	caller is responsible for releasing.
1387 
1388 	\param vnode The vnode whose covered node shall be returned.
1389 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1390 		vnode.
1391 */
1392 static inline Vnode*
1393 get_covered_vnode_locked(Vnode* vnode)
1394 {
1395 	if (Vnode* coveredNode = vnode->covers) {
1396 		while (coveredNode->covers != NULL)
1397 			coveredNode = coveredNode->covers;
1398 
1399 		inc_vnode_ref_count(coveredNode);
1400 		return coveredNode;
1401 	}
1402 
1403 	return NULL;
1404 }
1405 
1406 
1407 /*!	Gets the vnode the given vnode is covering.
1408 
1409 	The caller must not hold \c sVnodeLock. Note that this implies a race
1410 	condition, since the situation can change at any time.
1411 
1412 	The function returns a reference to the retrieved vnode (if any), which the
1413 	caller is responsible for releasing.
1414 
1415 	\param vnode The vnode whose covered node shall be returned.
1416 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1417 		vnode.
1418 */
1419 static inline Vnode*
1420 get_covered_vnode(Vnode* vnode)
1421 {
1422 	if (!vnode->IsCovering())
1423 		return NULL;
1424 
1425 	ReadLocker vnodeReadLocker(sVnodeLock);
1426 	return get_covered_vnode_locked(vnode);
1427 }
1428 
1429 
1430 /*!	Gets the vnode the given vnode is covered by.
1431 
1432 	The caller must have \c sVnodeLock read-locked at least.
1433 
1434 	The function returns a reference to the retrieved vnode (if any), which the
1435 	caller is responsible for releasing.
1436 
1437 	\param vnode The vnode whose covering node shall be returned.
1438 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1439 		any vnode.
1440 */
1441 static Vnode*
1442 get_covering_vnode_locked(Vnode* vnode)
1443 {
1444 	if (Vnode* coveringNode = vnode->covered_by) {
1445 		while (coveringNode->covered_by != NULL)
1446 			coveringNode = coveringNode->covered_by;
1447 
1448 		inc_vnode_ref_count(coveringNode);
1449 		return coveringNode;
1450 	}
1451 
1452 	return NULL;
1453 }
1454 
1455 
1456 /*!	Gets the vnode the given vnode is covered by.
1457 
1458 	The caller must not hold \c sVnodeLock. Note that this implies a race
1459 	condition, since the situation can change at any time.
1460 
1461 	The function returns a reference to the retrieved vnode (if any), which the
1462 	caller is responsible for releasing.
1463 
1464 	\param vnode The vnode whose covering node shall be returned.
1465 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1466 		any vnode.
1467 */
1468 static inline Vnode*
1469 get_covering_vnode(Vnode* vnode)
1470 {
1471 	if (!vnode->IsCovered())
1472 		return NULL;
1473 
1474 	ReadLocker vnodeReadLocker(sVnodeLock);
1475 	return get_covering_vnode_locked(vnode);
1476 }
1477 
1478 
1479 static void
1480 free_unused_vnodes()
1481 {
1482 	free_unused_vnodes(
1483 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1484 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1485 }
1486 
1487 
1488 static void
1489 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1490 {
1491 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1492 
1493 	free_unused_vnodes(level);
1494 }
1495 
1496 
1497 static inline void
1498 put_advisory_locking(struct advisory_locking* locking)
1499 {
1500 	release_sem(locking->lock);
1501 }
1502 
1503 
1504 /*!	Returns the advisory_locking object of the \a vnode if it
1505 	has one, and locks it.
1506 	You have to call put_advisory_locking() when you're done with
1507 	it.
1508 	Note, you must not have the vnode mutex locked when calling
1509 	this function.
1510 */
1511 static struct advisory_locking*
1512 get_advisory_locking(struct vnode* vnode)
1513 {
1514 	rw_lock_read_lock(&sVnodeLock);
1515 	vnode->Lock();
1516 
1517 	struct advisory_locking* locking = vnode->advisory_locking;
1518 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1519 
1520 	vnode->Unlock();
1521 	rw_lock_read_unlock(&sVnodeLock);
1522 
1523 	if (lock >= 0)
1524 		lock = acquire_sem(lock);
1525 	if (lock < 0) {
1526 		// This means the locking has been deleted in the meantime
1527 		// or had never existed in the first place - otherwise, we
1528 		// would get the lock at some point.
1529 		return NULL;
1530 	}
1531 
1532 	return locking;
1533 }
1534 
1535 
1536 /*!	Creates a locked advisory_locking object, and attaches it to the
1537 	given \a vnode.
1538 	Returns B_OK in case of success - also if the vnode got such an
1539 	object from someone else in the meantime, you'll still get this
1540 	one locked then.
1541 */
1542 static status_t
1543 create_advisory_locking(struct vnode* vnode)
1544 {
1545 	if (vnode == NULL)
1546 		return B_FILE_ERROR;
1547 
1548 	ObjectDeleter<advisory_locking> lockingDeleter;
1549 	struct advisory_locking* locking = NULL;
1550 
1551 	while (get_advisory_locking(vnode) == NULL) {
1552 		// no locking object set on the vnode yet, create one
1553 		if (locking == NULL) {
1554 			locking = new(std::nothrow) advisory_locking;
1555 			if (locking == NULL)
1556 				return B_NO_MEMORY;
1557 			lockingDeleter.SetTo(locking);
1558 
1559 			locking->wait_sem = create_sem(0, "advisory lock");
1560 			if (locking->wait_sem < 0)
1561 				return locking->wait_sem;
1562 
1563 			locking->lock = create_sem(0, "advisory locking");
1564 			if (locking->lock < 0)
1565 				return locking->lock;
1566 		}
1567 
1568 		// set our newly created locking object
1569 		ReadLocker _(sVnodeLock);
1570 		AutoLocker<Vnode> nodeLocker(vnode);
1571 		if (vnode->advisory_locking == NULL) {
1572 			vnode->advisory_locking = locking;
1573 			lockingDeleter.Detach();
1574 			return B_OK;
1575 		}
1576 	}
1577 
1578 	// The vnode already had a locking object. That's just as well.
1579 
1580 	return B_OK;
1581 }
1582 
1583 
1584 /*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1585 	with the advisory_lock \a lock.
1586 */
1587 static bool
1588 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1589 {
1590 	if (flock == NULL)
1591 		return true;
1592 
1593 	return lock->start <= flock->l_start - 1 + flock->l_len
1594 		&& lock->end >= flock->l_start;
1595 }
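

// Worked example (not part of the original source), with hypothetical
// values: a request with l_start = 100 and l_len = 50 covers the byte range
// [100, 149].
#if 0
struct flock request = {};
request.l_start = 100;
request.l_len = 50;

struct advisory_lock lock = {};
lock.start = 140;
lock.end = 200;
bool overlaps = advisory_lock_intersects(&lock, &request);
	// true: 140 <= 100 - 1 + 50 and 200 >= 100

lock.start = 150;
overlaps = advisory_lock_intersects(&lock, &request);
	// false: 150 > 149
#endif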
1596 
1597 
1598 /*!	Tests whether acquiring a lock would block.
1599 */
1600 static status_t
1601 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1602 {
1603 	flock->l_type = F_UNLCK;
1604 
1605 	struct advisory_locking* locking = get_advisory_locking(vnode);
1606 	if (locking == NULL)
1607 		return B_OK;
1608 
1609 	team_id team = team_get_current_team_id();
1610 
1611 	LockList::Iterator iterator = locking->locks.GetIterator();
1612 	while (iterator.HasNext()) {
1613 		struct advisory_lock* lock = iterator.Next();
1614 
1615 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1616 			// locks do overlap
1617 			if (flock->l_type != F_RDLCK || !lock->shared) {
1618 				// collision
1619 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1620 				flock->l_whence = SEEK_SET;
1621 				flock->l_start = lock->start;
1622 				flock->l_len = lock->end - lock->start + 1;
1623 				flock->l_pid = lock->team;
1624 				break;
1625 			}
1626 		}
1627 	}
1628 
1629 	put_advisory_locking(locking);
1630 	return B_OK;
1631 }
1632 
1633 
1634 /*!	Removes the specified lock, or all locks of the calling team
1635 	if \a flock is NULL.
1636 */
1637 static status_t
1638 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1639 	struct file_descriptor* descriptor, struct flock* flock)
1640 {
1641 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1642 
1643 	struct advisory_locking* locking = get_advisory_locking(vnode);
1644 	if (locking == NULL)
1645 		return B_OK;
1646 
1647 	// find matching lock entries
1648 
1649 	LockList::Iterator iterator = locking->locks.GetIterator();
1650 	while (iterator.HasNext()) {
1651 		struct advisory_lock* lock = iterator.Next();
1652 		bool removeLock = false;
1653 
1654 		if (descriptor != NULL && lock->bound_to == descriptor) {
1655 			// Remove flock() locks
1656 			removeLock = true;
1657 		} else if (lock->bound_to == context
1658 				&& advisory_lock_intersects(lock, flock)) {
1659 			// Remove POSIX locks
1660 			bool endsBeyond = false;
1661 			bool startsBefore = false;
1662 			if (flock != NULL) {
1663 				startsBefore = lock->start < flock->l_start;
1664 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1665 			}
1666 
1667 			if (!startsBefore && !endsBeyond) {
1668 				// lock is completely contained in flock
1669 				removeLock = true;
1670 			} else if (startsBefore && !endsBeyond) {
1671 				// cut the end of the lock
1672 				lock->end = flock->l_start - 1;
1673 			} else if (!startsBefore && endsBeyond) {
1674 				// cut the start of the lock
1675 				lock->start = flock->l_start + flock->l_len;
1676 			} else {
1677 				// divide the lock into two locks
1678 				struct advisory_lock* secondLock = new(std::nothrow) advisory_lock;
1679 				if (secondLock == NULL) {
1680 					// TODO: we should probably revert the locks we already
1681 					// changed... (ie. allocate upfront)
1682 					put_advisory_locking(locking);
1683 					return B_NO_MEMORY;
1684 				}
1685 
1686 				secondLock->bound_to = context;
1687 				secondLock->team = lock->team;
1688 				secondLock->session = lock->session;
1689 				// values must already be normalized when getting here
1690 				secondLock->start = flock->l_start + flock->l_len;
1691 				secondLock->end = lock->end;
1692 				secondLock->shared = lock->shared;
1693 
1694 				lock->end = flock->l_start - 1;	// truncate only after the split
1695 
1696 				locking->locks.Add(secondLock);
1697 			}
1698 		}
1699 
1700 		if (removeLock) {
1701 			// this lock is no longer used
1702 			iterator.Remove();
1703 			free(lock);
1704 		}
1705 	}
1706 
1707 	bool removeLocking = locking->locks.IsEmpty();
1708 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1709 
1710 	put_advisory_locking(locking);
1711 
1712 	if (removeLocking) {
1713 		// We can remove the whole advisory locking structure; it's no
1714 		// longer used
1715 		locking = get_advisory_locking(vnode);
1716 		if (locking != NULL) {
1717 			ReadLocker locker(sVnodeLock);
1718 			AutoLocker<Vnode> nodeLocker(vnode);
1719 
1720 			// the locking could have been changed in the mean time
1721 			if (locking->locks.IsEmpty()) {
1722 				vnode->advisory_locking = NULL;
1723 				nodeLocker.Unlock();
1724 				locker.Unlock();
1725 
1726 				// we've detached the locking from the vnode, so we can
1727 				// safely delete it
1728 				delete locking;
1729 			} else {
1730 				// the locking is in use again
1731 				nodeLocker.Unlock();
1732 				locker.Unlock();
1733 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1734 			}
1735 		}
1736 	}
1737 
1738 	return B_OK;
1739 }
1740 
1741 
1742 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1743 	will wait for the lock to become available if there are any collisions
1744 	(if \a wait is \c false, it returns B_PERMISSION_DENIED in that case).
1745 
1746 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1747 	BSD flock() semantics are used, that is, all children can unlock the file
1748 	in question (we even allow parents to remove the lock, though, but that
1749 	seems to be in line with what the BSDs are doing).
1750 */
1751 static status_t
1752 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1753 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1754 {
1755 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1756 		vnode, flock, wait ? "yes" : "no"));
1757 
1758 	bool shared = flock->l_type == F_RDLCK;
1759 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1760 	status_t status = B_OK;
1761 
1762 	// TODO: do deadlock detection!
1763 
1764 	struct advisory_locking* locking;
1765 
1766 	while (true) {
1767 		// if this vnode has an advisory_locking structure attached,
1768 		// lock that one and search for any colliding file lock
1769 		status = create_advisory_locking(vnode);
1770 		if (status != B_OK)
1771 			return status;
1772 
1773 		locking = vnode->advisory_locking;
1774 		team_id team = team_get_current_team_id();
1775 		sem_id waitForLock = -1;
1776 
1777 		// test for collisions
1778 		LockList::Iterator iterator = locking->locks.GetIterator();
1779 		while (iterator.HasNext()) {
1780 			struct advisory_lock* lock = iterator.Next();
1781 
1782 			// TODO: locks from the same team might be joinable!
1783 			if ((lock->team != team || lock->bound_to != boundTo)
1784 					&& advisory_lock_intersects(lock, flock)) {
1785 				// locks do overlap
1786 				if (!shared || !lock->shared) {
1787 					// we need to wait
1788 					waitForLock = locking->wait_sem;
1789 					break;
1790 				}
1791 			}
1792 		}
1793 
1794 		if (waitForLock < 0)
1795 			break;
1796 
1797 		// We need to wait. Do that or fail now, if we've been asked not to.
1798 
1799 		if (!wait) {
1800 			put_advisory_locking(locking);
1801 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1802 		}
1803 
1804 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1805 			B_CAN_INTERRUPT, 0);
1806 		if (status != B_OK && status != B_BAD_SEM_ID)
1807 			return status;
1808 
1809 		// We have been notified, but we need to re-lock the locking object. So
1810 		// go another round...
1811 	}
1812 
1813 	// install new lock
1814 
1815 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1816 		sizeof(struct advisory_lock));
1817 	if (lock == NULL) {
1818 		put_advisory_locking(locking);
1819 		return B_NO_MEMORY;
1820 	}
1821 
1822 	lock->bound_to = boundTo;
1823 	lock->team = team_get_current_team_id();
1824 	lock->session = thread_get_current_thread()->team->session_id;
1825 	// values must already be normalized when getting here
1826 	lock->start = flock->l_start;
1827 	lock->end = flock->l_start - 1 + flock->l_len;
1828 	lock->shared = shared;
1829 
1830 	locking->locks.Add(lock);
1831 	put_advisory_locking(locking);
1832 
1833 	return status;
1834 }
1835 
1836 
1837 /*!	Normalizes the \a flock structure to make it easier to compare the
1838 	structure with others. The l_start and l_len fields are set to absolute
1839 	values according to the l_whence field.
1840 */
1841 static status_t
1842 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1843 {
1844 	switch (flock->l_whence) {
1845 		case SEEK_SET:
1846 			break;
1847 		case SEEK_CUR:
1848 			flock->l_start += descriptor->pos;
1849 			break;
1850 		case SEEK_END:
1851 		{
1852 			struct vnode* vnode = descriptor->u.vnode;
1853 			struct stat stat;
1854 			status_t status;
1855 
1856 			if (!HAS_FS_CALL(vnode, read_stat))
1857 				return B_UNSUPPORTED;
1858 
1859 			status = FS_CALL(vnode, read_stat, &stat);
1860 			if (status != B_OK)
1861 				return status;
1862 
1863 			flock->l_start += stat.st_size;
1864 			break;
1865 		}
1866 		default:
1867 			return B_BAD_VALUE;
1868 	}
1869 
1870 	if (flock->l_start < 0)
1871 		flock->l_start = 0;
1872 	if (flock->l_len == 0)
1873 		flock->l_len = OFF_MAX;
1874 
1875 	// don't let the offset and length overflow
1876 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1877 		flock->l_len = OFF_MAX - flock->l_start;
1878 
1879 	if (flock->l_len < 0) {
1880 		// a negative length reverses the region
1881 		flock->l_start += flock->l_len;
1882 		flock->l_len = -flock->l_len;
1883 	}
1884 
1885 	return B_OK;
1886 }
1887 
1888 
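/*!	Replaces \a vnode, if it is affected by the disconnect (i.e. it resides
	on \a mount and, if \a vnodeToDisconnect is given, equals it), with the
	vnode it covers, or with \a fallBack if there is none. The references
	are adjusted accordingly.
*/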
1889 static void
1890 replace_vnode_if_disconnected(struct fs_mount* mount,
1891 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1892 	struct vnode* fallBack, bool lockRootLock)
1893 {
1894 	struct vnode* givenVnode = vnode;
1895 	bool vnodeReplaced = false;
1896 
1897 	ReadLocker vnodeReadLocker(sVnodeLock);
1898 
1899 	if (lockRootLock)
1900 		mutex_lock(&sIOContextRootLock);
1901 
1902 	while (vnode != NULL && vnode->mount == mount
1903 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1904 		if (vnode->covers != NULL) {
1905 			// redirect the vnode to the covered vnode
1906 			vnode = vnode->covers;
1907 		} else
1908 			vnode = fallBack;
1909 
1910 		vnodeReplaced = true;
1911 	}
1912 
1913 	// If we've replaced the node, grab a reference for the new one.
1914 	if (vnodeReplaced && vnode != NULL)
1915 		inc_vnode_ref_count(vnode);
1916 
1917 	if (lockRootLock)
1918 		mutex_unlock(&sIOContextRootLock);
1919 
1920 	vnodeReadLocker.Unlock();
1921 
1922 	if (vnodeReplaced)
1923 		put_vnode(givenVnode);
1924 }
1925 
1926 
1927 /*!	Disconnects all file descriptors that are associated with the
1928 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1929 	\a mount object.
1930 
	Note: after you've called this function, there might still be ongoing
	accesses - they won't be interrupted if they were already in progress.
	However, any subsequent access will fail.
1934 
1935 	This is not a cheap function and should be used with care and rarely.
1936 	TODO: there is currently no means to stop a blocking read/write!
1937 */
1938 static void
1939 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1940 	struct vnode* vnodeToDisconnect)
1941 {
1942 	// iterate over all teams and peek into their file descriptors
1943 	TeamListIterator teamIterator;
1944 	while (Team* team = teamIterator.Next()) {
1945 		BReference<Team> teamReference(team, true);
1946 		TeamLocker teamLocker(team);
1947 
1948 		// lock the I/O context
1949 		io_context* context = team->io_context;
1950 		if (context == NULL)
1951 			continue;
1952 		MutexLocker contextLocker(context->io_mutex);
1953 
1954 		teamLocker.Unlock();
1955 
1956 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1957 			sRoot, true);
1958 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1959 			sRoot, false);
1960 
1961 		for (uint32 i = 0; i < context->table_size; i++) {
1962 			struct file_descriptor* descriptor = context->fds[i];
1963 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1964 				continue;
1965 
1966 			inc_fd_ref_count(descriptor);
1967 
1968 			// if this descriptor points at this mount, we
1969 			// need to disconnect it to be able to unmount
1970 			struct vnode* vnode = fd_vnode(descriptor);
1971 			if (vnodeToDisconnect != NULL) {
1972 				if (vnode == vnodeToDisconnect)
1973 					disconnect_fd(descriptor);
1974 			} else if ((vnode != NULL && vnode->mount == mount)
1975 				|| (vnode == NULL && descriptor->u.mount == mount))
1976 				disconnect_fd(descriptor);
1977 
1978 			put_fd(descriptor);
1979 		}
1980 	}
1981 }
1982 
1983 
1984 /*!	\brief Gets the root node of the current IO context.
1985 	If \a kernel is \c true, the kernel IO context will be used.
1986 	The caller obtains a reference to the returned node.
1987 */
1988 struct vnode*
1989 get_root_vnode(bool kernel)
1990 {
1991 	if (!kernel) {
1992 		// Get current working directory from io context
1993 		struct io_context* context = get_current_io_context(kernel);
1994 
1995 		mutex_lock(&sIOContextRootLock);
1996 
1997 		struct vnode* root = context->root;
1998 		if (root != NULL)
1999 			inc_vnode_ref_count(root);
2000 
2001 		mutex_unlock(&sIOContextRootLock);
2002 
2003 		if (root != NULL)
2004 			return root;
2005 
2006 		// That should never happen.
2007 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2008 			"have a root\n", team_get_current_team_id());
2009 	}
2010 
2011 	inc_vnode_ref_count(sRoot);
2012 	return sRoot;
2013 }
2014 
2015 
2016 /*!	\brief Gets the directory path and leaf name for a given path.
2017 
	The supplied \a path is transformed to refer to the directory part of
	the entry identified by the original path, and the leaf name of the
	original entry is written into the buffer \a filename.
2021 	Neither the returned path nor the leaf name can be expected to be
2022 	canonical.
2023 
2024 	\param path The path to be analyzed. Must be able to store at least one
2025 		   additional character.
2026 	\param filename The buffer into which the leaf name will be written.
2027 		   Must be of size B_FILE_NAME_LENGTH at least.
2028 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2029 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2030 		   if the given path name is empty.
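
	For example, "/a/b/c" is transformed into "/a/b/." with leaf name "c",
	and "foo" into "." with leaf name "foo".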
2031 */
2032 static status_t
2033 get_dir_path_and_leaf(char* path, char* filename)
2034 {
2035 	if (*path == '\0')
2036 		return B_ENTRY_NOT_FOUND;
2037 
2038 	char* last = strrchr(path, '/');
		// '/' is not allowed in file names!
2040 
2041 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2042 
2043 	if (last == NULL) {
		// this path is a single segment with no '/' in it, e.g. "foo"
2046 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2047 			return B_NAME_TOO_LONG;
2048 
2049 		strcpy(path, ".");
2050 	} else {
2051 		last++;
2052 		if (last[0] == '\0') {
2053 			// special case: the path ends in one or more '/' - remove them
2054 			while (*--last == '/' && last != path);
2055 			last[1] = '\0';
2056 
2057 			if (last == path && last[0] == '/') {
2058 				// This path points to the root of the file system
2059 				strcpy(filename, ".");
2060 				return B_OK;
2061 			}
2062 			for (; last != path && *(last - 1) != '/'; last--);
2063 				// rewind to the start of the leaf before the '/'
2064 		}
2065 
2066 		// normal leaf: replace the leaf portion of the path with a '.'
2067 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2068 			return B_NAME_TOO_LONG;
2069 
2070 		last[0] = '.';
2071 		last[1] = '\0';
2072 	}
2073 	return B_OK;
2074 }
2075 
2076 
2077 static status_t
2078 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2079 	bool traverse, bool kernel, struct vnode** _vnode)
2080 {
2081 	char clonedName[B_FILE_NAME_LENGTH + 1];
2082 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2083 		return B_NAME_TOO_LONG;
2084 
2085 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2086 	struct vnode* directory;
2087 
2088 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2089 	if (status < 0)
2090 		return status;
2091 
2092 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2093 		_vnode, NULL);
2094 }
2095 
2096 
2097 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2098 	and returns the respective vnode.
2099 	On success a reference to the vnode is acquired for the caller.
2100 */
2101 static status_t
2102 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2103 {
2104 	ino_t id;
2105 	bool missing;
2106 
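	// Consult the entry cache first: a hit with missing == true is a cached
	// negative entry, so we don't need to ask the file system at all.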
2107 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2108 		return missing ? B_ENTRY_NOT_FOUND
2109 			: get_vnode(dir->device, id, _vnode, true, false);
2110 	}
2111 
2112 	status_t status = FS_CALL(dir, lookup, name, &id);
2113 	if (status != B_OK)
2114 		return status;
2115 
2116 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2117 	// have a reference and just need to look the node up.
2118 	rw_lock_read_lock(&sVnodeLock);
2119 	*_vnode = lookup_vnode(dir->device, id);
2120 	rw_lock_read_unlock(&sVnodeLock);
2121 
2122 	if (*_vnode == NULL) {
2123 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2124 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2125 		return B_ENTRY_NOT_FOUND;
2126 	}
2127 
2128 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2129 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2130 //		(*_vnode)->mount->id, (*_vnode)->id);
2131 
2132 	return B_OK;
2133 }
2134 
2135 
2136 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2137 	\a path must not be NULL.
2138 	If it returns successfully, \a path contains the name of the last path
2139 	component. This function clobbers the buffer pointed to by \a path only
2140 	if it does contain more than one component.
	Note: this always reduces the ref_count of the starting \a vnode,
	whether it succeeds or not!
2143 */
2144 static status_t
2145 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2146 	int count, struct io_context* ioContext, struct vnode** _vnode,
2147 	ino_t* _parentID)
2148 {
2149 	status_t status = B_OK;
2150 	ino_t lastParentID = vnode->id;
2151 
2152 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2153 
2154 	if (path == NULL) {
2155 		put_vnode(vnode);
2156 		return B_BAD_VALUE;
2157 	}
2158 
2159 	if (*path == '\0') {
2160 		put_vnode(vnode);
2161 		return B_ENTRY_NOT_FOUND;
2162 	}
2163 
2164 	while (true) {
2165 		struct vnode* nextVnode;
2166 		char* nextPath;
2167 
2168 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2169 			path));
2170 
2171 		// done?
2172 		if (path[0] == '\0')
2173 			break;
2174 
2175 		// walk to find the next path component ("path" will point to a single
2176 		// path component), and filter out multiple slashes
2177 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2178 				nextPath++);
2179 
2180 		if (*nextPath == '/') {
2181 			*nextPath = '\0';
2182 			do
2183 				nextPath++;
2184 			while (*nextPath == '/');
2185 		}
2186 
		// If the '..' is at a covering vnode, move to the covered vnode so
		// that we pass the '..' path to the underlying file system.
		// Also prevent breaking out of the root of the IO context.
2190 		if (strcmp("..", path) == 0) {
2191 			if (vnode == ioContext->root) {
2192 				// Attempted prison break! Keep it contained.
2193 				path = nextPath;
2194 				continue;
2195 			}
2196 
2197 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2198 				nextVnode = coveredVnode;
2199 				put_vnode(vnode);
2200 				vnode = nextVnode;
2201 			}
2202 		}
2203 
2204 		// check if vnode is really a directory
2205 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2206 			status = B_NOT_A_DIRECTORY;
2207 
2208 		// Check if we have the right to search the current directory vnode.
2209 		// If a file system doesn't have the access() function, we assume that
2210 		// searching a directory is always allowed
2211 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2212 			status = FS_CALL(vnode, access, X_OK);
2213 
2214 		// Tell the filesystem to get the vnode of this path component (if we
2215 		// got the permission from the call above)
2216 		if (status == B_OK)
2217 			status = lookup_dir_entry(vnode, path, &nextVnode);
2218 
2219 		if (status != B_OK) {
2220 			put_vnode(vnode);
2221 			return status;
2222 		}
2223 
2224 		// If the new node is a symbolic link, resolve it (if we've been told
2225 		// to do it)
2226 		if (S_ISLNK(nextVnode->Type())
2227 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2228 			size_t bufferSize;
2229 			char* buffer;
2230 
2231 			TRACE(("traverse link\n"));
2232 
2233 			// it's not exactly nice style using goto in this way, but hey,
2234 			// it works :-/
2235 			if (count + 1 > B_MAX_SYMLINKS) {
2236 				status = B_LINK_LIMIT;
2237 				goto resolve_link_error;
2238 			}
2239 
2240 			bufferSize = B_PATH_NAME_LENGTH;
2241 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2242 			if (buffer == NULL) {
2243 				status = B_NO_MEMORY;
2244 				goto resolve_link_error;
2245 			}
2246 
2247 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2248 				bufferSize--;
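					// reserve one byte for the terminating null character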
2249 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2250 				// null-terminate
2251 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2252 					buffer[bufferSize] = '\0';
2253 			} else
2254 				status = B_BAD_VALUE;
2255 
2256 			if (status != B_OK) {
2257 				free(buffer);
2258 
2259 		resolve_link_error:
2260 				put_vnode(vnode);
2261 				put_vnode(nextVnode);
2262 
2263 				return status;
2264 			}
2265 			put_vnode(nextVnode);
2266 
2267 			// Check if we start from the root directory or the current
2268 			// directory ("vnode" still points to that one).
2269 			// Cut off all leading slashes if it's the root directory
2270 			path = buffer;
2271 			bool absoluteSymlink = false;
2272 			if (path[0] == '/') {
2273 				// we don't need the old directory anymore
2274 				put_vnode(vnode);
2275 
2276 				while (*++path == '/')
2277 					;
2278 
2279 				mutex_lock(&sIOContextRootLock);
2280 				vnode = ioContext->root;
2281 				inc_vnode_ref_count(vnode);
2282 				mutex_unlock(&sIOContextRootLock);
2283 
2284 				absoluteSymlink = true;
2285 			}
2286 
2287 			inc_vnode_ref_count(vnode);
2288 				// balance the next recursion - we will decrement the
2289 				// ref_count of the vnode, no matter if we succeeded or not
2290 
2291 			if (absoluteSymlink && *path == '\0') {
2292 				// symlink was just "/"
2293 				nextVnode = vnode;
2294 			} else {
2295 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2296 					ioContext, &nextVnode, &lastParentID);
2297 			}
2298 
2299 			object_cache_free(sPathNameCache, buffer, 0);
2300 
2301 			if (status != B_OK) {
2302 				put_vnode(vnode);
2303 				return status;
2304 			}
2305 		} else
2306 			lastParentID = vnode->id;
2307 
2308 		// decrease the ref count on the old dir we just looked up into
2309 		put_vnode(vnode);
2310 
2311 		path = nextPath;
2312 		vnode = nextVnode;
2313 
2314 		// see if we hit a covered node
2315 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2316 			put_vnode(vnode);
2317 			vnode = coveringNode;
2318 		}
2319 	}
2320 
2321 	*_vnode = vnode;
2322 	if (_parentID)
2323 		*_parentID = lastParentID;
2324 
2325 	return B_OK;
2326 }
2327 
2328 
2329 static status_t
2330 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2331 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2332 {
2333 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2334 		get_current_io_context(kernel), _vnode, _parentID);
2335 }
2336 
2337 
2338 static status_t
2339 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2340 	ino_t* _parentID, bool kernel)
2341 {
2342 	struct vnode* start = NULL;
2343 
2344 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2345 
2346 	if (!path)
2347 		return B_BAD_VALUE;
2348 
2349 	if (*path == '\0')
2350 		return B_ENTRY_NOT_FOUND;
2351 
2352 	// figure out if we need to start at root or at cwd
2353 	if (*path == '/') {
2354 		if (sRoot == NULL) {
2355 			// we're a bit early, aren't we?
2356 			return B_ERROR;
2357 		}
2358 
2359 		while (*++path == '/')
2360 			;
2361 		start = get_root_vnode(kernel);
2362 
2363 		if (*path == '\0') {
2364 			*_vnode = start;
2365 			return B_OK;
2366 		}
2367 
2368 	} else {
2369 		struct io_context* context = get_current_io_context(kernel);
2370 
2371 		mutex_lock(&context->io_mutex);
2372 		start = context->cwd;
2373 		if (start != NULL)
2374 			inc_vnode_ref_count(start);
2375 		mutex_unlock(&context->io_mutex);
2376 
2377 		if (start == NULL)
2378 			return B_ERROR;
2379 	}
2380 
2381 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2382 		_parentID);
2383 }
2384 
2385 
/*! Returns the vnode for the next to last segment of the path, and writes
	the last path component into \a filename.
2388 	The path buffer must be able to store at least one additional character.
2389 */
2390 static status_t
2391 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2392 	bool kernel)
2393 {
2394 	status_t status = get_dir_path_and_leaf(path, filename);
2395 	if (status != B_OK)
2396 		return status;
2397 
2398 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2399 }
2400 
2401 
2402 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2403 		   to by a FD + path pair.
2404 
2405 	\a path must be given in either case. \a fd might be omitted, in which
2406 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2410 
2411 	The caller has the responsibility to call put_vnode() on the returned
2412 	directory vnode.
2413 
2414 	\param fd The FD. May be < 0.
2415 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2416 	       is modified by this function. It must have at least room for a
2417 	       string one character longer than the path it contains.
2418 	\param _vnode A pointer to a variable the directory vnode shall be written
2419 		   into.
2420 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2421 		   the leaf name of the specified entry will be written.
2422 	\param kernel \c true, if invoked from inside the kernel, \c false if
2423 		   invoked from userland.
2424 	\return \c B_OK, if everything went fine, another error code otherwise.
2425 */
2426 static status_t
2427 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2428 	char* filename, bool kernel)
2429 {
2430 	if (!path)
2431 		return B_BAD_VALUE;
2432 	if (*path == '\0')
2433 		return B_ENTRY_NOT_FOUND;
2434 	if (fd < 0)
2435 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2436 
2437 	status_t status = get_dir_path_and_leaf(path, filename);
2438 	if (status != B_OK)
2439 		return status;
2440 
2441 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2442 }
2443 
2444 
2445 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2446 		   to by a vnode + path pair.
2447 
2448 	\a path must be given in either case. \a vnode might be omitted, in which
2449 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2453 
2454 	The caller has the responsibility to call put_vnode() on the returned
2455 	directory vnode.
2456 
2457 	\param vnode The vnode. May be \c NULL.
2458 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2459 	       is modified by this function. It must have at least room for a
2460 	       string one character longer than the path it contains.
2461 	\param _vnode A pointer to a variable the directory vnode shall be written
2462 		   into.
2463 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2464 		   the leaf name of the specified entry will be written.
2465 	\param kernel \c true, if invoked from inside the kernel, \c false if
2466 		   invoked from userland.
2467 	\return \c B_OK, if everything went fine, another error code otherwise.
2468 */
2469 static status_t
2470 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2471 	struct vnode** _vnode, char* filename, bool kernel)
2472 {
2473 	if (!path)
2474 		return B_BAD_VALUE;
2475 	if (*path == '\0')
2476 		return B_ENTRY_NOT_FOUND;
2477 	if (vnode == NULL || path[0] == '/')
2478 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2479 
2480 	status_t status = get_dir_path_and_leaf(path, filename);
2481 	if (status != B_OK)
2482 		return status;
2483 
2484 	inc_vnode_ref_count(vnode);
2485 		// vnode_path_to_vnode() always decrements the ref count
2486 
2487 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2488 }
2489 
2490 
2491 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2492 */
2493 static status_t
2494 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2495 	size_t bufferSize, struct io_context* ioContext)
2496 {
2497 	if (bufferSize < sizeof(struct dirent))
2498 		return B_BAD_VALUE;
2499 
2500 	// See if the vnode is covering another vnode and move to the covered
2501 	// vnode so we get the underlying file system
2502 	VNodePutter vnodePutter;
2503 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2504 		vnode = coveredVnode;
2505 		vnodePutter.SetTo(vnode);
2506 	}
2507 
2508 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2509 		// The FS supports getting the name of a vnode.
2510 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2511 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2512 			return B_OK;
2513 	}
2514 
2515 	// The FS doesn't support getting the name of a vnode. So we search the
2516 	// parent directory for the vnode, if the caller let us.
2517 
2518 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2519 		return B_UNSUPPORTED;
2520 
2521 	void* cookie;
2522 
2523 	status_t status = FS_CALL(parent, open_dir, &cookie);
2524 	if (status >= B_OK) {
2525 		while (true) {
2526 			uint32 num = 1;
			// We use the FS hook directly instead of dir_read(), since we
			// don't want the entries to be fixed up. We have already resolved
			// vnode to the covered node.
2530 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2531 				&num);
2532 			if (status != B_OK)
2533 				break;
2534 			if (num == 0) {
2535 				status = B_ENTRY_NOT_FOUND;
2536 				break;
2537 			}
2538 
2539 			if (vnode->id == buffer->d_ino) {
2540 				// found correct entry!
2541 				break;
2542 			}
2543 		}
2544 
2545 		FS_CALL(parent, close_dir, cookie);
2546 		FS_CALL(parent, free_dir_cookie, cookie);
2547 	}
2548 	return status;
2549 }
2550 
2551 
2552 static status_t
2553 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2554 	size_t nameSize, bool kernel)
2555 {
2556 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2557 	struct dirent* dirent = (struct dirent*)buffer;
2558 
2559 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2560 		get_current_io_context(kernel));
2561 	if (status != B_OK)
2562 		return status;
2563 
2564 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2565 		return B_BUFFER_OVERFLOW;
2566 
2567 	return B_OK;
2568 }
2569 
2570 
2571 /*!	Gets the full path to a given directory vnode.
2572 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2573 	file system doesn't support this call, it will fall back to iterating
2574 	through the parent directory to get the name of the child.
2575 
2576 	To protect against circular loops, it supports a maximum tree depth
2577 	of 256 levels.
2578 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path is
	still valid, as paths aren't safe anyway: the path to a file can change
	at any time.

	It might be a good idea, though, to check in the calling function
	whether the returned path exists (it's not done here for efficiency
	reasons).
2585 */
2586 static status_t
2587 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2588 	bool kernel)
2589 {
2590 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2591 
2592 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2593 		return B_BAD_VALUE;
2594 
2595 	if (!S_ISDIR(vnode->Type()))
2596 		return B_NOT_A_DIRECTORY;
2597 
2598 	char* path = buffer;
2599 	int32 insert = bufferSize;
2600 	int32 maxLevel = 256;
2601 	int32 length;
2602 	status_t status = B_OK;
2603 	struct io_context* ioContext = get_current_io_context(kernel);
2604 
2605 	// we don't use get_vnode() here because this call is more
2606 	// efficient and does all we need from get_vnode()
2607 	inc_vnode_ref_count(vnode);
2608 
2609 	path[--insert] = '\0';
2610 		// the path is filled right to left
2611 
2612 	while (true) {
2613 		// If the node is the context's root, bail out. Otherwise resolve mount
2614 		// points.
2615 		if (vnode == ioContext->root)
2616 			break;
2617 
2618 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2619 			put_vnode(vnode);
2620 			vnode = coveredVnode;
2621 		}
2622 
2623 		// lookup the parent vnode
2624 		struct vnode* parentVnode;
2625 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2626 		if (status != B_OK)
2627 			goto out;
2628 
2629 		if (parentVnode == vnode) {
2630 			// The caller apparently got their hands on a node outside of their
2631 			// context's root. Now we've hit the global root.
2632 			put_vnode(parentVnode);
2633 			break;
2634 		}
2635 
2636 		// get the node's name
2637 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2638 			// also used for fs_read_dir()
2639 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2640 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2641 			sizeof(nameBuffer), ioContext);
2642 
2643 		// release the current vnode, we only need its parent from now on
2644 		put_vnode(vnode);
2645 		vnode = parentVnode;
2646 
2647 		if (status != B_OK)
2648 			goto out;
2649 
2650 		// TODO: add an explicit check for loops in about 10 levels to do
2651 		// real loop detection
2652 
		// don't go deeper than 'maxLevel' levels, to protect against
		// circular loops
2654 		if (maxLevel-- < 0) {
2655 			status = B_LINK_LIMIT;
2656 			goto out;
2657 		}
2658 
2659 		// add the name in front of the current path
2660 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2661 		length = strlen(name);
2662 		insert -= length;
2663 		if (insert <= 0) {
2664 			status = B_RESULT_NOT_REPRESENTABLE;
2665 			goto out;
2666 		}
2667 		memcpy(path + insert, name, length);
2668 		path[--insert] = '/';
2669 	}
2670 
2671 	// the root dir will result in an empty path: fix it
2672 	if (path[insert] == '\0')
2673 		path[--insert] = '/';
2674 
2675 	TRACE(("  path is: %s\n", path + insert));
2676 
2677 	// move the path to the start of the buffer
2678 	length = bufferSize - insert;
2679 	memmove(buffer, path + insert, length);
2680 
2681 out:
2682 	put_vnode(vnode);
2683 	return status;
2684 }
2685 
2686 
2687 /*!	Checks the length of every path component, and adds a '.'
2688 	if the path ends in a slash.
2689 	The given path buffer must be able to store at least one
2690 	additional character.
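	E.g. "/boot/home/" is completed to "/boot/home/.".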
2691 */
2692 static status_t
2693 check_path(char* to)
2694 {
2695 	int32 length = 0;
2696 
2697 	// check length of every path component
2698 
2699 	while (*to) {
2700 		char* begin;
2701 		if (*to == '/')
2702 			to++, length++;
2703 
2704 		begin = to;
2705 		while (*to != '/' && *to)
2706 			to++, length++;
2707 
2708 		if (to - begin > B_FILE_NAME_LENGTH)
2709 			return B_NAME_TOO_LONG;
2710 	}
2711 
2712 	if (length == 0)
2713 		return B_ENTRY_NOT_FOUND;
2714 
2715 	// complete path if there is a slash at the end
2716 
2717 	if (*(to - 1) == '/') {
2718 		if (length > B_PATH_NAME_LENGTH - 2)
2719 			return B_NAME_TOO_LONG;
2720 
2721 		to[0] = '.';
2722 		to[1] = '\0';
2723 	}
2724 
2725 	return B_OK;
2726 }
2727 
2728 
2729 static struct file_descriptor*
2730 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2731 {
2732 	struct file_descriptor* descriptor
2733 		= get_fd(get_current_io_context(kernel), fd);
2734 	if (descriptor == NULL)
2735 		return NULL;
2736 
2737 	struct vnode* vnode = fd_vnode(descriptor);
2738 	if (vnode == NULL) {
2739 		put_fd(descriptor);
2740 		return NULL;
2741 	}
2742 
	// TODO: when we can close a file descriptor at any point, investigate
2744 	//	if this is still valid to do (accessing the vnode without ref_count
2745 	//	or locking)
2746 	*_vnode = vnode;
2747 	return descriptor;
2748 }
2749 
2750 
2751 static struct vnode*
2752 get_vnode_from_fd(int fd, bool kernel)
2753 {
2754 	struct file_descriptor* descriptor;
2755 	struct vnode* vnode;
2756 
2757 	descriptor = get_fd(get_current_io_context(kernel), fd);
2758 	if (descriptor == NULL)
2759 		return NULL;
2760 
2761 	vnode = fd_vnode(descriptor);
2762 	if (vnode != NULL)
2763 		inc_vnode_ref_count(vnode);
2764 
2765 	put_fd(descriptor);
2766 	return vnode;
2767 }
2768 
2769 
2770 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2771 	only the path will be considered. In this case, the \a path must not be
2772 	NULL.
2773 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2774 	and should be NULL for files.
2775 */
2776 static status_t
2777 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2778 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2779 {
2780 	if (fd < 0 && !path)
2781 		return B_BAD_VALUE;
2782 
2783 	if (path != NULL && *path == '\0')
2784 		return B_ENTRY_NOT_FOUND;
2785 
2786 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2787 		// no FD or absolute path
2788 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2789 	}
2790 
2791 	// FD only, or FD + relative path
2792 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2793 	if (vnode == NULL)
2794 		return B_FILE_ERROR;
2795 
2796 	if (path != NULL) {
2797 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2798 			_vnode, _parentID);
2799 	}
2800 
2801 	// there is no relative path to take into account
2802 
2803 	*_vnode = vnode;
2804 	if (_parentID)
2805 		*_parentID = -1;
2806 
2807 	return B_OK;
2808 }
2809 
2810 
2811 static int
2812 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2813 	void* cookie, int openMode, bool kernel)
2814 {
2815 	struct file_descriptor* descriptor;
2816 	int fd;
2817 
	// If the vnode is mandatorily locked, we don't allow creating a new
	// file or directory file_descriptor for it
2820 	if (vnode && vnode->mandatory_locked_by != NULL
2821 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2822 		return B_BUSY;
2823 
2824 	descriptor = alloc_fd();
2825 	if (!descriptor)
2826 		return B_NO_MEMORY;
2827 
2828 	if (vnode)
2829 		descriptor->u.vnode = vnode;
2830 	else
2831 		descriptor->u.mount = mount;
2832 	descriptor->cookie = cookie;
2833 
2834 	switch (type) {
2835 		// vnode types
2836 		case FDTYPE_FILE:
2837 			descriptor->ops = &sFileOps;
2838 			break;
2839 		case FDTYPE_DIR:
2840 			descriptor->ops = &sDirectoryOps;
2841 			break;
2842 		case FDTYPE_ATTR:
2843 			descriptor->ops = &sAttributeOps;
2844 			break;
2845 		case FDTYPE_ATTR_DIR:
2846 			descriptor->ops = &sAttributeDirectoryOps;
2847 			break;
2848 
2849 		// mount types
2850 		case FDTYPE_INDEX_DIR:
2851 			descriptor->ops = &sIndexDirectoryOps;
2852 			break;
2853 		case FDTYPE_QUERY:
2854 			descriptor->ops = &sQueryOps;
2855 			break;
2856 
2857 		default:
2858 			panic("get_new_fd() called with unknown type %d\n", type);
2859 			break;
2860 	}
2861 	descriptor->type = type;
2862 	descriptor->open_mode = openMode;
2863 
2864 	io_context* context = get_current_io_context(kernel);
2865 	fd = new_fd(context, descriptor);
2866 	if (fd < 0) {
2867 		descriptor->ops = NULL;
2868 		put_fd(descriptor);
2869 		return B_NO_MORE_FDS;
2870 	}
2871 
2872 	mutex_lock(&context->io_mutex);
2873 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2874 	mutex_unlock(&context->io_mutex);
2875 
2876 	return fd;
2877 }
2878 
2879 
2880 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2881 	vfs_normalize_path(). See there for more documentation.
2882 */
2883 static status_t
2884 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2885 {
2886 	VNodePutter dirPutter;
2887 	struct vnode* dir = NULL;
2888 	status_t error;
2889 
2890 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2891 		// get dir vnode + leaf name
2892 		struct vnode* nextDir;
2893 		char leaf[B_FILE_NAME_LENGTH];
2894 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2895 		if (error != B_OK)
2896 			return error;
2897 
2898 		dir = nextDir;
2899 		strcpy(path, leaf);
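		// from now on, "path" only contains the leaf name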
2900 		dirPutter.SetTo(dir);
2901 
2902 		// get file vnode, if we shall resolve links
2903 		bool fileExists = false;
2904 		struct vnode* fileVnode;
2905 		VNodePutter fileVnodePutter;
2906 		if (traverseLink) {
2907 			inc_vnode_ref_count(dir);
2908 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2909 					NULL) == B_OK) {
2910 				fileVnodePutter.SetTo(fileVnode);
2911 				fileExists = true;
2912 			}
2913 		}
2914 
2915 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2916 			// we're done -- construct the path
2917 			bool hasLeaf = true;
2918 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2919 				// special cases "." and ".." -- get the dir, forget the leaf
2920 				inc_vnode_ref_count(dir);
2921 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2922 					&nextDir, NULL);
2923 				if (error != B_OK)
2924 					return error;
2925 				dir = nextDir;
2926 				dirPutter.SetTo(dir);
2927 				hasLeaf = false;
2928 			}
2929 
2930 			// get the directory path
2931 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2932 			if (error != B_OK)
2933 				return error;
2934 
2935 			// append the leaf name
2936 			if (hasLeaf) {
2937 				// insert a directory separator if this is not the file system
2938 				// root
2939 				if ((strcmp(path, "/") != 0
2940 					&& strlcat(path, "/", pathSize) >= pathSize)
2941 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2942 					return B_NAME_TOO_LONG;
2943 				}
2944 			}
2945 
2946 			return B_OK;
2947 		}
2948 
2949 		// read link
2950 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2951 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2952 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2953 			if (error != B_OK)
2954 				return error;
2955 			if (bufferSize < B_PATH_NAME_LENGTH)
2956 				path[bufferSize] = '\0';
2957 		} else
2958 			return B_BAD_VALUE;
2959 	}
2960 
2961 	return B_LINK_LIMIT;
2962 }
2963 
2964 
2965 static status_t
2966 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2967 	struct io_context* ioContext)
2968 {
2969 	// Make sure the IO context root is not bypassed.
2970 	if (parent == ioContext->root) {
2971 		*_device = parent->device;
2972 		*_node = parent->id;
2973 		return B_OK;
2974 	}
2975 
2976 	inc_vnode_ref_count(parent);
2977 		// vnode_path_to_vnode() puts the node
2978 
2979 	// ".." is guaranteed not to be clobbered by this call
2980 	struct vnode* vnode;
2981 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2982 		ioContext, &vnode, NULL);
2983 	if (status == B_OK) {
2984 		*_device = vnode->device;
2985 		*_node = vnode->id;
2986 		put_vnode(vnode);
2987 	}
2988 
2989 	return status;
2990 }
2991 
2992 
2993 #ifdef ADD_DEBUGGER_COMMANDS
2994 
2995 
2996 static void
2997 _dump_advisory_locking(advisory_locking* locking)
2998 {
2999 	if (locking == NULL)
3000 		return;
3001 
3002 	kprintf("   lock:        %" B_PRId32, locking->lock);
3003 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3004 
3005 	int32 index = 0;
3006 	LockList::Iterator iterator = locking->locks.GetIterator();
3007 	while (iterator.HasNext()) {
3008 		struct advisory_lock* lock = iterator.Next();
3009 
3010 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3011 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3012 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3013 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3014 	}
3015 }
3016 
3017 
3018 static void
3019 _dump_mount(struct fs_mount* mount)
3020 {
3021 	kprintf("MOUNT: %p\n", mount);
3022 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3023 	kprintf(" device_name:   %s\n", mount->device_name);
3024 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3025 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3026 	kprintf(" partition:     %p\n", mount->partition);
3027 	kprintf(" lock:          %p\n", &mount->lock);
3028 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3029 		mount->owns_file_device ? " owns_file_device" : "");
3030 
3031 	fs_volume* volume = mount->volume;
3032 	while (volume != NULL) {
3033 		kprintf(" volume %p:\n", volume);
3034 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3035 		kprintf("  private_volume:   %p\n", volume->private_volume);
3036 		kprintf("  ops:              %p\n", volume->ops);
3037 		kprintf("  file_system:      %p\n", volume->file_system);
3038 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3039 		volume = volume->super_volume;
3040 	}
3041 
3042 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3043 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3044 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3045 	set_debug_variable("_partition", (addr_t)mount->partition);
3046 }
3047 
3048 
3049 static bool
3050 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3051 	const char* name)
3052 {
3053 	bool insertSlash = buffer[bufferSize] != '\0';
3054 	size_t nameLength = strlen(name);
3055 
3056 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3057 		return false;
3058 
3059 	if (insertSlash)
3060 		buffer[--bufferSize] = '/';
3061 
3062 	bufferSize -= nameLength;
3063 	memcpy(buffer + bufferSize, name, nameLength);
3064 
3065 	return true;
3066 }
3067 
3068 
3069 static bool
3070 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3071 	ino_t nodeID)
3072 {
3073 	if (bufferSize == 0)
3074 		return false;
3075 
3076 	bool insertSlash = buffer[bufferSize] != '\0';
3077 	if (insertSlash)
3078 		buffer[--bufferSize] = '/';
3079 
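	// Format the "<dev,node>" tag at the start of the buffer first, then
	// move it into place right before the path built so far.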
3080 	size_t size = snprintf(buffer, bufferSize,
3081 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3082 	if (size > bufferSize) {
3083 		if (insertSlash)
3084 			bufferSize++;
3085 		return false;
3086 	}
3087 
3088 	if (size < bufferSize)
3089 		memmove(buffer + bufferSize - size, buffer, size);
3090 
3091 	bufferSize -= size;
3092 	return true;
3093 }
3094 
3095 
3096 static char*
3097 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3098 	bool& _truncated)
3099 {
3100 	// null-terminate the path
3101 	buffer[--bufferSize] = '\0';
3102 
3103 	while (true) {
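		// If the vnode covers another one, resolve to the covered vnode
		// first - its file system's entry cache knows the name.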
3104 		while (vnode->covers != NULL)
3105 			vnode = vnode->covers;
3106 
3107 		if (vnode == sRoot) {
3108 			_truncated = bufferSize == 0;
3109 			if (!_truncated)
3110 				buffer[--bufferSize] = '/';
3111 			return buffer + bufferSize;
3112 		}
3113 
3114 		// resolve the name
3115 		ino_t dirID;
3116 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3117 			vnode->id, dirID);
3118 		if (name == NULL) {
3119 			// Failed to resolve the name -- prepend "<dev,node>/".
3120 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3121 				vnode->mount->id, vnode->id);
3122 			return buffer + bufferSize;
3123 		}
3124 
3125 		// prepend the name
3126 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3127 			_truncated = true;
3128 			return buffer + bufferSize;
3129 		}
3130 
3131 		// resolve the directory node
3132 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3133 		if (nextVnode == NULL) {
3134 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3135 				vnode->mount->id, dirID);
3136 			return buffer + bufferSize;
3137 		}
3138 
3139 		vnode = nextVnode;
3140 	}
3141 }
3142 
3143 
3144 static void
3145 _dump_vnode(struct vnode* vnode, bool printPath)
3146 {
3147 	kprintf("VNODE: %p\n", vnode);
3148 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3149 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3150 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3151 	kprintf(" private_node:  %p\n", vnode->private_node);
3152 	kprintf(" mount:         %p\n", vnode->mount);
3153 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3154 	kprintf(" covers:        %p\n", vnode->covers);
3155 	kprintf(" cache:         %p\n", vnode->cache);
3156 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3157 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3158 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3159 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3160 
3161 	_dump_advisory_locking(vnode->advisory_locking);
3162 
3163 	if (printPath) {
3164 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3165 		if (buffer != NULL) {
3166 			bool truncated;
3167 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3168 				B_PATH_NAME_LENGTH, truncated);
3169 			if (path != NULL) {
3170 				kprintf(" path:          ");
3171 				if (truncated)
3172 					kputs("<truncated>/");
3173 				kputs(path);
3174 				kputs("\n");
3175 			} else
3176 				kprintf("Failed to resolve vnode path.\n");
3177 
3178 			debug_free(buffer);
3179 		} else
3180 			kprintf("Failed to allocate memory for constructing the path.\n");
3181 	}
3182 
3183 	set_debug_variable("_node", (addr_t)vnode->private_node);
3184 	set_debug_variable("_mount", (addr_t)vnode->mount);
3185 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3186 	set_debug_variable("_covers", (addr_t)vnode->covers);
3187 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3188 }
3189 
3190 
3191 static int
3192 dump_mount(int argc, char** argv)
3193 {
3194 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3195 		kprintf("usage: %s [id|address]\n", argv[0]);
3196 		return 0;
3197 	}
3198 
3199 	ulong val = parse_expression(argv[1]);
3200 	uint32 id = val;
3201 
3202 	struct fs_mount* mount = sMountsTable->Lookup(id);
3203 	if (mount == NULL) {
3204 		if (IS_USER_ADDRESS(id)) {
3205 			kprintf("fs_mount not found\n");
3206 			return 0;
3207 		}
3208 		mount = (fs_mount*)val;
3209 	}
3210 
3211 	_dump_mount(mount);
3212 	return 0;
3213 }
3214 
3215 
3216 static int
3217 dump_mounts(int argc, char** argv)
3218 {
3219 	if (argc != 1) {
3220 		kprintf("usage: %s\n", argv[0]);
3221 		return 0;
3222 	}
3223 
3224 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3225 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3226 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3227 
3228 	struct fs_mount* mount;
3229 
3230 	MountTable::Iterator iterator(sMountsTable);
3231 	while (iterator.HasNext()) {
3232 		mount = iterator.Next();
3233 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3234 			mount->root_vnode->covers, mount->volume->private_volume,
3235 			mount->volume->file_system_name);
3236 
3237 		fs_volume* volume = mount->volume;
3238 		while (volume->super_volume != NULL) {
3239 			volume = volume->super_volume;
3240 			kprintf("                                     %p %s\n",
3241 				volume->private_volume, volume->file_system_name);
3242 		}
3243 	}
3244 
3245 	return 0;
3246 }
3247 
3248 
3249 static int
3250 dump_vnode(int argc, char** argv)
3251 {
3252 	bool printPath = false;
3253 	int argi = 1;
3254 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3255 		printPath = true;
3256 		argi++;
3257 	}
3258 
3259 	if (argi >= argc || argi + 2 < argc) {
3260 		print_debugger_command_usage(argv[0]);
3261 		return 0;
3262 	}
3263 
3264 	struct vnode* vnode = NULL;
3265 
3266 	if (argi + 1 == argc) {
3267 		vnode = (struct vnode*)parse_expression(argv[argi]);
3268 		if (IS_USER_ADDRESS(vnode)) {
3269 			kprintf("invalid vnode address\n");
3270 			return 0;
3271 		}
3272 		_dump_vnode(vnode, printPath);
3273 		return 0;
3274 	}
3275 
3276 	dev_t device = parse_expression(argv[argi]);
3277 	ino_t id = parse_expression(argv[argi + 1]);
3278 
3279 	VnodeTable::Iterator iterator(sVnodeTable);
3280 	while (iterator.HasNext()) {
3281 		vnode = iterator.Next();
3282 		if (vnode->id != id || vnode->device != device)
3283 			continue;
3284 
3285 		_dump_vnode(vnode, printPath);
3286 	}
3287 
3288 	return 0;
3289 }
3290 
3291 
3292 static int
3293 dump_vnodes(int argc, char** argv)
3294 {
3295 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3296 		kprintf("usage: %s [device]\n", argv[0]);
3297 		return 0;
3298 	}
3299 
3300 	// restrict dumped nodes to a certain device if requested
3301 	dev_t device = parse_expression(argv[1]);
3302 
3303 	struct vnode* vnode;
3304 
3305 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3306 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3307 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3308 
3309 	VnodeTable::Iterator iterator(sVnodeTable);
3310 	while (iterator.HasNext()) {
3311 		vnode = iterator.Next();
3312 		if (vnode->device != device)
3313 			continue;
3314 
3315 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3316 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3317 			vnode->private_node, vnode->advisory_locking,
3318 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3319 			vnode->IsUnpublished() ? "u" : "-");
3320 	}
3321 
3322 	return 0;
3323 }
3324 
3325 
3326 static int
3327 dump_vnode_caches(int argc, char** argv)
3328 {
3329 	struct vnode* vnode;
3330 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3332 		kprintf("usage: %s [device]\n", argv[0]);
3333 		return 0;
3334 	}
3335 
3336 	// restrict dumped nodes to a certain device if requested
3337 	dev_t device = -1;
3338 	if (argc > 1)
3339 		device = parse_expression(argv[1]);
3340 
3341 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3342 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3343 
3344 	VnodeTable::Iterator iterator(sVnodeTable);
3345 	while (iterator.HasNext()) {
3346 		vnode = iterator.Next();
3347 		if (vnode->cache == NULL)
3348 			continue;
3349 		if (device != -1 && vnode->device != device)
3350 			continue;
3351 
3352 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3353 			vnode, vnode->device, vnode->id, vnode->cache,
3354 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3355 			vnode->cache->page_count);
3356 	}
3357 
3358 	return 0;
3359 }
3360 
3361 
3362 int
3363 dump_io_context(int argc, char** argv)
3364 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3366 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3367 		return 0;
3368 	}
3369 
3370 	struct io_context* context = NULL;
3371 
3372 	if (argc > 1) {
3373 		ulong num = parse_expression(argv[1]);
3374 		if (IS_KERNEL_ADDRESS(num))
3375 			context = (struct io_context*)num;
3376 		else {
3377 			Team* team = team_get_team_struct_locked(num);
3378 			if (team == NULL) {
3379 				kprintf("could not find team with ID %lu\n", num);
3380 				return 0;
3381 			}
3382 			context = (struct io_context*)team->io_context;
3383 		}
3384 	} else
3385 		context = get_current_io_context(true);
3386 
3387 	kprintf("I/O CONTEXT: %p\n", context);
3388 	kprintf(" root vnode:\t%p\n", context->root);
3389 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3390 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3391 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3392 
3393 	if (context->num_used_fds) {
3394 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3395 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3396 	}
3397 
3398 	for (uint32 i = 0; i < context->table_size; i++) {
3399 		struct file_descriptor* fd = context->fds[i];
3400 		if (fd == NULL)
3401 			continue;
3402 
3403 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3404 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3405 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3406 			fd->pos, fd->cookie,
3407 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3408 				? "mount" : "vnode",
3409 			fd->u.vnode);
3410 	}
3411 
3412 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3413 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3414 
3415 	set_debug_variable("_cwd", (addr_t)context->cwd);
3416 
3417 	return 0;
3418 }
3419 
3420 
3421 int
3422 dump_vnode_usage(int argc, char** argv)
3423 {
3424 	if (argc != 1) {
3425 		kprintf("usage: %s\n", argv[0]);
3426 		return 0;
3427 	}
3428 
3429 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3430 		sUnusedVnodes, kMaxUnusedVnodes);
3431 
3432 	uint32 count = sVnodeTable->CountElements();
3433 
3434 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3435 		count - sUnusedVnodes);
3436 	return 0;
3437 }
3438 
3439 #endif	// ADD_DEBUGGER_COMMANDS
3440 
3441 
/*!	Clears up to \a bytes bytes of the memory specified by an iovec array.
3443 */
3444 static void
3445 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3446 {
3447 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3448 		size_t length = std::min(vecs[i].iov_len, bytes);
3449 		memset(vecs[i].iov_base, 0, length);
3450 		bytes -= length;
3451 	}
3452 }
3453 
3454 
3455 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3456 	and calls the file system hooks to read/write the request to disk.
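
	On input, *_vecIndex and *_vecOffset denote the position in \a vecs at
	which the transfer starts, and *_numBytes is the requested byte count.
	On success, *_numBytes is set to the number of bytes actually
	transferred.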
3457 */
3458 static status_t
3459 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3460 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3461 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3462 	bool doWrite)
3463 {
3464 	if (fileVecCount == 0) {
3465 		// There are no file vecs at this offset, so we're obviously trying
3466 		// to access the file outside of its bounds
3467 		return B_BAD_VALUE;
3468 	}
3469 
3470 	size_t numBytes = *_numBytes;
3471 	uint32 fileVecIndex;
3472 	size_t vecOffset = *_vecOffset;
3473 	uint32 vecIndex = *_vecIndex;
3474 	status_t status;
3475 	size_t size;
3476 
3477 	if (!doWrite && vecOffset == 0) {
3478 		// now directly read the data from the device
3479 		// the first file_io_vec can be read directly
3480 
3481 		if (fileVecs[0].length < (off_t)numBytes)
3482 			size = fileVecs[0].length;
3483 		else
3484 			size = numBytes;
3485 
3486 		if (fileVecs[0].offset >= 0) {
3487 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3488 				&vecs[vecIndex], vecCount - vecIndex, &size);
3489 		} else {
3490 			// sparse read
3491 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3492 			status = B_OK;
3493 		}
3494 		if (status != B_OK)
3495 			return status;
3496 
3497 		// TODO: this is a work-around for buggy device drivers!
3498 		//	When our own drivers honour the length, we can:
3499 		//	a) also use this direct I/O for writes (otherwise, it would
3500 		//	   overwrite precious data)
3501 		//	b) panic if the term below is true (at least for writes)
3502 		if ((off_t)size > fileVecs[0].length) {
3503 			//dprintf("warning: device driver %p doesn't respect total length "
3504 			//	"in read_pages() call!\n", ref->device);
3505 			size = fileVecs[0].length;
3506 		}
3507 
3508 		ASSERT((off_t)size <= fileVecs[0].length);
3509 
3510 		// If the file portion was contiguous, we're already done now
3511 		if (size == numBytes)
3512 			return B_OK;
3513 
3514 		// if we reached the end of the file, we can return as well
3515 		if ((off_t)size != fileVecs[0].length) {
3516 			*_numBytes = size;
3517 			return B_OK;
3518 		}
3519 
3520 		fileVecIndex = 1;
3521 
3522 		// first, find out where we have to continue in our iovecs
3523 		for (; vecIndex < vecCount; vecIndex++) {
3524 			if (size < vecs[vecIndex].iov_len)
3525 				break;
3526 
3527 			size -= vecs[vecIndex].iov_len;
3528 		}
3529 
3530 		vecOffset = size;
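		// vecOffset now is the offset into vecs[vecIndex] at which the
		// remaining transfer continues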
3531 	} else {
3532 		fileVecIndex = 0;
3533 		size = 0;
3534 	}
3535 
3536 	// Too bad, let's process the rest of the file_io_vecs
3537 
3538 	size_t totalSize = size;
3539 	size_t bytesLeft = numBytes - size;
3540 
3541 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3542 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3543 		off_t fileOffset = fileVec.offset;
3544 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3545 
3546 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3547 			fileLeft));
3548 
3549 		// process the complete fileVec
3550 		while (fileLeft > 0) {
3551 			iovec tempVecs[MAX_TEMP_IO_VECS];
3552 			uint32 tempCount = 0;
3553 
3554 			// size tracks how much of what is left of the current fileVec
3555 			// (fileLeft) has been assigned to tempVecs
3556 			size = 0;
3557 
3558 			// assign what is left of the current fileVec to the tempVecs
3559 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3560 					&& tempCount < MAX_TEMP_IO_VECS;) {
3561 				// try to satisfy one iovec per iteration (or as much as
3562 				// possible)
3563 
3564 				// bytes left of the current iovec
3565 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3566 				if (vecLeft == 0) {
3567 					vecOffset = 0;
3568 					vecIndex++;
3569 					continue;
3570 				}
3571 
3572 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3573 					vecIndex, vecOffset, size));
3574 
3575 				// actually available bytes
3576 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3577 
3578 				tempVecs[tempCount].iov_base
3579 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3580 				tempVecs[tempCount].iov_len = tempVecSize;
3581 				tempCount++;
3582 
3583 				size += tempVecSize;
3584 				vecOffset += tempVecSize;
3585 			}
3586 
3587 			size_t bytes = size;
3588 
3589 			if (fileOffset == -1) {
3590 				if (doWrite) {
3591 					panic("sparse write attempt: vnode %p", vnode);
3592 					status = B_IO_ERROR;
3593 				} else {
3594 					// sparse read
3595 					zero_iovecs(tempVecs, tempCount, bytes);
3596 					status = B_OK;
3597 				}
3598 			} else if (doWrite) {
3599 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3600 					tempVecs, tempCount, &bytes);
3601 			} else {
3602 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3603 					tempVecs, tempCount, &bytes);
3604 			}
3605 			if (status != B_OK)
3606 				return status;
3607 
3608 			totalSize += bytes;
3609 			bytesLeft -= size;
3610 			if (fileOffset >= 0)
3611 				fileOffset += size;
3612 			fileLeft -= size;
3613 			//dprintf("-> file left = %Lu\n", fileLeft);
3614 
3615 			if (size != bytes || vecIndex >= vecCount) {
3616 				// there are no more bytes or iovecs, let's bail out
3617 				*_numBytes = totalSize;
3618 				return B_OK;
3619 			}
3620 		}
3621 	}
3622 
3623 	*_vecIndex = vecIndex;
3624 	*_vecOffset = vecOffset;
3625 	*_numBytes = totalSize;
3626 	return B_OK;
3627 }
3628 
3629 
3630 static bool
3631 is_user_in_group(gid_t gid)
3632 {
3633 	if (gid == getegid())
3634 		return true;
3635 
3636 	gid_t groups[NGROUPS_MAX];
3637 	int groupCount = getgroups(NGROUPS_MAX, groups);
3638 	for (int i = 0; i < groupCount; i++) {
3639 		if (gid == groups[i])
3640 			return true;
3641 	}
3642 
3643 	return false;
3644 }
3645 
3646 
3647 static status_t
3648 free_io_context(io_context* context)
3649 {
3650 	uint32 i;
3651 
3652 	TIOC(FreeIOContext(context));
3653 
3654 	if (context->root)
3655 		put_vnode(context->root);
3656 
3657 	if (context->cwd)
3658 		put_vnode(context->cwd);
3659 
3660 	mutex_lock(&context->io_mutex);
3661 
3662 	for (i = 0; i < context->table_size; i++) {
3663 		if (struct file_descriptor* descriptor = context->fds[i]) {
3664 			close_fd(context, descriptor);
3665 			put_fd(descriptor);
3666 		}
3667 	}
3668 
3669 	mutex_destroy(&context->io_mutex);
3670 
3671 	remove_node_monitors(context);
3672 	free(context->fds);
3673 	free(context);
3674 
3675 	return B_OK;
3676 }
3677 
3678 
3679 static status_t
3680 resize_monitor_table(struct io_context* context, const int newSize)
3681 {
	status_t status = B_OK;
3683 
3684 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3685 		return B_BAD_VALUE;
3686 
3687 	mutex_lock(&context->io_mutex);
3688 
3689 	if ((size_t)newSize < context->num_monitors) {
3690 		status = B_BUSY;
3691 		goto out;
3692 	}
3693 	context->max_monitors = newSize;
3694 
3695 out:
3696 	mutex_unlock(&context->io_mutex);
3697 	return status;
3698 }
3699 
3700 
3701 //	#pragma mark - public API for file systems
3702 
3703 
3704 extern "C" status_t
3705 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3706 	fs_vnode_ops* ops)
3707 {
3708 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3709 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3710 
3711 	if (privateNode == NULL)
3712 		return B_BAD_VALUE;
3713 
3714 	int32 tries = BUSY_VNODE_RETRIES;
3715 restart:
3716 	// create the node
3717 	bool nodeCreated;
3718 	struct vnode* vnode;
3719 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3720 		nodeCreated);
3721 	if (status != B_OK)
3722 		return status;
3723 
3724 	WriteLocker nodeLocker(sVnodeLock, true);
3725 		// create_new_vnode_and_lock() has locked for us
3726 
3727 	if (!nodeCreated && vnode->IsBusy()) {
3728 		nodeLocker.Unlock();
3729 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3730 			return B_BUSY;
3731 		goto restart;
3732 	}
3733 
3734 	// file system integrity check:
3735 	// test if the vnode already exists and bail out if this is the case!
3736 	if (!nodeCreated) {
3737 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3738 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3739 			vnode->private_node);
3740 		return B_ERROR;
3741 	}
3742 
3743 	vnode->private_node = privateNode;
3744 	vnode->ops = ops;
3745 	vnode->SetUnpublished(true);
3746 
3747 	TRACE(("returns: %s\n", strerror(status)));
3748 
3749 	return status;
3750 }
3751 
3752 
3753 extern "C" status_t
3754 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3755 	fs_vnode_ops* ops, int type, uint32 flags)
3756 {
3757 	FUNCTION(("publish_vnode()\n"));
3758 
3759 	int32 tries = BUSY_VNODE_RETRIES;
3760 restart:
3761 	WriteLocker locker(sVnodeLock);
3762 
3763 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3764 
3765 	bool nodeCreated = false;
3766 	if (vnode == NULL) {
3767 		if (privateNode == NULL)
3768 			return B_BAD_VALUE;
3769 
3770 		// create the node
3771 		locker.Unlock();
3772 			// create_new_vnode_and_lock() will re-lock for us on success
3773 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3774 			nodeCreated);
3775 		if (status != B_OK)
3776 			return status;
3777 
3778 		locker.SetTo(sVnodeLock, true);
3779 	}
3780 
3781 	if (nodeCreated) {
3782 		vnode->private_node = privateNode;
3783 		vnode->ops = ops;
3784 		vnode->SetUnpublished(true);
3785 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3786 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3787 		// already known, but not published
3788 	} else if (vnode->IsBusy()) {
3789 		locker.Unlock();
3790 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3791 			return B_BUSY;
3792 		goto restart;
3793 	} else
3794 		return B_BAD_VALUE;
3795 
3796 	bool publishSpecialSubNode = false;
3797 
3798 	vnode->SetType(type);
3799 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3800 	publishSpecialSubNode = is_special_node_type(type)
3801 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3802 
3803 	status_t status = B_OK;
3804 
3805 	// create sub vnodes, if necessary
3806 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3807 		locker.Unlock();
3808 
3809 		fs_volume* subVolume = volume;
3810 		if (volume->sub_volume != NULL) {
3811 			while (status == B_OK && subVolume->sub_volume != NULL) {
3812 				subVolume = subVolume->sub_volume;
3813 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3814 					vnode);
3815 			}
3816 		}
3817 
3818 		if (status == B_OK && publishSpecialSubNode)
3819 			status = create_special_sub_node(vnode, flags);
3820 
3821 		if (status != B_OK) {
3822 			// error -- clean up the created sub vnodes
3823 			while (subVolume->super_volume != volume) {
3824 				subVolume = subVolume->super_volume;
3825 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3826 			}
3827 		}
3828 
3829 		if (status == B_OK) {
3830 			ReadLocker vnodesReadLocker(sVnodeLock);
3831 			AutoLocker<Vnode> nodeLocker(vnode);
3832 			vnode->SetBusy(false);
3833 			vnode->SetUnpublished(false);
3834 		} else {
3835 			locker.Lock();
3836 			sVnodeTable->Remove(vnode);
3837 			remove_vnode_from_mount_list(vnode, vnode->mount);
3838 			free(vnode);
3839 		}
3840 	} else {
3841 		// we still hold the write lock -- mark the node unbusy and published
3842 		vnode->SetBusy(false);
3843 		vnode->SetUnpublished(false);
3844 	}
3845 
3846 	TRACE(("returns: %s\n", strerror(status)));
3847 
3848 	return status;
3849 }
3850 
3851 
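// Illustrative sketch (hypothetical FS code; myNode and gMyVnodeOps are
// placeholders, not part of this file): a file system creating a new file
// typically combines the two hooks above -- first registering the node,
// then making it visible once the entry exists:
//
//   status_t status = new_vnode(volume, inodeID, myNode, &gMyVnodeOps);
//   if (status == B_OK) {
//       status = publish_vnode(volume, inodeID, myNode, &gMyVnodeOps,
//           S_IFREG, 0);
//   }
//
// publish_vnode() can also create and publish a not yet known node in one
// step, as the lookup path above shows.

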
3852 extern "C" status_t
3853 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3854 {
3855 	struct vnode* vnode;
3856 
3857 	if (volume == NULL)
3858 		return B_BAD_VALUE;
3859 
3860 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3861 	if (status != B_OK)
3862 		return status;
3863 
3864 	// If this is a layered FS, we need to get the node cookie for the requested
3865 	// layer.
3866 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3867 		fs_vnode resolvedNode;
3868 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3869 			&resolvedNode);
3870 		if (status != B_OK) {
3871 			panic("get_vnode(): Failed to get super node for vnode %p, "
3872 				"volume: %p", vnode, volume);
3873 			put_vnode(vnode);
3874 			return status;
3875 		}
3876 
3877 		if (_privateNode != NULL)
3878 			*_privateNode = resolvedNode.private_node;
3879 	} else if (_privateNode != NULL)
3880 		*_privateNode = vnode->private_node;
3881 
3882 	return B_OK;
3883 }
3884 
3885 
3886 extern "C" status_t
3887 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3888 {
3889 	struct vnode* vnode;
3890 
3891 	rw_lock_read_lock(&sVnodeLock);
3892 	vnode = lookup_vnode(volume->id, vnodeID);
3893 	rw_lock_read_unlock(&sVnodeLock);
3894 
3895 	if (vnode == NULL)
3896 		return B_BAD_VALUE;
3897 
3898 	inc_vnode_ref_count(vnode);
3899 	return B_OK;
3900 }
3901 
3902 
3903 extern "C" status_t
3904 put_vnode(fs_volume* volume, ino_t vnodeID)
3905 {
3906 	struct vnode* vnode;
3907 
3908 	rw_lock_read_lock(&sVnodeLock);
3909 	vnode = lookup_vnode(volume->id, vnodeID);
3910 	rw_lock_read_unlock(&sVnodeLock);
3911 
3912 	if (vnode == NULL)
3913 		return B_BAD_VALUE;
3914 
3915 	dec_vnode_ref_count(vnode, false, true);
3916 	return B_OK;
3917 }
3918 
3919 
3920 extern "C" status_t
3921 remove_vnode(fs_volume* volume, ino_t vnodeID)
3922 {
3923 	ReadLocker locker(sVnodeLock);
3924 
3925 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3926 	if (vnode == NULL)
3927 		return B_ENTRY_NOT_FOUND;
3928 
3929 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3930 		// this vnode is in use
3931 		return B_BUSY;
3932 	}
3933 
3934 	vnode->Lock();
3935 
3936 	vnode->SetRemoved(true);
3937 	bool removeUnpublished = false;
3938 
3939 	if (vnode->IsUnpublished()) {
3940 		// prepare the vnode for deletion
3941 		removeUnpublished = true;
3942 		vnode->SetBusy(true);
3943 	}
3944 
3945 	vnode->Unlock();
3946 	locker.Unlock();
3947 
3948 	if (removeUnpublished) {
3949 		// If the vnode hasn't been published yet, we delete it here
3950 		atomic_add(&vnode->ref_count, -1);
3951 		free_vnode(vnode, true);
3952 	}
3953 
3954 	return B_OK;
3955 }
3956 
3957 
3958 extern "C" status_t
3959 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3960 {
3961 	struct vnode* vnode;
3962 
3963 	rw_lock_read_lock(&sVnodeLock);
3964 
3965 	vnode = lookup_vnode(volume->id, vnodeID);
3966 	if (vnode) {
3967 		AutoLocker<Vnode> nodeLocker(vnode);
3968 		vnode->SetRemoved(false);
3969 	}
3970 
3971 	rw_lock_read_unlock(&sVnodeLock);
3972 	return B_OK;
3973 }
3974 
3975 
3976 extern "C" status_t
3977 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3978 {
3979 	ReadLocker _(sVnodeLock);
3980 
3981 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3982 		if (_removed != NULL)
3983 			*_removed = vnode->IsRemoved();
3984 		return B_OK;
3985 	}
3986 
3987 	return B_BAD_VALUE;
3988 }
3989 
3990 
3991 extern "C" status_t
3992 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3993 {
3994 	ReadLocker locker(sVnodeLock);
3995 
3996 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3997 	if (vnode == NULL)
3998 		return B_ENTRY_NOT_FOUND;
3999 
4000 	// are we trying to mark an already busy node busy again?
4001 	if (busy && vnode->IsBusy())
4002 		return B_BUSY;
4003 
4004 	vnode->Lock();
4005 	vnode->SetBusy(busy);
4006 	vnode->Unlock();
4007 
4008 	return B_OK;
4009 }
4010 
4011 
4012 extern "C" status_t
4013 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4014 {
4015 	WriteLocker locker(sVnodeLock);
4016 
4017 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4018 	if (vnode == NULL)
4019 		return B_ENTRY_NOT_FOUND;
4020 
4021 	sVnodeTable->Remove(vnode);
4022 	vnode->id = newID;
4023 	sVnodeTable->Insert(vnode);
4024 
4025 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
4026 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4027 
4028 	return B_OK;
4029 }
4030 
4031 
4032 extern "C" fs_volume*
4033 volume_for_vnode(fs_vnode* _vnode)
4034 {
4035 	if (_vnode == NULL)
4036 		return NULL;
4037 
4038 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4039 	return vnode->mount->volume;
4040 }
4041 
4042 
4043 extern "C" status_t
4044 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4045 	uid_t nodeUserID)
4046 {
4047 	// get node permissions
4048 	int userPermissions = (mode & S_IRWXU) >> 6;
4049 	int groupPermissions = (mode & S_IRWXG) >> 3;
4050 	int otherPermissions = mode & S_IRWXO;
4051 
4052 	// get the node permissions for this uid/gid
4053 	int permissions = 0;
4054 	uid_t uid = geteuid();
4055 
4056 	if (uid == 0) {
4057 		// user is root
4058 		// root always has read/write permission, but at least one of the
4059 		// X bits must be set for execute permission
4060 		permissions = userPermissions | groupPermissions | otherPermissions
4061 			| S_IROTH | S_IWOTH;
4062 		if (S_ISDIR(mode))
4063 			permissions |= S_IXOTH;
4064 	} else if (uid == nodeUserID) {
4065 		// user is node owner
4066 		permissions = userPermissions;
4067 	} else if (is_user_in_group(nodeGroupID)) {
4068 		// user is in owning group
4069 		permissions = groupPermissions;
4070 	} else {
4071 		// user is one of the others
4072 		permissions = otherPermissions;
4073 	}
4074 
4075 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4076 }
4077 
4078 
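// Worked example for check_access_permissions() (made-up IDs): assume a node
// with mode 0750 owned by uid 1000 / gid 100, and recall that R_OK/W_OK/X_OK
// (4/2/1) line up with the rwx permission bits:
//
//   // caller with geteuid() == 1000 (the owner): permissions = 7,
//   // so (R_OK | X_OK) & ~7 == 0 -> B_OK
//   check_access_permissions(R_OK | X_OK, 0750, 100, 1000);
//
//   // caller with geteuid() == 2000 that is not in group 100 ("other"):
//   // permissions = 0, so W_OK & ~0 != 0 -> B_PERMISSION_DENIED
//   check_access_permissions(W_OK, 0750, 100, 1000);

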
4079 #if 0
4080 extern "C" status_t
4081 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4082 	size_t* _numBytes)
4083 {
4084 	struct file_descriptor* descriptor;
4085 	struct vnode* vnode;
4086 
4087 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4088 	if (descriptor == NULL)
4089 		return B_FILE_ERROR;
4090 
4091 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4092 		count, 0, _numBytes);
4093 
4094 	put_fd(descriptor);
4095 	return status;
4096 }
4097 
4098 
4099 extern "C" status_t
4100 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4101 	size_t* _numBytes)
4102 {
4103 	struct file_descriptor* descriptor;
4104 	struct vnode* vnode;
4105 
4106 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4107 	if (descriptor == NULL)
4108 		return B_FILE_ERROR;
4109 
4110 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4111 		count, 0, _numBytes);
4112 
4113 	put_fd(descriptor);
4114 	return status;
4115 }
4116 #endif
4117 
4118 
4119 extern "C" status_t
4120 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4121 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4122 	size_t* _bytes)
4123 {
4124 	struct file_descriptor* descriptor;
4125 	struct vnode* vnode;
4126 
4127 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4128 	if (descriptor == NULL)
4129 		return B_FILE_ERROR;
4130 
4131 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4132 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4133 		false);
4134 
4135 	put_fd(descriptor);
4136 	return status;
4137 }
4138 
4139 
4140 extern "C" status_t
4141 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4142 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4143 	size_t* _bytes)
4144 {
4145 	struct file_descriptor* descriptor;
4146 	struct vnode* vnode;
4147 
4148 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4149 	if (descriptor == NULL)
4150 		return B_FILE_ERROR;
4151 
4152 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4153 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4154 		true);
4155 
4156 	put_fd(descriptor);
4157 	return status;
4158 }
4159 
4160 
4161 extern "C" status_t
4162 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4163 {
4164 	// lookup mount -- the caller is required to make sure that the mount
4165 	// won't go away
4166 	ReadLocker locker(sMountLock);
4167 	struct fs_mount* mount = find_mount(mountID);
4168 	if (mount == NULL)
4169 		return B_BAD_VALUE;
4170 	locker.Unlock();
4171 
4172 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4173 }
4174 
4175 
4176 extern "C" status_t
4177 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4178 {
4179 	// lookup mount -- the caller is required to make sure that the mount
4180 	// won't go away
4181 	ReadLocker locker(sMountLock);
4182 	struct fs_mount* mount = find_mount(mountID);
4183 	if (mount == NULL)
4184 		return B_BAD_VALUE;
4185 	locker.Unlock();
4186 
4187 	return mount->entry_cache.Add(dirID, name, -1, true);
4188 }
4189 
4190 
4191 extern "C" status_t
4192 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4193 {
4194 	// lookup mount -- the caller is required to make sure that the mount
4195 	// won't go away
4196 	ReadLocker locker(sMountLock);
4197 	struct fs_mount* mount = find_mount(mountID);
4198 	if (mount == NULL)
4199 		return B_BAD_VALUE;
4200 	locker.Unlock();
4201 
4202 	return mount->entry_cache.Remove(dirID, name);
4203 }
4204 
4205 
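// Illustrative FS usage of the entry cache hooks above (made-up IDs): after
// creating entry "foo" with inode 1234 in directory 100, a file system may
// prime the cache, and must invalidate the entry again when it goes away:
//
//   entry_cache_add(volume->id, 100, "foo", 1234);
//   // ... later, on unlink:
//   entry_cache_remove(volume->id, 100, "foo");

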
4206 //	#pragma mark - private VFS API
4207 //	Functions the VFS exports for other parts of the kernel
4208 
4209 
4210 /*! Acquires another reference to the vnode that has to be released
4211 	by calling vfs_put_vnode().
4212 */
4213 void
4214 vfs_acquire_vnode(struct vnode* vnode)
4215 {
4216 	inc_vnode_ref_count(vnode);
4217 }
4218 
4219 
4220 /*! This is currently called from file_cache_create() only.
4221 	It's probably a temporary solution as long as devfs requires that
4222 	fs_read_pages()/fs_write_pages() are called with the standard
4223 	open cookie and not with a device cookie.
4224 	If that's done differently, remove this call; it has no other
4225 	purpose.
4226 */
4227 extern "C" status_t
4228 vfs_get_cookie_from_fd(int fd, void** _cookie)
4229 {
4230 	struct file_descriptor* descriptor;
4231 
4232 	descriptor = get_fd(get_current_io_context(true), fd);
4233 	if (descriptor == NULL)
4234 		return B_FILE_ERROR;
4235 
4236 	*_cookie = descriptor->cookie;
4237 	return B_OK;
4238 }
4239 
4240 
4241 extern "C" status_t
4242 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4243 {
4244 	*vnode = get_vnode_from_fd(fd, kernel);
4245 
4246 	if (*vnode == NULL)
4247 		return B_FILE_ERROR;
4248 
4249 	return B_NO_ERROR;
4250 }
4251 
4252 
4253 extern "C" status_t
4254 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4255 {
4256 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4257 		path, kernel));
4258 
4259 	KPath pathBuffer;
4260 	if (pathBuffer.InitCheck() != B_OK)
4261 		return B_NO_MEMORY;
4262 
4263 	char* buffer = pathBuffer.LockBuffer();
4264 	strlcpy(buffer, path, pathBuffer.BufferSize());
4265 
4266 	struct vnode* vnode;
4267 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4268 	if (status != B_OK)
4269 		return status;
4270 
4271 	*_vnode = vnode;
4272 	return B_OK;
4273 }
4274 
4275 
4276 extern "C" status_t
4277 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4278 {
4279 	struct vnode* vnode = NULL;
4280 
4281 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4282 	if (status != B_OK)
4283 		return status;
4284 
4285 	*_vnode = vnode;
4286 	return B_OK;
4287 }
4288 
4289 
4290 extern "C" status_t
4291 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4292 	const char* name, struct vnode** _vnode)
4293 {
4294 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4295 }
4296 
4297 
4298 extern "C" void
4299 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4300 {
4301 	*_mountID = vnode->device;
4302 	*_vnodeID = vnode->id;
4303 }
4304 
4305 
4306 /*!
4307 	Helper function abstracting the process of "converting" a given
4308 	vnode-pointer to a fs_vnode-pointer.
4309 	Currently only used in bindfs.
4310 */
4311 extern "C" fs_vnode*
4312 vfs_fsnode_for_vnode(struct vnode* vnode)
4313 {
4314 	return vnode;
4315 }
4316 
4317 
4318 /*!
4319 	Calls fs_open() on the given vnode and returns a new
4320 	file descriptor for it
4321 */
4322 int
4323 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4324 {
4325 	return open_vnode(vnode, openMode, kernel);
4326 }
4327 
4328 
4329 /*!	Looks up a vnode with the given mount and vnode ID.
4330 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4331 	to the node.
4332 	It's currently only used by file_cache_create().
4333 */
4334 extern "C" status_t
4335 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4336 {
4337 	rw_lock_read_lock(&sVnodeLock);
4338 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4339 	rw_lock_read_unlock(&sVnodeLock);
4340 
4341 	if (vnode == NULL)
4342 		return B_ERROR;
4343 
4344 	*_vnode = vnode;
4345 	return B_OK;
4346 }
4347 
4348 
4349 extern "C" status_t
4350 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4351 	bool traverseLeafLink, bool kernel, void** _node)
4352 {
4353 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4354 		volume, path, kernel));
4355 
4356 	KPath pathBuffer;
4357 	if (pathBuffer.InitCheck() != B_OK)
4358 		return B_NO_MEMORY;
4359 
4360 	fs_mount* mount;
4361 	status_t status = get_mount(volume->id, &mount);
4362 	if (status != B_OK)
4363 		return status;
4364 
4365 	char* buffer = pathBuffer.LockBuffer();
4366 	strlcpy(buffer, path, pathBuffer.BufferSize());
4367 
4368 	struct vnode* vnode = mount->root_vnode;
4369 
4370 	if (buffer[0] == '/')
4371 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4372 	else {
4373 		inc_vnode_ref_count(vnode);
4374 			// vnode_path_to_vnode() releases a reference to the starting vnode
4375 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4376 			kernel, &vnode, NULL);
4377 	}
4378 
4379 	put_mount(mount);
4380 
4381 	if (status != B_OK)
4382 		return status;
4383 
4384 	if (vnode->device != volume->id) {
4385 		// wrong mount ID - must not gain access to foreign file system nodes
4386 		put_vnode(vnode);
4387 		return B_BAD_VALUE;
4388 	}
4389 
4390 	// Use get_vnode() to resolve the cookie for the right layer.
4391 	status = get_vnode(volume, vnode->id, _node);
4392 	put_vnode(vnode);
4393 
4394 	return status;
4395 }
4396 
4397 
4398 status_t
4399 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4400 	struct stat* stat, bool kernel)
4401 {
4402 	status_t status;
4403 
4404 	if (path != NULL) {
4405 		// path given: get the stat of the node referred to by (fd, path)
4406 		KPath pathBuffer(path);
4407 		if (pathBuffer.InitCheck() != B_OK)
4408 			return B_NO_MEMORY;
4409 
4410 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4411 			traverseLeafLink, stat, kernel);
4412 	} else {
4413 		// no path given: get the FD and use the FD operation
4414 		struct file_descriptor* descriptor
4415 			= get_fd(get_current_io_context(kernel), fd);
4416 		if (descriptor == NULL)
4417 			return B_FILE_ERROR;
4418 
4419 		if (descriptor->ops->fd_read_stat)
4420 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4421 		else
4422 			status = B_UNSUPPORTED;
4423 
4424 		put_fd(descriptor);
4425 	}
4426 
4427 	return status;
4428 }
4429 
4430 
4431 /*!	Finds the full path to the file that contains the module \a moduleName,
4432 	puts it into \a pathBuffer, and returns B_OK for success.
4433 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4434 	If \a pathBuffer is too small, it returns \c B_BUFFER_OVERFLOW, or
4435 	\c B_ENTRY_NOT_FOUND if no file could be found.
4436 	\a pathBuffer is clobbered in any case and must not be relied on if this
4437 	function returns unsuccessfully.
4438 */
4439 status_t
4440 vfs_get_module_path(const char* basePath, const char* moduleName,
4441 	char* pathBuffer, size_t bufferSize)
4442 {
4443 	struct vnode* dir;
4444 	struct vnode* file;
4445 	status_t status;
4446 	size_t length;
4447 	char* path;
4448 
4449 	if (bufferSize == 0
4450 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4451 		return B_BUFFER_OVERFLOW;
4452 
4453 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4454 	if (status != B_OK)
4455 		return status;
4456 
4457 	// the path buffer had been clobbered by the above call
4458 	length = strlcpy(pathBuffer, basePath, bufferSize);
4459 	if (pathBuffer[length - 1] != '/')
4460 		pathBuffer[length++] = '/';
4461 
4462 	path = pathBuffer + length;
4463 	bufferSize -= length;
4464 
4465 	while (moduleName) {
4466 		char* nextPath = strchr(moduleName, '/');
4467 		if (nextPath == NULL)
4468 			length = strlen(moduleName);
4469 		else {
4470 			length = nextPath - moduleName;
4471 			nextPath++;
4472 		}
4473 
4474 		if (length + 1 >= bufferSize) {
4475 			status = B_BUFFER_OVERFLOW;
4476 			goto err;
4477 		}
4478 
4479 		memcpy(path, moduleName, length);
4480 		path[length] = '\0';
4481 		moduleName = nextPath;
4482 
4483 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4484 		if (status != B_OK) {
4485 			// vnode_path_to_vnode() has already released the reference to dir
4486 			return status;
4487 		}
4488 
4489 		if (S_ISDIR(file->Type())) {
4490 			// go to the next directory
4491 			path[length] = '/';
4492 			path[length + 1] = '\0';
4493 			path += length + 1;
4494 			bufferSize -= length + 1;
4495 
4496 			dir = file;
4497 		} else if (S_ISREG(file->Type())) {
4498 			// it's a file so it should be what we've searched for
4499 			put_vnode(file);
4500 
4501 			return B_OK;
4502 		} else {
4503 			TRACE(("vfs_get_module_path(): something is strange here: "
4504 				"0x%08" B_PRIx32 "...\n", file->Type()));
4505 			status = B_ERROR;
4506 			dir = file;
4507 			goto err;
4508 		}
4509 	}
4510 
4511 	// if we got here, the moduleName just pointed to a directory, not to
4512 	// a real module - what should we do in this case?
4513 	status = B_ENTRY_NOT_FOUND;
4514 
4515 err:
4516 	put_vnode(dir);
4517 	return status;
4518 }
4519 
4520 
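// Worked example for the function above (hypothetical paths): with basePath
// "/boot/system/add-ons/kernel" and moduleName "bus_managers/pci/v1", the
// loop first resolves the directory "bus_managers", then looks up "pci". If
// "pci" turns out to be a regular file, the search ends there and pathBuffer
// contains "/boot/system/add-ons/kernel/bus_managers/pci"; if it is another
// directory, the walk continues with "v1".

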
4521 /*!	\brief Normalizes a given path.
4522 
4523 	The path must refer to an existing or non-existing entry in an existing
4524 	directory; that is, after chopping off the leaf component, the remaining
4525 	path must refer to an existing directory.
4526 
4527 	The returned path will be canonical in that it will be absolute, will not
4528 	contain any "." or ".." components or duplicate occurrences of '/'s,
4529 	and none of the directory components will be symbolic links.
4530 
4531 	Any two paths referring to the same entry will result in the same
4532 	normalized path (well, that is pretty much the definition of `normalized',
4533 	isn't it :-).
4534 
4535 	\param path The path to be normalized.
4536 	\param buffer The buffer into which the normalized path will be written.
4537 		   May be the same one as \a path.
4538 	\param bufferSize The size of \a buffer.
4539 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4540 	\param kernel \c true, if the IO context of the kernel shall be used,
4541 		   otherwise that of the team this thread belongs to. Only relevant
4542 		   if the path is relative (to get the CWD).
4543 	\return \c B_OK if everything went fine, another error code otherwise.
4544 */
4545 status_t
4546 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4547 	bool traverseLink, bool kernel)
4548 {
4549 	if (!path || !buffer || bufferSize < 1)
4550 		return B_BAD_VALUE;
4551 
4552 	if (path != buffer) {
4553 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4554 			return B_BUFFER_OVERFLOW;
4555 	}
4556 
4557 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4558 }
4559 
4560 
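// Usage sketch (illustrative, with a made-up input path): normalization can
// be done in place, since buffer may be the same as path:
//
//   char buffer[B_PATH_NAME_LENGTH];
//   strlcpy(buffer, "/boot/./home//config/../Desktop", sizeof(buffer));
//   if (vfs_normalize_path(buffer, buffer, sizeof(buffer), true, true)
//           == B_OK) {
//       // buffer now reads "/boot/home/Desktop" (assuming no symlinks
//       // along the way)
//   }

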
4561 /*!	\brief Gets the parent of the passed in node.
4562 
4563 	Gets the parent of the passed in node, and correctly resolves covered
4564 	nodes.
4565 */
4566 extern "C" status_t
4567 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4568 {
4569 	return resolve_covered_parent(parent, device, node,
4570 		get_current_io_context(true));
4571 }
4572 
4573 
4574 /*!	\brief Creates a special node in the file system.
4575 
4576 	The caller gets a reference to the newly created node (which is passed
4577 	back through \a _createdVnode) and is responsible for releasing it.
4578 
4579 	\param path The path where to create the entry for the node. Can be \c NULL,
4580 		in which case the node is created without an entry in the root FS -- it
4581 		will automatically be deleted when the last reference has been released.
4582 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4583 		the target file system will just create the node with its standard
4584 		operations. Depending on the type of the node a subnode might be created
4585 		automatically, though.
4586 	\param mode The type and permissions for the node to be created.
4587 	\param flags Flags to be passed to the creating FS.
4588 	\param kernel \c true, if called in the kernel context (relevant only if
4589 		\a path is not \c NULL and not absolute).
4590 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4591 		file system creating the node, with the private data pointer and
4592 		operations for the super node. Can be \c NULL.
4593 	\param _createdVnode Pointer to pre-allocated storage in which to store
4594 		the pointer to the newly created node.
4595 	\return \c B_OK, if everything went fine, another error code otherwise.
4596 */
4597 status_t
4598 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4599 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4600 	struct vnode** _createdVnode)
4601 {
4602 	struct vnode* dirNode;
4603 	char _leaf[B_FILE_NAME_LENGTH];
4604 	char* leaf = NULL;
4605 
4606 	if (path) {
4607 		// We've got a path. Get the dir vnode and the leaf name.
4608 		KPath tmpPathBuffer;
4609 		if (tmpPathBuffer.InitCheck() != B_OK)
4610 			return B_NO_MEMORY;
4611 
4612 		char* tmpPath = tmpPathBuffer.LockBuffer();
4613 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4614 			return B_NAME_TOO_LONG;
4615 
4616 		// get the dir vnode and the leaf name
4617 		leaf = _leaf;
4618 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4619 		if (error != B_OK)
4620 			return error;
4621 	} else {
4622 		// No path. Create the node in the root FS.
4623 		dirNode = sRoot;
4624 		inc_vnode_ref_count(dirNode);
4625 	}
4626 
4627 	VNodePutter _(dirNode);
4628 
4629 	// check support for creating special nodes
4630 	if (!HAS_FS_CALL(dirNode, create_special_node))
4631 		return B_UNSUPPORTED;
4632 
4633 	// create the node
4634 	fs_vnode superVnode;
4635 	ino_t nodeID;
4636 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4637 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4638 	if (status != B_OK)
4639 		return status;
4640 
4641 	// lookup the node
4642 	rw_lock_read_lock(&sVnodeLock);
4643 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4644 	rw_lock_read_unlock(&sVnodeLock);
4645 
4646 	if (*_createdVnode == NULL) {
4647 		panic("vfs_create_special_node(): lookup of node failed");
4648 		return B_ERROR;
4649 	}
4650 
4651 	return B_OK;
4652 }
4653 
4654 
4655 extern "C" void
4656 vfs_put_vnode(struct vnode* vnode)
4657 {
4658 	put_vnode(vnode);
4659 }
4660 
4661 
4662 extern "C" status_t
4663 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4664 {
4665 	// Get current working directory from io context
4666 	struct io_context* context = get_current_io_context(false);
4667 	status_t status = B_OK;
4668 
4669 	mutex_lock(&context->io_mutex);
4670 
4671 	if (context->cwd != NULL) {
4672 		*_mountID = context->cwd->device;
4673 		*_vnodeID = context->cwd->id;
4674 	} else
4675 		status = B_ERROR;
4676 
4677 	mutex_unlock(&context->io_mutex);
4678 	return status;
4679 }
4680 
4681 
4682 status_t
4683 vfs_unmount(dev_t mountID, uint32 flags)
4684 {
4685 	return fs_unmount(NULL, mountID, flags, true);
4686 }
4687 
4688 
4689 extern "C" status_t
4690 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4691 {
4692 	struct vnode* vnode;
4693 
4694 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4695 	if (status != B_OK)
4696 		return status;
4697 
4698 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4699 	put_vnode(vnode);
4700 	return B_OK;
4701 }
4702 
4703 
4704 extern "C" void
4705 vfs_free_unused_vnodes(int32 level)
4706 {
4707 	vnode_low_resource_handler(NULL,
4708 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4709 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4710 		level);
4711 }
4712 
4713 
4714 extern "C" bool
4715 vfs_can_page(struct vnode* vnode, void* cookie)
4716 {
4717 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4718 
4719 	if (HAS_FS_CALL(vnode, can_page))
4720 		return FS_CALL(vnode, can_page, cookie);
4721 	return false;
4722 }
4723 
4724 
4725 extern "C" status_t
4726 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4727 	const generic_io_vec* vecs, size_t count, uint32 flags,
4728 	generic_size_t* _numBytes)
4729 {
4730 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4731 		vecs, pos));
4732 
4733 #if VFS_PAGES_IO_TRACING
4734 	generic_size_t bytesRequested = *_numBytes;
4735 #endif
4736 
4737 	IORequest request;
4738 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4739 	if (status == B_OK) {
4740 		status = vfs_vnode_io(vnode, cookie, &request);
4741 		if (status == B_OK)
4742 			status = request.Wait();
4743 		*_numBytes = request.TransferredBytes();
4744 	}
4745 
4746 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4747 		status, *_numBytes));
4748 
4749 	return status;
4750 }
4751 
4752 
4753 extern "C" status_t
4754 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4755 	const generic_io_vec* vecs, size_t count, uint32 flags,
4756 	generic_size_t* _numBytes)
4757 {
4758 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4759 		vecs, pos));
4760 
4761 #if VFS_PAGES_IO_TRACING
4762 	generic_size_t bytesRequested = *_numBytes;
4763 #endif
4764 
4765 	IORequest request;
4766 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4767 	if (status == B_OK) {
4768 		status = vfs_vnode_io(vnode, cookie, &request);
4769 		if (status == B_OK)
4770 			status = request.Wait();
4771 		*_numBytes = request.TransferredBytes();
4772 	}
4773 
4774 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4775 		status, *_numBytes));
4776 
4777 	return status;
4778 }
4779 
4780 
4781 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4782 	created if \a allocate is \c true.
4783 	On success, the function also grabs a reference to the cache
4784 	it returns.
4785 */
4786 extern "C" status_t
4787 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4788 {
4789 	if (vnode->cache != NULL) {
4790 		vnode->cache->AcquireRef();
4791 		*_cache = vnode->cache;
4792 		return B_OK;
4793 	}
4794 
4795 	rw_lock_read_lock(&sVnodeLock);
4796 	vnode->Lock();
4797 
4798 	status_t status = B_OK;
4799 
4800 	// The cache could have been created in the meantime
4801 	if (vnode->cache == NULL) {
4802 		if (allocate) {
4803 			// TODO: actually the vnode needs to be busy already here, or
4804 			//	else this won't work...
4805 			bool wasBusy = vnode->IsBusy();
4806 			vnode->SetBusy(true);
4807 
4808 			vnode->Unlock();
4809 			rw_lock_read_unlock(&sVnodeLock);
4810 
4811 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4812 
4813 			rw_lock_read_lock(&sVnodeLock);
4814 			vnode->Lock();
4815 			vnode->SetBusy(wasBusy);
4816 		} else
4817 			status = B_BAD_VALUE;
4818 	}
4819 
4820 	vnode->Unlock();
4821 	rw_lock_read_unlock(&sVnodeLock);
4822 
4823 	if (status == B_OK) {
4824 		vnode->cache->AcquireRef();
4825 		*_cache = vnode->cache;
4826 	}
4827 
4828 	return status;
4829 }
4830 
4831 
4832 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4833 	their own.
4834 	On success, it also grabs a reference to the given
4835 	cache.
4836 */
4837 extern "C" status_t
4838 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4839 {
4840 	rw_lock_read_lock(&sVnodeLock);
4841 	vnode->Lock();
4842 
4843 	status_t status = B_OK;
4844 	if (vnode->cache != NULL) {
4845 		status = B_NOT_ALLOWED;
4846 	} else {
4847 		vnode->cache = _cache;
4848 		_cache->AcquireRef();
4849 	}
4850 
4851 	vnode->Unlock();
4852 	rw_lock_read_unlock(&sVnodeLock);
4853 	return status;
4854 }
4855 
4856 
4857 status_t
4858 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4859 	file_io_vec* vecs, size_t* _count)
4860 {
4861 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4862 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4863 
4864 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4865 }
4866 
4867 
4868 status_t
4869 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4870 {
4871 	status_t status = FS_CALL(vnode, read_stat, stat);
4872 
4873 	// fill in the st_dev and st_ino fields
4874 	if (status == B_OK) {
4875 		stat->st_dev = vnode->device;
4876 		stat->st_ino = vnode->id;
4877 		// the rdev field must stay unset for non-special files
4878 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4879 			stat->st_rdev = -1;
4880 	}
4881 
4882 	return status;
4883 }
4884 
4885 
4886 status_t
4887 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4888 {
4889 	struct vnode* vnode;
4890 	status_t status = get_vnode(device, inode, &vnode, true, false);
4891 	if (status != B_OK)
4892 		return status;
4893 
4894 	status = vfs_stat_vnode(vnode, stat);
4895 
4896 	put_vnode(vnode);
4897 	return status;
4898 }
4899 
4900 
4901 status_t
4902 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4903 {
4904 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4905 }
4906 
4907 
4908 status_t
4909 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4910 	bool kernel, char* path, size_t pathLength)
4911 {
4912 	struct vnode* vnode;
4913 	status_t status;
4914 
4915 	// filter invalid leaf names
4916 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4917 		return B_BAD_VALUE;
4918 
4919 	// get the vnode matching the dir's node_ref
4920 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4921 		// special cases "." and "..": we can directly get the vnode of the
4922 		// referenced directory
4923 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4924 		leaf = NULL;
4925 	} else
4926 		status = get_vnode(device, inode, &vnode, true, false);
4927 	if (status != B_OK)
4928 		return status;
4929 
4930 	// get the directory path
4931 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4932 	put_vnode(vnode);
4933 		// we don't need the vnode anymore
4934 	if (status != B_OK)
4935 		return status;
4936 
4937 	// append the leaf name
4938 	if (leaf) {
4939 		// insert a directory separator if this is not the file system root
4940 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4941 				>= pathLength)
4942 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4943 			return B_NAME_TOO_LONG;
4944 		}
4945 	}
4946 
4947 	return B_OK;
4948 }
4949 
4950 
4951 /*!	If the given descriptor locked its vnode, that lock will be released. */
4952 void
4953 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4954 {
4955 	struct vnode* vnode = fd_vnode(descriptor);
4956 
4957 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4958 		vnode->mandatory_locked_by = NULL;
4959 }
4960 
4961 
4962 /*!	Releases any POSIX locks on the file descriptor. */
4963 status_t
4964 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4965 {
4966 	struct vnode* vnode = descriptor->u.vnode;
4967 	if (vnode == NULL)
4968 		return B_OK;
4969 
4970 	if (HAS_FS_CALL(vnode, release_lock))
4971 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4972 
4973 	return release_advisory_lock(vnode, context, NULL, NULL);
4974 }
4975 
4976 
4977 /*!	Closes all file descriptors of the specified I/O context that
4978 	have the O_CLOEXEC flag set.
4979 */
4980 void
4981 vfs_exec_io_context(io_context* context)
4982 {
4983 	uint32 i;
4984 
4985 	for (i = 0; i < context->table_size; i++) {
4986 		mutex_lock(&context->io_mutex);
4987 
4988 		struct file_descriptor* descriptor = context->fds[i];
4989 		bool remove = false;
4990 
4991 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4992 			context->fds[i] = NULL;
4993 			context->num_used_fds--;
4994 
4995 			remove = true;
4996 		}
4997 
4998 		mutex_unlock(&context->io_mutex);
4999 
5000 		if (remove) {
5001 			close_fd(context, descriptor);
5002 			put_fd(descriptor);
5003 		}
5004 	}
5005 }
5006 
5007 
5008 /*! Sets up a new io_context structure, and inherits the properties
5009 	of the parent io_context if it is given.
5010 */
5011 io_context*
5012 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
5013 {
5014 	io_context* context = (io_context*)malloc(sizeof(io_context));
5015 	if (context == NULL)
5016 		return NULL;
5017 
5018 	TIOC(NewIOContext(context, parentContext));
5019 
5020 	memset(context, 0, sizeof(io_context));
5021 	context->ref_count = 1;
5022 
5023 	MutexLocker parentLocker;
5024 
5025 	size_t tableSize;
5026 	if (parentContext != NULL) {
5027 		parentLocker.SetTo(parentContext->io_mutex, false);
5028 		tableSize = parentContext->table_size;
5029 	} else
5030 		tableSize = DEFAULT_FD_TABLE_SIZE;
5031 
5032 	// allocate space for FDs and their close-on-exec flag
5033 	context->fds = (file_descriptor**)malloc(
5034 		sizeof(struct file_descriptor*) * tableSize
5035 		+ sizeof(struct select_sync*) * tableSize
5036 		+ (tableSize + 7) / 8);
5037 	if (context->fds == NULL) {
5038 		free(context);
5039 		return NULL;
5040 	}
5041 
5042 	context->select_infos = (select_info**)(context->fds + tableSize);
5043 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5044 
5045 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5046 		+ sizeof(struct select_sync*) * tableSize
5047 		+ (tableSize + 7) / 8);
5048 
5049 	mutex_init(&context->io_mutex, "I/O context");
5050 
5051 	// Copy all parent file descriptors
5052 
5053 	if (parentContext != NULL) {
5054 		size_t i;
5055 
5056 		mutex_lock(&sIOContextRootLock);
5057 		context->root = parentContext->root;
5058 		if (context->root)
5059 			inc_vnode_ref_count(context->root);
5060 		mutex_unlock(&sIOContextRootLock);
5061 
5062 		context->cwd = parentContext->cwd;
5063 		if (context->cwd)
5064 			inc_vnode_ref_count(context->cwd);
5065 
5066 		if (parentContext->inherit_fds) {
5067 			for (i = 0; i < tableSize; i++) {
5068 				struct file_descriptor* descriptor = parentContext->fds[i];
5069 
5070 				if (descriptor != NULL
5071 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5072 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5073 					if (closeOnExec && purgeCloseOnExec)
5074 						continue;
5075 
5076 					TFD(InheritFD(context, i, descriptor, parentContext));
5077 
5078 					context->fds[i] = descriptor;
5079 					context->num_used_fds++;
5080 					atomic_add(&descriptor->ref_count, 1);
5081 					atomic_add(&descriptor->open_count, 1);
5082 
5083 					if (closeOnExec)
5084 						fd_set_close_on_exec(context, i, true);
5085 				}
5086 			}
5087 		}
5088 
5089 		parentLocker.Unlock();
5090 	} else {
5091 		context->root = sRoot;
5092 		context->cwd = sRoot;
5093 
5094 		if (context->root)
5095 			inc_vnode_ref_count(context->root);
5096 
5097 		if (context->cwd)
5098 			inc_vnode_ref_count(context->cwd);
5099 	}
5100 
5101 	context->table_size = tableSize;
5102 	context->inherit_fds = parentContext != NULL;
5103 
5104 	list_init(&context->node_monitors);
5105 	context->max_monitors = DEFAULT_NODE_MONITORS;
5106 
5107 	return context;
5108 }
5109 
5110 
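// Layout sketch of the single allocation backing the FD table created above,
// for table_size == n (derived from the pointer arithmetic in
// vfs_new_io_context()):
//
//   context->fds                n x (file_descriptor*)
//   context->select_infos       n x (select_info*), directly after fds
//   context->fds_close_on_exec  (n + 7) / 8 bytes, one bit per FD

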
5111 void
5112 vfs_get_io_context(io_context* context)
5113 {
5114 	atomic_add(&context->ref_count, 1);
5115 }
5116 
5117 
5118 void
5119 vfs_put_io_context(io_context* context)
5120 {
5121 	if (atomic_add(&context->ref_count, -1) == 1)
5122 		free_io_context(context);
5123 }
5124 
5125 
5126 status_t
5127 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5128 {
5129 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5130 		return B_BAD_VALUE;
5131 
5132 	TIOC(ResizeIOContext(context, newSize));
5133 
5134 	MutexLocker _(context->io_mutex);
5135 
5136 	uint32 oldSize = context->table_size;
5137 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5138 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5139 
5140 	// If the tables shrink, make sure none of the fds being dropped are in use.
5141 	if (newSize < oldSize) {
5142 		for (uint32 i = oldSize; i-- > newSize;) {
5143 			if (context->fds[i])
5144 				return B_BUSY;
5145 		}
5146 	}
5147 
5148 	// store pointers to the old tables
5149 	file_descriptor** oldFDs = context->fds;
5150 	select_info** oldSelectInfos = context->select_infos;
5151 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5152 
5153 	// allocate new tables
5154 	file_descriptor** newFDs = (file_descriptor**)malloc(
5155 		sizeof(struct file_descriptor*) * newSize
5156 		+ sizeof(struct select_sync*) * newSize
5157 		+ newCloseOnExitBitmapSize);
5158 	if (newFDs == NULL)
5159 		return B_NO_MEMORY;
5160 
5161 	context->fds = newFDs;
5162 	context->select_infos = (select_info**)(context->fds + newSize);
5163 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5164 	context->table_size = newSize;
5165 
5166 	// copy entries from old tables
5167 	uint32 toCopy = min_c(oldSize, newSize);
5168 
5169 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5170 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5171 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5172 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5173 
5174 	// clear additional entries, if the tables grow
5175 	if (newSize > oldSize) {
5176 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5177 		memset(context->select_infos + oldSize, 0,
5178 			sizeof(void*) * (newSize - oldSize));
5179 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5180 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5181 	}
5182 
5183 	free(oldFDs);
5184 
5185 	return B_OK;
5186 }
5187 
5188 
5189 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5190 
5191 	Given an arbitrary vnode (identified by mount and node ID), the function
5192 	checks whether the vnode is covered by another vnode. If it is, the
5193 	function returns the mount and node ID of the covering vnode. Otherwise
5194 	it simply returns the supplied mount and node ID.
5195 
5196 	In case of error (e.g. the supplied node could not be found) the variables
5197 	for storing the resolved mount and node ID remain untouched and an error
5198 	code is returned.
5199 
5200 	\param mountID The mount ID of the vnode in question.
5201 	\param nodeID The node ID of the vnode in question.
5202 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5203 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5204 	\return
5205 	- \c B_OK, if everything went fine,
5206 	- another error code, if something went wrong.
5207 */
5208 status_t
5209 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5210 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5211 {
5212 	// get the node
5213 	struct vnode* node;
5214 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5215 	if (error != B_OK)
5216 		return error;
5217 
5218 	// resolve the node
5219 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5220 		put_vnode(node);
5221 		node = coveringNode;
5222 	}
5223 
5224 	// set the return values
5225 	*resolvedMountID = node->device;
5226 	*resolvedNodeID = node->id;
5227 
5228 	put_vnode(node);
5229 
5230 	return B_OK;
5231 }
5232 
5233 
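// Example: if a volume is mounted at the directory identified by
// (mountID, nodeID), that directory is covered by the mounted volume's root
// vnode, so the function above returns the root's IDs; for an uncovered
// vnode the supplied IDs are returned unchanged.

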
5234 status_t
5235 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5236 	ino_t* _mountPointNodeID)
5237 {
5238 	ReadLocker nodeLocker(sVnodeLock);
5239 	ReadLocker mountLocker(sMountLock);
5240 
5241 	struct fs_mount* mount = find_mount(mountID);
5242 	if (mount == NULL)
5243 		return B_BAD_VALUE;
5244 
5245 	Vnode* mountPoint = mount->covers_vnode;
5246 
5247 	*_mountPointMountID = mountPoint->device;
5248 	*_mountPointNodeID = mountPoint->id;
5249 
5250 	return B_OK;
5251 }
5252 
5253 
5254 status_t
5255 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5256 	ino_t coveredNodeID)
5257 {
5258 	// get the vnodes
5259 	Vnode* vnode;
5260 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5261 	if (error != B_OK)
5262 		return B_BAD_VALUE;
5263 	VNodePutter vnodePutter(vnode);
5264 
5265 	Vnode* coveredVnode;
5266 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5267 		false);
5268 	if (error != B_OK)
5269 		return B_BAD_VALUE;
5270 	VNodePutter coveredVnodePutter(coveredVnode);
5271 
5272 	// establish the covered/covering links
5273 	WriteLocker locker(sVnodeLock);
5274 
5275 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5276 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5277 		return B_BUSY;
5278 	}
5279 
5280 	vnode->covers = coveredVnode;
5281 	vnode->SetCovering(true);
5282 
5283 	coveredVnode->covered_by = vnode;
5284 	coveredVnode->SetCovered(true);
5285 
5286 	// the vnodes do now reference each other
5287 	inc_vnode_ref_count(vnode);
5288 	inc_vnode_ref_count(coveredVnode);
5289 
5290 	return B_OK;
5291 }
5292 
5293 
5294 int
5295 vfs_getrlimit(int resource, struct rlimit* rlp)
5296 {
5297 	if (!rlp)
5298 		return B_BAD_ADDRESS;
5299 
5300 	switch (resource) {
5301 		case RLIMIT_NOFILE:
5302 		{
5303 			struct io_context* context = get_current_io_context(false);
5304 			MutexLocker _(context->io_mutex);
5305 
5306 			rlp->rlim_cur = context->table_size;
5307 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5308 			return 0;
5309 		}
5310 
5311 		case RLIMIT_NOVMON:
5312 		{
5313 			struct io_context* context = get_current_io_context(false);
5314 			MutexLocker _(context->io_mutex);
5315 
5316 			rlp->rlim_cur = context->max_monitors;
5317 			rlp->rlim_max = MAX_NODE_MONITORS;
5318 			return 0;
5319 		}
5320 
5321 		default:
5322 			return B_BAD_VALUE;
5323 	}
5324 }
5325 
5326 
5327 int
5328 vfs_setrlimit(int resource, const struct rlimit* rlp)
5329 {
5330 	if (!rlp)
5331 		return B_BAD_ADDRESS;
5332 
5333 	switch (resource) {
5334 		case RLIMIT_NOFILE:
5335 			/* TODO: check getuid() */
5336 			if (rlp->rlim_max != RLIM_SAVED_MAX
5337 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5338 				return B_NOT_ALLOWED;
5339 
5340 			return vfs_resize_fd_table(get_current_io_context(false),
5341 				rlp->rlim_cur);
5342 
5343 		case RLIMIT_NOVMON:
5344 			/* TODO: check getuid() */
5345 			if (rlp->rlim_max != RLIM_SAVED_MAX
5346 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5347 				return B_NOT_ALLOWED;
5348 
5349 			return resize_monitor_table(get_current_io_context(false),
5350 				rlp->rlim_cur);
5351 
5352 		default:
5353 			return B_BAD_VALUE;
5354 	}
5355 }
5356 
5357 
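// Illustrative call (values made up): growing the caller's FD table via the
// rlimit interface; note rlim_max must be RLIM_SAVED_MAX or
// MAX_FD_TABLE_SIZE, or B_NOT_ALLOWED is returned:
//
//   struct rlimit rl = { 512, MAX_FD_TABLE_SIZE };
//   int result = vfs_setrlimit(RLIMIT_NOFILE, &rl);
//       // ends up in vfs_resize_fd_table(context, 512)

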
5358 status_t
5359 vfs_init(kernel_args* args)
5360 {
5361 	vnode::StaticInit();
5362 
5363 	sVnodeTable = new(std::nothrow) VnodeTable();
5364 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5365 		panic("vfs_init: error creating vnode hash table\n");
5366 
5367 	struct vnode dummy_vnode;
5368 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5369 
5370 	struct fs_mount dummyMount;
5371 	sMountsTable = new(std::nothrow) MountTable();
5372 	if (sMountsTable == NULL
5373 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5374 		panic("vfs_init: error creating mounts hash table\n");
5375 
5376 	sPathNameCache = create_object_cache("vfs path names",
5377 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5378 	if (sPathNameCache == NULL)
5379 		panic("vfs_init: error creating path name object_cache\n");
5380 
5381 	sFileDescriptorCache = create_object_cache("vfs fds",
5382 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5383 	if (sFileDescriptorCache == NULL)
5384 		panic("vfs_init: error creating file descriptor object_cache\n");
5385 
5386 	node_monitor_init();
5387 
5388 	sRoot = NULL;
5389 
5390 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5391 
5392 	if (block_cache_init() != B_OK)
5393 		return B_ERROR;
5394 
5395 #ifdef ADD_DEBUGGER_COMMANDS
5396 	// add some debugger commands
5397 	add_debugger_command_etc("vnode", &dump_vnode,
5398 		"Print info about the specified vnode",
5399 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5400 		"Prints information about the vnode specified by address <vnode> or\n"
5401 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5402 		"constructed and printed. It might not be possible to construct a\n"
5403 		"complete path, though.\n",
5404 		0);
5405 	add_debugger_command("vnodes", &dump_vnodes,
5406 		"list all vnodes (from the specified device)");
5407 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5408 		"list all vnode caches");
5409 	add_debugger_command("mount", &dump_mount,
5410 		"info about the specified fs_mount");
5411 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5412 	add_debugger_command("io_context", &dump_io_context,
5413 		"info about the I/O context");
5414 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5415 		"info about vnode usage");
5416 #endif
5417 
5418 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5419 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5420 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5421 		0);
5422 
5423 	fifo_init();
5424 	file_map_init();
5425 
5426 	return file_cache_init();
5427 }
5428 
5429 
5430 //	#pragma mark - fd_ops implementations
5431 
5432 
5433 /*!
5434 	Calls fs_open() on the given vnode and returns a new
5435 	file descriptor for it
5436 */
5437 static int
5438 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5439 {
5440 	void* cookie;
5441 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5442 	if (status != B_OK)
5443 		return status;
5444 
5445 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5446 	if (fd < 0) {
5447 		FS_CALL(vnode, close, cookie);
5448 		FS_CALL(vnode, free_cookie, cookie);
5449 	}
5450 	return fd;
5451 }
5452 
5453 
5454 /*!
5455 	Looks up or creates the entry in the given directory, opens the
5456 	resulting node, and returns a new file descriptor for it
5457 */
5458 static int
5459 create_vnode(struct vnode* directory, const char* name, int openMode,
5460 	int perms, bool kernel)
5461 {
5462 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5463 	status_t status = B_ERROR;
5464 	struct vnode* vnode;
5465 	void* cookie;
5466 	ino_t newID;
5467 
5468 	// This is somewhat tricky: If the entry already exists, the FS responsible
5469 	// for the directory might not necessarily also be the one responsible for
5470 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5471 	// we can actually never call the create() hook without O_EXCL. Instead we
5472 	// try to look the entry up first. If it already exists, we just open the
5473 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5474 	// introduces a race condition, since someone else might have created the
5475 	// entry in the meantime. We hope the respective FS returns the correct
5476 	// error code in that case, and we retry (up to 3 times).
5477 
5478 	for (int i = 0; i < 3 && status != B_OK; i++) {
5479 		// look the node up
5480 		status = lookup_dir_entry(directory, name, &vnode);
5481 		if (status == B_OK) {
5482 			VNodePutter putter(vnode);
5483 
5484 			if ((openMode & O_EXCL) != 0)
5485 				return B_FILE_EXISTS;
5486 
5487 			// If the node is a symlink, we have to follow it, unless
5488 			// O_NOTRAVERSE is set.
5489 			if (S_ISLNK(vnode->Type()) && traverse) {
5490 				putter.Put();
5491 				char clonedName[B_FILE_NAME_LENGTH + 1];
5492 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5493 						>= B_FILE_NAME_LENGTH) {
5494 					return B_NAME_TOO_LONG;
5495 				}
5496 
5497 				inc_vnode_ref_count(directory);
5498 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5499 					kernel, &vnode, NULL);
5500 				if (status != B_OK)
5501 					return status;
5502 
5503 				putter.SetTo(vnode);
5504 			}
5505 
5506 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5507 				return B_LINK_LIMIT;
5508 
5509 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5510 			// on success keep the vnode reference for the FD
5511 			if (fd >= 0)
5512 				putter.Detach();
5513 
5514 			return fd;
5515 		}
5516 
5517 		// it doesn't exist yet -- try to create it
5518 
5519 		if (!HAS_FS_CALL(directory, create))
5520 			return B_READ_ONLY_DEVICE;
5521 
5522 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5523 			&cookie, &newID);
5524 		if (status != B_OK
5525 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5526 			return status;
5527 		}
5528 	}
5529 
5530 	if (status != B_OK)
5531 		return status;
5532 
5533 	// the node has been created successfully
5534 
5535 	rw_lock_read_lock(&sVnodeLock);
5536 	vnode = lookup_vnode(directory->device, newID);
5537 	rw_lock_read_unlock(&sVnodeLock);
5538 
5539 	if (vnode == NULL) {
5540 		panic("vfs: fs_create() returned success but there is no vnode, "
5541 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5542 		return B_BAD_VALUE;
5543 	}
5544 
5545 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5546 	if (fd >= 0)
5547 		return fd;
5548 
5549 	status = fd;
5550 
5551 	// something went wrong, clean up
5552 
5553 	FS_CALL(vnode, close, cookie);
5554 	FS_CALL(vnode, free_cookie, cookie);
5555 	put_vnode(vnode);
5556 
5557 	FS_CALL(directory, unlink, name);
5558 
5559 	return status;
5560 }
5561 
5562 
5563 /*! Calls fs open_dir() on the given vnode and returns a new
5564 	file descriptor for it
5565 */
5566 static int
5567 open_dir_vnode(struct vnode* vnode, bool kernel)
5568 {
5569 	void* cookie;
5570 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5571 	if (status != B_OK)
5572 		return status;
5573 
5574 	// directory is opened, create a fd
5575 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5576 	if (status >= 0)
5577 		return status;
5578 
5579 	FS_CALL(vnode, close_dir, cookie);
5580 	FS_CALL(vnode, free_dir_cookie, cookie);
5581 
5582 	return status;
5583 }
5584 
5585 
5586 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5587 	file descriptor for it.
5588 	Used by attr_dir_open(), and attr_dir_open_fd().
5589 */
5590 static int
5591 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5592 {
5593 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5594 		return B_UNSUPPORTED;
5595 
5596 	void* cookie;
5597 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5598 	if (status != B_OK)
5599 		return status;
5600 
5601 	// directory is opened, create a fd
5602 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5603 		kernel);
5604 	if (status >= 0)
5605 		return status;
5606 
5607 	FS_CALL(vnode, close_attr_dir, cookie);
5608 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5609 
5610 	return status;
5611 }
5612 
5613 
5614 static int
5615 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5616 	int openMode, int perms, bool kernel)
5617 {
5618 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5619 		"kernel %d\n", name, openMode, perms, kernel));
5620 
5621 	// get directory to put the new file in
5622 	struct vnode* directory;
5623 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5624 	if (status != B_OK)
5625 		return status;
5626 
5627 	status = create_vnode(directory, name, openMode, perms, kernel);
5628 	put_vnode(directory);
5629 
5630 	return status;
5631 }
5632 
5633 
5634 static int
5635 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5636 {
5637 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5638 		openMode, perms, kernel));
5639 
5640 	// get directory to put the new file in
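	// (fd_and_path_to_dir_vnode() resolves the path up to, but not
	// including, the leaf, and returns the leaf name separately in "name")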
5641 	char name[B_FILE_NAME_LENGTH];
5642 	struct vnode* directory;
5643 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5644 		kernel);
5645 	if (status < 0)
5646 		return status;
5647 
5648 	status = create_vnode(directory, name, openMode, perms, kernel);
5649 
5650 	put_vnode(directory);
5651 	return status;
5652 }
5653 
5654 
5655 static int
5656 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5657 	int openMode, bool kernel)
5658 {
5659 	if (name == NULL || *name == '\0')
5660 		return B_BAD_VALUE;
5661 
5662 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5663 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5664 
5665 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5666 
5667 	// get the vnode matching the entry_ref
5668 	struct vnode* vnode;
5669 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5670 		kernel, &vnode);
5671 	if (status != B_OK)
5672 		return status;
5673 
5674 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5675 		put_vnode(vnode);
5676 		return B_LINK_LIMIT;
5677 	}
5678 
5679 	int newFD = open_vnode(vnode, openMode, kernel);
5680 	if (newFD >= 0) {
5681 		// The vnode reference has been transferred to the FD
5682 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5683 			directoryID, vnode->id, name);
5684 	} else
5685 		put_vnode(vnode);
5686 
5687 	return newFD;
5688 }
5689 
5690 
5691 static int
5692 file_open(int fd, char* path, int openMode, bool kernel)
5693 {
5694 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5695 
5696 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5697 		fd, path, openMode, kernel));
5698 
5699 	// get the vnode matching the vnode + path combination
5700 	struct vnode* vnode;
5701 	ino_t parentID;
5702 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5703 		&parentID, kernel);
5704 	if (status != B_OK)
5705 		return status;
5706 
5707 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5708 		put_vnode(vnode);
5709 		return B_LINK_LIMIT;
5710 	}
5711 
5712 	// open the vnode
5713 	int newFD = open_vnode(vnode, openMode, kernel);
5714 	if (newFD >= 0) {
5715 		// The vnode reference has been transferred to the FD
5716 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5717 			vnode->device, parentID, vnode->id, NULL);
5718 	} else
5719 		put_vnode(vnode);
5720 
5721 	return newFD;
5722 }
5723 
5724 
5725 static status_t
5726 file_close(struct file_descriptor* descriptor)
5727 {
5728 	struct vnode* vnode = descriptor->u.vnode;
5729 	status_t status = B_OK;
5730 
5731 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5732 
5733 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5734 		vnode->id);
5735 	if (HAS_FS_CALL(vnode, close)) {
5736 		status = FS_CALL(vnode, close, descriptor->cookie);
5737 	}
5738 
5739 	if (status == B_OK) {
5740 		// remove all outstanding locks for this team
5741 		if (HAS_FS_CALL(vnode, release_lock))
5742 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5743 		else
5744 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5745 	}
5746 	return status;
5747 }
5748 
5749 
5750 static void
5751 file_free_fd(struct file_descriptor* descriptor)
5752 {
5753 	struct vnode* vnode = descriptor->u.vnode;
5754 
5755 	if (vnode != NULL) {
5756 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5757 		put_vnode(vnode);
5758 	}
5759 }
5760 
5761 
5762 static status_t
5763 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5764 	size_t* length)
5765 {
5766 	struct vnode* vnode = descriptor->u.vnode;
5767 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5768 		pos, length, *length));
5769 
5770 	if (S_ISDIR(vnode->Type()))
5771 		return B_IS_A_DIRECTORY;
5772 
5773 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5774 }
5775 
5776 
5777 static status_t
5778 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5779 	size_t* length)
5780 {
5781 	struct vnode* vnode = descriptor->u.vnode;
5782 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5783 		length));
5784 
5785 	if (S_ISDIR(vnode->Type()))
5786 		return B_IS_A_DIRECTORY;
5787 	if (!HAS_FS_CALL(vnode, write))
5788 		return B_READ_ONLY_DEVICE;
5789 
5790 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5791 }
5792 
5793 
5794 static off_t
5795 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5796 {
5797 	struct vnode* vnode = descriptor->u.vnode;
5798 	off_t offset;
5799 	bool isDevice = false;
5800 
5801 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5802 		seekType));
5803 
5804 	// some kinds of files are not seekable
5805 	switch (vnode->Type() & S_IFMT) {
5806 		case S_IFIFO:
5807 		case S_IFSOCK:
5808 			return ESPIPE;
5809 
5810 		// drivers publish block devices as character devices, so check both
5811 		case S_IFBLK:
5812 		case S_IFCHR:
5813 			isDevice = true;
5814 			break;
5815 		// The Open Group Base Specs don't single out any file types besides
5816 		// pipes, FIFOs, and sockets, so we allow seeking all others.
5817 		case S_IFREG:
5818 		case S_IFDIR:
5819 		case S_IFLNK:
5820 			break;
5821 	}
5822 
5823 	switch (seekType) {
5824 		case SEEK_SET:
5825 			offset = 0;
5826 			break;
5827 		case SEEK_CUR:
5828 			offset = descriptor->pos;
5829 			break;
5830 		case SEEK_END:
5831 		{
5832 			// stat() the node
5833 			if (!HAS_FS_CALL(vnode, read_stat))
5834 				return B_UNSUPPORTED;
5835 
5836 			struct stat stat;
5837 			status_t status = FS_CALL(vnode, read_stat, &stat);
5838 			if (status != B_OK)
5839 				return status;
5840 
5841 			offset = stat.st_size;
5842 
5843 			if (offset == 0 && isDevice) {
5844 				// most drivers don't report a size via stat()
5845 				device_geometry geometry;
5846 
5847 				if (HAS_FS_CALL(vnode, ioctl)) {
5848 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5849 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5850 					if (status == B_OK)
5851 						offset = (off_t)geometry.bytes_per_sector
5852 							* geometry.sectors_per_track
5853 							* geometry.cylinder_count
5854 							* geometry.head_count;
5855 				}
5856 			}
5857 
5858 			break;
5859 		}
5860 		case SEEK_DATA:
5861 		case SEEK_HOLE:
5862 		{
5863 			status_t status = B_BAD_VALUE;
5864 			if (HAS_FS_CALL(vnode, ioctl)) {
5865 				offset = pos;
5866 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5867 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5868 					&offset, sizeof(offset));
5869 				if (status == B_OK) {
5870 					if (offset > pos)
5871 						offset -= pos;
5872 					break;
5873 				}
5874 			}
5875 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5876 				return status;
5877 
5878 			// fall back to a basic implementation that stat()s the node
5879 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5880 				return B_BAD_VALUE;
5881 
5882 			struct stat stat;
5883 			status = FS_CALL(vnode, read_stat, &stat);
5884 			if (status != B_OK)
5885 				return status;
5886 
5887 			off_t end = stat.st_size;
5888 			if (pos >= end)
5889 				return ENXIO;
5890 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5891 			break;
5892 		}
5893 		default:
5894 			return B_BAD_VALUE;
5895 	}
5896 
5897 	// assumes off_t is 64 bits wide
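	// (pos + offset can only wrap when offset is positive and
	// pos > LONGLONG_MAX - offset, which is exactly what this test catches)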
5898 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5899 		return B_BUFFER_OVERFLOW;
5900 
5901 	pos += offset;
5902 	if (pos < 0)
5903 		return B_BAD_VALUE;
5904 
5905 	return descriptor->pos = pos;
5906 }
5907 
5908 
5909 static status_t
5910 file_select(struct file_descriptor* descriptor, uint8 event,
5911 	struct selectsync* sync)
5912 {
5913 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5914 
5915 	struct vnode* vnode = descriptor->u.vnode;
5916 
5917 	// If the FS has no select() hook, notify select() now.
5918 	if (!HAS_FS_CALL(vnode, select)) {
5919 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5920 			return notify_select_event(sync, event);
5921 		else
5922 			return B_OK;
5923 	}
5924 
5925 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5926 }
5927 
5928 
5929 static status_t
5930 file_deselect(struct file_descriptor* descriptor, uint8 event,
5931 	struct selectsync* sync)
5932 {
5933 	struct vnode* vnode = descriptor->u.vnode;
5934 
5935 	if (!HAS_FS_CALL(vnode, deselect))
5936 		return B_OK;
5937 
5938 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5939 }
5940 
5941 
5942 static status_t
5943 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5944 	bool kernel)
5945 {
5946 	struct vnode* vnode;
5947 	status_t status;
5948 
5949 	if (name == NULL || *name == '\0')
5950 		return B_BAD_VALUE;
5951 
5952 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5953 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5954 
5955 	status = get_vnode(mountID, parentID, &vnode, true, false);
5956 	if (status != B_OK)
5957 		return status;
5958 
5959 	if (HAS_FS_CALL(vnode, create_dir))
5960 		status = FS_CALL(vnode, create_dir, name, perms);
5961 	else
5962 		status = B_READ_ONLY_DEVICE;
5963 
5964 	put_vnode(vnode);
5965 	return status;
5966 }
5967 
5968 
5969 static status_t
5970 dir_create(int fd, char* path, int perms, bool kernel)
5971 {
5972 	char filename[B_FILE_NAME_LENGTH];
5973 	struct vnode* vnode;
5974 	status_t status;
5975 
5976 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5977 		kernel));
5978 
5979 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5980 	if (status < 0)
5981 		return status;
5982 
5983 	if (HAS_FS_CALL(vnode, create_dir)) {
5984 		status = FS_CALL(vnode, create_dir, filename, perms);
5985 	} else
5986 		status = B_READ_ONLY_DEVICE;
5987 
5988 	put_vnode(vnode);
5989 	return status;
5990 }
5991 
5992 
5993 static int
5994 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5995 {
5996 	FUNCTION(("dir_open_entry_ref()\n"));
5997 
5998 	if (name && name[0] == '\0')
5999 		return B_BAD_VALUE;
6000 
6001 	// get the vnode matching the entry_ref/node_ref
6002 	struct vnode* vnode;
6003 	status_t status;
6004 	if (name) {
6005 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
6006 			&vnode);
6007 	} else
6008 		status = get_vnode(mountID, parentID, &vnode, true, false);
6009 	if (status != B_OK)
6010 		return status;
6011 
6012 	int newFD = open_dir_vnode(vnode, kernel);
6013 	if (newFD >= 0) {
6014 		// The vnode reference has been transferred to the FD
6015 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
6016 			vnode->id, name);
6017 	} else
6018 		put_vnode(vnode);
6019 
6020 	return newFD;
6021 }
6022 
6023 
6024 static int
6025 dir_open(int fd, char* path, bool kernel)
6026 {
6027 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6028 		kernel));
6029 
6030 	// get the vnode matching the vnode + path combination
6031 	struct vnode* vnode = NULL;
6032 	ino_t parentID;
6033 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6034 		kernel);
6035 	if (status != B_OK)
6036 		return status;
6037 
6038 	// open the dir
6039 	int newFD = open_dir_vnode(vnode, kernel);
6040 	if (newFD >= 0) {
6041 		// The vnode reference has been transferred to the FD
6042 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6043 			parentID, vnode->id, NULL);
6044 	} else
6045 		put_vnode(vnode);
6046 
6047 	return newFD;
6048 }
6049 
6050 
6051 static status_t
6052 dir_close(struct file_descriptor* descriptor)
6053 {
6054 	struct vnode* vnode = descriptor->u.vnode;
6055 
6056 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6057 
6058 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6059 		vnode->id);
6060 	if (HAS_FS_CALL(vnode, close_dir))
6061 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6062 
6063 	return B_OK;
6064 }
6065 
6066 
6067 static void
6068 dir_free_fd(struct file_descriptor* descriptor)
6069 {
6070 	struct vnode* vnode = descriptor->u.vnode;
6071 
6072 	if (vnode != NULL) {
6073 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6074 		put_vnode(vnode);
6075 	}
6076 }
6077 
6078 
6079 static status_t
6080 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6081 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6082 {
6083 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6084 		bufferSize, _count);
6085 }
6086 
6087 
6088 static status_t
6089 fix_dirent(struct vnode* parent, struct dirent* entry,
6090 	struct io_context* ioContext)
6091 {
6092 	// set d_pdev and d_pino
6093 	entry->d_pdev = parent->device;
6094 	entry->d_pino = parent->id;
6095 
6096 	// If this is the ".." entry and the directory covers another vnode,
6097 	// we need to replace d_dev and d_ino with the actual values.
6098 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6099 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6100 			ioContext);
6101 	}
6102 
6103 	// resolve covered vnodes
6104 	ReadLocker _(&sVnodeLock);
6105 
6106 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6107 	if (vnode != NULL && vnode->covered_by != NULL) {
6108 		do {
6109 			vnode = vnode->covered_by;
6110 		} while (vnode->covered_by != NULL);
6111 
6112 		entry->d_dev = vnode->device;
6113 		entry->d_ino = vnode->id;
6114 	}
6115 
6116 	return B_OK;
6117 }
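
/*	A sketch of what fix_dirent() corrects: if "/mnt" lives on volume A and
	volume B is mounted there, the entry read from A still carries A's
	device/inode pair for "mnt". Following the covered_by chain rewrites the
	pair to B's root, so userland sees the mounted volume rather than the
	mount point buried underneath it.
*/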
6118 
6119 
6120 static status_t
6121 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6122 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6123 {
6124 	if (!HAS_FS_CALL(vnode, read_dir))
6125 		return B_UNSUPPORTED;
6126 
6127 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6128 		_count);
6129 	if (error != B_OK)
6130 		return error;
6131 
6132 	// we need to adjust the dirents we just read
6133 	uint32 count = *_count;
6134 	for (uint32 i = 0; i < count; i++) {
6135 		error = fix_dirent(vnode, buffer, ioContext);
6136 		if (error != B_OK)
6137 			return error;
6138 
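		// dirents are stored back to back in the buffer; d_reclen is the
		// size of the whole record and thus the offset to the next entry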
6139 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6140 	}
6141 
6142 	return error;
6143 }
6144 
6145 
6146 static status_t
6147 dir_rewind(struct file_descriptor* descriptor)
6148 {
6149 	struct vnode* vnode = descriptor->u.vnode;
6150 
6151 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6152 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6153 	}
6154 
6155 	return B_UNSUPPORTED;
6156 }
6157 
6158 
6159 static status_t
6160 dir_remove(int fd, char* path, bool kernel)
6161 {
6162 	char name[B_FILE_NAME_LENGTH];
6163 	struct vnode* directory;
6164 	status_t status;
6165 
6166 	if (path != NULL) {
6167 		// we need to make sure our path name doesn't end in "/", ".",
6168 		// or ".."
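		// (e.g. "a/b/" and "a/b/." both reduce to "a/b", while "a/b/.."
		// is rejected outright)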
6169 		char* lastSlash;
6170 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6171 			char* leaf = lastSlash + 1;
6172 			if (!strcmp(leaf, ".."))
6173 				return B_NOT_ALLOWED;
6174 
6175 			// skip back over consecutive slashes
6176 			while (lastSlash > path && lastSlash[-1] == '/')
6177 				lastSlash--;
6178 
6179 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6180 				break;
6183 			// "name/" -> "name", or "name/." -> "name"
6184 			lastSlash[0] = '\0';
6185 		}
6186 
6187 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6188 			return B_NOT_ALLOWED;
6189 	}
6190 
6191 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6192 	if (status != B_OK)
6193 		return status;
6194 
6195 	if (HAS_FS_CALL(directory, remove_dir))
6196 		status = FS_CALL(directory, remove_dir, name);
6197 	else
6198 		status = B_READ_ONLY_DEVICE;
6199 
6200 	put_vnode(directory);
6201 	return status;
6202 }
6203 
6204 
6205 static status_t
6206 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6207 	size_t length)
6208 {
6209 	struct vnode* vnode = descriptor->u.vnode;
6210 
6211 	if (HAS_FS_CALL(vnode, ioctl))
6212 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6213 
6214 	return B_DEV_INVALID_IOCTL;
6215 }
6216 
6217 
6218 static status_t
6219 common_fcntl(int fd, int op, size_t argument, bool kernel)
6220 {
6221 	struct flock flock;
6222 
6223 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6224 		fd, op, argument, kernel ? "kernel" : "user"));
6225 
6226 	struct io_context* context = get_current_io_context(kernel);
6227 
6228 	struct file_descriptor* descriptor = get_fd(context, fd);
6229 	if (descriptor == NULL)
6230 		return B_FILE_ERROR;
6231 
6232 	struct vnode* vnode = fd_vnode(descriptor);
6233 
6234 	status_t status = B_OK;
6235 
6236 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6237 		if (descriptor->type != FDTYPE_FILE)
6238 			status = B_BAD_VALUE;
6239 		else if (kernel)
6240 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6241 		else if (user_memcpy(&flock, (struct flock*)argument,
6242 				sizeof(struct flock)) != B_OK)
6243 			status = B_BAD_ADDRESS;
6244 		if (status != B_OK) {
6245 			put_fd(descriptor);
6246 			return status;
6247 		}
6248 	}
6249 
6250 	switch (op) {
6251 		case F_SETFD:
6252 		{
6253 			// Set file descriptor flags
6254 
6255 			// FD_CLOEXEC is the only file descriptor flag available at this time
6256 			mutex_lock(&context->io_mutex);
6257 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6258 			mutex_unlock(&context->io_mutex);
6259 
6260 			status = B_OK;
6261 			break;
6262 		}
6263 
6264 		case F_GETFD:
6265 		{
6266 			// Get file descriptor flags
6267 			mutex_lock(&context->io_mutex);
6268 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6269 			mutex_unlock(&context->io_mutex);
6270 			break;
6271 		}
6272 
6273 		case F_SETFL:
6274 			// Set file descriptor open mode
6275 
6276 			// we only accept changes to O_APPEND and O_NONBLOCK
6277 			argument &= O_APPEND | O_NONBLOCK;
6278 			if (descriptor->ops->fd_set_flags != NULL) {
6279 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6280 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6281 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6282 					(int)argument);
6283 			} else
6284 				status = B_UNSUPPORTED;
6285 
6286 			if (status == B_OK) {
6287 				// update this descriptor's open_mode field
6288 				descriptor->open_mode = (descriptor->open_mode
6289 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6290 			}
6291 
6292 			break;
6293 
6294 		case F_GETFL:
6295 			// Get file descriptor open mode
6296 			status = descriptor->open_mode;
6297 			break;
6298 
6299 		case F_DUPFD:
6300 		case F_DUPFD_CLOEXEC:
6301 		{
6302 			status = new_fd_etc(context, descriptor, (int)argument);
6303 			if (status >= 0) {
6304 				mutex_lock(&context->io_mutex);
6305 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6306 				mutex_unlock(&context->io_mutex);
6307 
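				// the descriptor is now referenced from a second slot in
				// the context's FD table, so it needs another reference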
6308 				atomic_add(&descriptor->ref_count, 1);
6309 			}
6310 			break;
6311 		}
6312 
6313 		case F_GETLK:
6314 			if (vnode != NULL) {
6315 				struct flock normalizedLock;
6316 
6317 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6318 				status = normalize_flock(descriptor, &normalizedLock);
6319 				if (status != B_OK)
6320 					break;
6321 
6322 				if (HAS_FS_CALL(vnode, test_lock)) {
6323 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6324 						&normalizedLock);
6325 				} else
6326 					status = test_advisory_lock(vnode, &normalizedLock);
6327 				if (status == B_OK) {
6328 					if (normalizedLock.l_type == F_UNLCK) {
6329 						// no conflicting lock found, copy back the same struct
6330 						// we were given except change type to F_UNLCK
6331 						flock.l_type = F_UNLCK;
6332 						if (kernel) {
6333 							memcpy((struct flock*)argument, &flock,
6334 								sizeof(struct flock));
6335 						} else {
6336 							status = user_memcpy((struct flock*)argument,
6337 								&flock, sizeof(struct flock));
6338 						}
6339 					} else {
6340 						// a conflicting lock was found, copy back its range and
6341 						// type
6342 						if (normalizedLock.l_len == OFF_MAX)
6343 							normalizedLock.l_len = 0;
6344 
6345 						if (kernel) {
6346 							memcpy((struct flock*)argument,
6347 								&normalizedLock, sizeof(struct flock));
6348 						} else {
6349 							status = user_memcpy((struct flock*)argument,
6350 								&normalizedLock, sizeof(struct flock));
6351 						}
6352 					}
6353 				}
6354 			} else
6355 				status = B_BAD_VALUE;
6356 			break;
6357 
6358 		case F_SETLK:
6359 		case F_SETLKW:
6360 			status = normalize_flock(descriptor, &flock);
6361 			if (status != B_OK)
6362 				break;
6363 
6364 			if (vnode == NULL) {
6365 				status = B_BAD_VALUE;
6366 			} else if (flock.l_type == F_UNLCK) {
6367 				if (HAS_FS_CALL(vnode, release_lock)) {
6368 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6369 						&flock);
6370 				} else {
6371 					status = release_advisory_lock(vnode, context, NULL,
6372 						&flock);
6373 				}
6374 			} else {
6375 				// the open mode must match the lock type
6376 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6377 						&& flock.l_type == F_WRLCK)
6378 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6379 						&& flock.l_type == F_RDLCK))
6380 					status = B_FILE_ERROR;
6381 				else {
6382 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6383 						status = FS_CALL(vnode, acquire_lock,
6384 							descriptor->cookie, &flock, op == F_SETLKW);
6385 					} else {
6386 						status = acquire_advisory_lock(vnode, context, NULL,
6387 							&flock, op == F_SETLKW);
6388 					}
6389 				}
6390 			}
6391 			break;
6392 
6393 		// ToDo: add support for more ops?
6394 
6395 		default:
6396 			status = B_BAD_VALUE;
6397 	}
6398 
6399 	put_fd(descriptor);
6400 	return status;
6401 }
6402 
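/*	A minimal usage sketch (hypothetical userland code) for the advisory
	locking ops handled above -- lock the first 100 bytes of a file and
	block until the lock can be acquired:

		struct flock lock = {};
		lock.l_type = F_WRLCK;
		lock.l_whence = SEEK_SET;
		lock.l_start = 0;
		lock.l_len = 100;
		if (fcntl(fd, F_SETLKW, &lock) < 0)
			; // error handling elided
*/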
6403 
6404 static status_t
6405 common_sync(int fd, bool kernel)
6406 {
6407 	struct file_descriptor* descriptor;
6408 	struct vnode* vnode;
6409 	status_t status;
6410 
6411 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6412 
6413 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6414 	if (descriptor == NULL)
6415 		return B_FILE_ERROR;
6416 
6417 	if (HAS_FS_CALL(vnode, fsync))
6418 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6419 	else
6420 		status = B_UNSUPPORTED;
6421 
6422 	put_fd(descriptor);
6423 	return status;
6424 }
6425 
6426 
6427 static status_t
6428 common_lock_node(int fd, bool kernel)
6429 {
6430 	struct file_descriptor* descriptor;
6431 	struct vnode* vnode;
6432 
6433 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6434 	if (descriptor == NULL)
6435 		return B_FILE_ERROR;
6436 
6437 	status_t status = B_OK;
6438 
6439 	// We need to set the lock holder atomically -- someone else might
6440 	// set one at the same time
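	// (atomic_pointer_test_and_set() stores the new value only if the
	// current value equals the last argument and returns the previous
	// value; a non-NULL result here means another descriptor already
	// holds the lock)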
6441 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6442 			(file_descriptor*)NULL) != NULL)
6443 		status = B_BUSY;
6444 
6445 	put_fd(descriptor);
6446 	return status;
6447 }
6448 
6449 
6450 static status_t
6451 common_unlock_node(int fd, bool kernel)
6452 {
6453 	struct file_descriptor* descriptor;
6454 	struct vnode* vnode;
6455 
6456 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6457 	if (descriptor == NULL)
6458 		return B_FILE_ERROR;
6459 
6460 	status_t status = B_OK;
6461 
6462 	// We need to clear the lock holder atomically -- someone else
6463 	// might change it at the same time
6464 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6465 			(file_descriptor*)NULL, descriptor) != descriptor)
6466 		status = B_BAD_VALUE;
6467 
6468 	put_fd(descriptor);
6469 	return status;
6470 }
6471 
6472 
6473 static status_t
6474 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6475 {
6476 	struct file_descriptor* descriptor;
6477 	struct vnode* vnode;
6478 
6479 	if (offset < 0 || length == 0)
6480 		return B_BAD_VALUE;
6481 	if (offset > OFF_MAX - length)
6482 		return B_FILE_TOO_LARGE;
6483 
6484 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6485 	if (descriptor == NULL || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6486 		return B_FILE_ERROR;
6487 
6488 	switch (vnode->Type() & S_IFMT) {
6489 		case S_IFIFO:
6490 		case S_IFSOCK:
6491 			return ESPIPE;
6492 
6493 		case S_IFBLK:
6494 		case S_IFCHR:
6495 		case S_IFDIR:
6496 		case S_IFLNK:
6497 			return B_DEVICE_NOT_FOUND;
6498 
6499 		case S_IFREG:
6500 			break;
6501 	}
6502 
6503 	status_t status = B_OK;
6504 	if (HAS_FS_CALL(vnode, preallocate)) {
6505 		status = FS_CALL(vnode, preallocate, offset, length);
6506 	} else {
6507 		status = HAS_FS_CALL(vnode, write)
6508 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6509 	}
6510 
6511 	return status;
6512 }
6513 
6514 
6515 static status_t
6516 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6517 	bool kernel)
6518 {
6519 	struct vnode* vnode;
6520 	status_t status;
6521 
6522 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6523 	if (status != B_OK)
6524 		return status;
6525 
6526 	if (HAS_FS_CALL(vnode, read_symlink)) {
6527 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6528 	} else
6529 		status = B_BAD_VALUE;
6530 
6531 	put_vnode(vnode);
6532 	return status;
6533 }
6534 
6535 
6536 static status_t
6537 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6538 	bool kernel)
6539 {
6540 	// path validity checks have to be in the calling function!
6541 	char name[B_FILE_NAME_LENGTH];
6542 	struct vnode* vnode;
6543 	status_t status;
6544 
6545 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6546 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6547 
6548 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6549 	if (status != B_OK)
6550 		return status;
6551 
6552 	if (HAS_FS_CALL(vnode, create_symlink))
6553 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6554 	else {
6555 		status = HAS_FS_CALL(vnode, write)
6556 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6557 	}
6558 
6559 	put_vnode(vnode);
6560 
6561 	return status;
6562 }
6563 
6564 
6565 static status_t
6566 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6567 	bool traverseLeafLink, bool kernel)
6568 {
6569 	// path validity checks have to be in the calling function!
6570 
6571 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6572 		toPath, kernel));
6573 
6574 	char name[B_FILE_NAME_LENGTH];
6575 	struct vnode* directory;
6576 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6577 		kernel);
6578 	if (status != B_OK)
6579 		return status;
6580 
6581 	struct vnode* vnode;
6582 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6583 		kernel);
6584 	if (status != B_OK)
6585 		goto err;
6586 
6587 	if (directory->mount != vnode->mount) {
6588 		status = B_CROSS_DEVICE_LINK;
6589 		goto err1;
6590 	}
6591 
6592 	if (HAS_FS_CALL(directory, link))
6593 		status = FS_CALL(directory, link, name, vnode);
6594 	else
6595 		status = B_READ_ONLY_DEVICE;
6596 
6597 err1:
6598 	put_vnode(vnode);
6599 err:
6600 	put_vnode(directory);
6601 
6602 	return status;
6603 }
6604 
6605 
6606 static status_t
6607 common_unlink(int fd, char* path, bool kernel)
6608 {
6609 	char filename[B_FILE_NAME_LENGTH];
6610 	struct vnode* vnode;
6611 	status_t status;
6612 
6613 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6614 		kernel));
6615 
6616 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6617 	if (status < 0)
6618 		return status;
6619 
6620 	if (HAS_FS_CALL(vnode, unlink))
6621 		status = FS_CALL(vnode, unlink, filename);
6622 	else
6623 		status = B_READ_ONLY_DEVICE;
6624 
6625 	put_vnode(vnode);
6626 
6627 	return status;
6628 }
6629 
6630 
6631 static status_t
6632 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6633 {
6634 	struct vnode* vnode;
6635 	status_t status;
6636 
6637 	// TODO: honor effectiveUserGroup argument
6638 
6639 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6640 	if (status != B_OK)
6641 		return status;
6642 
6643 	if (HAS_FS_CALL(vnode, access))
6644 		status = FS_CALL(vnode, access, mode);
6645 	else
6646 		status = B_OK;
6647 
6648 	put_vnode(vnode);
6649 
6650 	return status;
6651 }
6652 
6653 
6654 static status_t
6655 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6656 {
6657 	struct vnode* fromVnode;
6658 	struct vnode* toVnode;
6659 	char fromName[B_FILE_NAME_LENGTH];
6660 	char toName[B_FILE_NAME_LENGTH];
6661 	status_t status;
6662 
6663 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6664 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6665 
6666 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6667 	if (status != B_OK)
6668 		return status;
6669 
6670 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6671 	if (status != B_OK)
6672 		goto err1;
6673 
6674 	if (fromVnode->device != toVnode->device) {
6675 		status = B_CROSS_DEVICE_LINK;
6676 		goto err2;
6677 	}
6678 
6679 	if (fromName[0] == '\0' || toName[0] == '\0'
6680 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6681 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6682 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6683 		status = B_BAD_VALUE;
6684 		goto err2;
6685 	}
6686 
6687 	if (HAS_FS_CALL(fromVnode, rename))
6688 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6689 	else
6690 		status = B_READ_ONLY_DEVICE;
6691 
6692 err2:
6693 	put_vnode(toVnode);
6694 err1:
6695 	put_vnode(fromVnode);
6696 
6697 	return status;
6698 }
6699 
6700 
6701 static status_t
6702 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6703 {
6704 	struct vnode* vnode = descriptor->u.vnode;
6705 
6706 	FUNCTION(("common_read_stat: stat %p\n", stat));
6707 
6708 	// TODO: remove this once all file systems properly set them!
6709 	stat->st_crtim.tv_nsec = 0;
6710 	stat->st_ctim.tv_nsec = 0;
6711 	stat->st_mtim.tv_nsec = 0;
6712 	stat->st_atim.tv_nsec = 0;
6713 
6714 	return vfs_stat_vnode(vnode, stat);
6715 }
6716 
6717 
6718 static status_t
6719 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6720 	int statMask)
6721 {
6722 	struct vnode* vnode = descriptor->u.vnode;
6723 
6724 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6725 		vnode, stat, statMask));
6726 
6727 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6728 		&& (statMask & B_STAT_SIZE) != 0) {
6729 		return B_BAD_VALUE;
6730 	}
6731 
6732 	if (!HAS_FS_CALL(vnode, write_stat))
6733 		return B_READ_ONLY_DEVICE;
6734 
6735 	return FS_CALL(vnode, write_stat, stat, statMask);
6736 }
6737 
6738 
6739 static status_t
6740 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6741 	struct stat* stat, bool kernel)
6742 {
6743 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6744 		stat));
6745 
6746 	struct vnode* vnode;
6747 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6748 		NULL, kernel);
6749 	if (status != B_OK)
6750 		return status;
6751 
6752 	status = vfs_stat_vnode(vnode, stat);
6753 
6754 	put_vnode(vnode);
6755 	return status;
6756 }
6757 
6758 
6759 static status_t
6760 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6761 	const struct stat* stat, int statMask, bool kernel)
6762 {
6763 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6764 		"kernel %d\n", fd, path, stat, statMask, kernel));
6765 
6766 	struct vnode* vnode;
6767 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6768 		NULL, kernel);
6769 	if (status != B_OK)
6770 		return status;
6771 
6772 	if (HAS_FS_CALL(vnode, write_stat))
6773 		status = FS_CALL(vnode, write_stat, stat, statMask);
6774 	else
6775 		status = B_READ_ONLY_DEVICE;
6776 
6777 	put_vnode(vnode);
6778 
6779 	return status;
6780 }
6781 
6782 
6783 static int
6784 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6785 {
6786 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6787 		kernel));
6788 
6789 	struct vnode* vnode;
6790 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6791 		NULL, kernel);
6792 	if (status != B_OK)
6793 		return status;
6794 
6795 	status = open_attr_dir_vnode(vnode, kernel);
6796 	if (status < 0)
6797 		put_vnode(vnode);
6798 
6799 	return status;
6800 }
6801 
6802 
6803 static status_t
6804 attr_dir_close(struct file_descriptor* descriptor)
6805 {
6806 	struct vnode* vnode = descriptor->u.vnode;
6807 
6808 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6809 
6810 	if (HAS_FS_CALL(vnode, close_attr_dir))
6811 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6812 
6813 	return B_OK;
6814 }
6815 
6816 
6817 static void
6818 attr_dir_free_fd(struct file_descriptor* descriptor)
6819 {
6820 	struct vnode* vnode = descriptor->u.vnode;
6821 
6822 	if (vnode != NULL) {
6823 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6824 		put_vnode(vnode);
6825 	}
6826 }
6827 
6828 
6829 static status_t
6830 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6831 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6832 {
6833 	struct vnode* vnode = descriptor->u.vnode;
6834 
6835 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6836 
6837 	if (HAS_FS_CALL(vnode, read_attr_dir))
6838 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6839 			bufferSize, _count);
6840 
6841 	return B_UNSUPPORTED;
6842 }
6843 
6844 
6845 static status_t
6846 attr_dir_rewind(struct file_descriptor* descriptor)
6847 {
6848 	struct vnode* vnode = descriptor->u.vnode;
6849 
6850 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6851 
6852 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6853 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6854 
6855 	return B_UNSUPPORTED;
6856 }
6857 
6858 
6859 static int
6860 attr_create(int fd, char* path, const char* name, uint32 type,
6861 	int openMode, bool kernel)
6862 {
6863 	if (name == NULL || *name == '\0')
6864 		return B_BAD_VALUE;
6865 
6866 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6867 	struct vnode* vnode;
6868 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6869 		kernel);
6870 	if (status != B_OK)
6871 		return status;
6872 
6873 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6874 		status = B_LINK_LIMIT;
6875 		goto err;
6876 	}
6877 
6878 	if (!HAS_FS_CALL(vnode, create_attr)) {
6879 		status = B_READ_ONLY_DEVICE;
6880 		goto err;
6881 	}
6882 
6883 	void* cookie;
6884 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6885 	if (status != B_OK)
6886 		goto err;
6887 
6888 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6889 	if (fd >= 0)
6890 		return fd;
6891 
6892 	status = fd;
6893 
6894 	FS_CALL(vnode, close_attr, cookie);
6895 	FS_CALL(vnode, free_attr_cookie, cookie);
6896 
6897 	FS_CALL(vnode, remove_attr, name);
6898 
6899 err:
6900 	put_vnode(vnode);
6901 
6902 	return status;
6903 }
6904 
6905 
6906 static int
6907 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6908 {
6909 	if (name == NULL || *name == '\0')
6910 		return B_BAD_VALUE;
6911 
6912 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6913 	struct vnode* vnode;
6914 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6915 		kernel);
6916 	if (status != B_OK)
6917 		return status;
6918 
6919 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6920 		status = B_LINK_LIMIT;
6921 		goto err;
6922 	}
6923 
6924 	if (!HAS_FS_CALL(vnode, open_attr)) {
6925 		status = B_UNSUPPORTED;
6926 		goto err;
6927 	}
6928 
6929 	void* cookie;
6930 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6931 	if (status != B_OK)
6932 		goto err;
6933 
6934 	// now we only need a file descriptor for this attribute and we're done
6935 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6936 	if (fd >= 0)
6937 		return fd;
6938 
6939 	status = fd;
6940 
6941 	FS_CALL(vnode, close_attr, cookie);
6942 	FS_CALL(vnode, free_attr_cookie, cookie);
6943 
6944 err:
6945 	put_vnode(vnode);
6946 
6947 	return status;
6948 }
6949 
6950 
6951 static status_t
6952 attr_close(struct file_descriptor* descriptor)
6953 {
6954 	struct vnode* vnode = descriptor->u.vnode;
6955 
6956 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6957 
6958 	if (HAS_FS_CALL(vnode, close_attr))
6959 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6960 
6961 	return B_OK;
6962 }
6963 
6964 
6965 static void
6966 attr_free_fd(struct file_descriptor* descriptor)
6967 {
6968 	struct vnode* vnode = descriptor->u.vnode;
6969 
6970 	if (vnode != NULL) {
6971 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6972 		put_vnode(vnode);
6973 	}
6974 }
6975 
6976 
6977 static status_t
6978 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6979 	size_t* length)
6980 {
6981 	struct vnode* vnode = descriptor->u.vnode;
6982 
6983 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6984 		pos, length, *length));
6985 
6986 	if (!HAS_FS_CALL(vnode, read_attr))
6987 		return B_UNSUPPORTED;
6988 
6989 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6990 }
6991 
6992 
6993 static status_t
6994 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6995 	size_t* length)
6996 {
6997 	struct vnode* vnode = descriptor->u.vnode;
6998 
6999 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
7000 		length));
7001 
7002 	if (!HAS_FS_CALL(vnode, write_attr))
7003 		return B_UNSUPPORTED;
7004 
7005 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
7006 }
7007 
7008 
7009 static off_t
7010 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
7011 {
7012 	off_t offset;
7013 
7014 	switch (seekType) {
7015 		case SEEK_SET:
7016 			offset = 0;
7017 			break;
7018 		case SEEK_CUR:
7019 			offset = descriptor->pos;
7020 			break;
7021 		case SEEK_END:
7022 		{
7023 			struct vnode* vnode = descriptor->u.vnode;
7024 			if (!HAS_FS_CALL(vnode, read_attr_stat))
7025 				return B_UNSUPPORTED;
7026 
7027 			struct stat stat;
7028 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
7029 				&stat);
7030 			if (status != B_OK)
7031 				return status;
7032 
7033 			offset = stat.st_size;
7034 			break;
7035 		}
7036 		default:
7037 			return B_BAD_VALUE;
7038 	}
7039 
7040 	// assumes off_t is 64 bits wide
7041 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7042 		return B_BUFFER_OVERFLOW;
7043 
7044 	pos += offset;
7045 	if (pos < 0)
7046 		return B_BAD_VALUE;
7047 
7048 	return descriptor->pos = pos;
7049 }
7050 
7051 
7052 static status_t
7053 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7054 {
7055 	struct vnode* vnode = descriptor->u.vnode;
7056 
7057 	FUNCTION(("attr_read_stat: stat %p\n", stat));
7058 
7059 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7060 		return B_UNSUPPORTED;
7061 
7062 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7063 }
7064 
7065 
7066 static status_t
7067 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7068 	int statMask)
7069 {
7070 	struct vnode* vnode = descriptor->u.vnode;
7071 
7072 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7073 
7074 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7075 		return B_READ_ONLY_DEVICE;
7076 
7077 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7078 }
7079 
7080 
7081 static status_t
7082 attr_remove(int fd, const char* name, bool kernel)
7083 {
7084 	struct file_descriptor* descriptor;
7085 	struct vnode* vnode;
7086 	status_t status;
7087 
7088 	if (name == NULL || *name == '\0')
7089 		return B_BAD_VALUE;
7090 
7091 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7092 		kernel));
7093 
7094 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7095 	if (descriptor == NULL)
7096 		return B_FILE_ERROR;
7097 
7098 	if (HAS_FS_CALL(vnode, remove_attr))
7099 		status = FS_CALL(vnode, remove_attr, name);
7100 	else
7101 		status = B_READ_ONLY_DEVICE;
7102 
7103 	put_fd(descriptor);
7104 
7105 	return status;
7106 }
7107 
7108 
7109 static status_t
7110 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7111 	bool kernel)
7112 {
7113 	struct file_descriptor* fromDescriptor;
7114 	struct file_descriptor* toDescriptor;
7115 	struct vnode* fromVnode;
7116 	struct vnode* toVnode;
7117 	status_t status;
7118 
7119 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7120 		|| *toName == '\0')
7121 		return B_BAD_VALUE;
7122 
7123 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7124 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7125 
7126 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7127 	if (fromDescriptor == NULL)
7128 		return B_FILE_ERROR;
7129 
7130 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7131 	if (toDescriptor == NULL) {
7132 		status = B_FILE_ERROR;
7133 		goto err;
7134 	}
7135 
7136 	// are the files on the same volume?
7137 	if (fromVnode->device != toVnode->device) {
7138 		status = B_CROSS_DEVICE_LINK;
7139 		goto err1;
7140 	}
7141 
7142 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7143 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7144 	} else
7145 		status = B_READ_ONLY_DEVICE;
7146 
7147 err1:
7148 	put_fd(toDescriptor);
7149 err:
7150 	put_fd(fromDescriptor);
7151 
7152 	return status;
7153 }
7154 
7155 
7156 static int
7157 index_dir_open(dev_t mountID, bool kernel)
7158 {
7159 	struct fs_mount* mount;
7160 	void* cookie;
7161 
7162 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7163 		kernel));
7164 
7165 	status_t status = get_mount(mountID, &mount);
7166 	if (status != B_OK)
7167 		return status;
7168 
7169 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7170 		status = B_UNSUPPORTED;
7171 		goto error;
7172 	}
7173 
7174 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7175 	if (status != B_OK)
7176 		goto error;
7177 
7178 	// get fd for the index directory
7179 	int fd;
7180 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7181 	if (fd >= 0)
7182 		return fd;
7183 
7184 	// something went wrong
7185 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7186 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7187 
7188 	status = fd;
7189 
7190 error:
7191 	put_mount(mount);
7192 	return status;
7193 }
7194 
7195 
7196 static status_t
7197 index_dir_close(struct file_descriptor* descriptor)
7198 {
7199 	struct fs_mount* mount = descriptor->u.mount;
7200 
7201 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7202 
7203 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7204 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7205 
7206 	return B_OK;
7207 }
7208 
7209 
7210 static void
7211 index_dir_free_fd(struct file_descriptor* descriptor)
7212 {
7213 	struct fs_mount* mount = descriptor->u.mount;
7214 
7215 	if (mount != NULL) {
7216 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7217 		put_mount(mount);
7218 	}
7219 }
7220 
7221 
7222 static status_t
7223 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7224 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7225 {
7226 	struct fs_mount* mount = descriptor->u.mount;
7227 
7228 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7229 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7230 			bufferSize, _count);
7231 	}
7232 
7233 	return B_UNSUPPORTED;
7234 }
7235 
7236 
7237 static status_t
7238 index_dir_rewind(struct file_descriptor* descriptor)
7239 {
7240 	struct fs_mount* mount = descriptor->u.mount;
7241 
7242 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7243 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7244 
7245 	return B_UNSUPPORTED;
7246 }
7247 
7248 
7249 static status_t
7250 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7251 	bool kernel)
7252 {
7253 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7254 		mountID, name, kernel));
7255 
7256 	struct fs_mount* mount;
7257 	status_t status = get_mount(mountID, &mount);
7258 	if (status != B_OK)
7259 		return status;
7260 
7261 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7262 		status = B_READ_ONLY_DEVICE;
7263 		goto out;
7264 	}
7265 
7266 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7267 
7268 out:
7269 	put_mount(mount);
7270 	return status;
7271 }
7272 
7273 
7274 #if 0
7275 static status_t
7276 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7277 {
7278 	struct vnode* vnode = descriptor->u.vnode;
7279 
7280 	// ToDo: currently unused!
7281 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7282 	if (!HAS_FS_CALL(vnode, read_index_stat))
7283 		return B_UNSUPPORTED;
7284 
7285 	return B_UNSUPPORTED;
7286 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7287 }
7288 
7289 
7290 static void
7291 index_free_fd(struct file_descriptor* descriptor)
7292 {
7293 	struct vnode* vnode = descriptor->u.vnode;
7294 
7295 	if (vnode != NULL) {
7296 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7297 		put_vnode(vnode);
7298 	}
7299 }
7300 #endif
7301 
7302 
7303 static status_t
7304 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7305 	bool kernel)
7306 {
7307 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7308 		mountID, name, kernel));
7309 
7310 	struct fs_mount* mount;
7311 	status_t status = get_mount(mountID, &mount);
7312 	if (status != B_OK)
7313 		return status;
7314 
7315 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7316 		status = B_UNSUPPORTED;
7317 		goto out;
7318 	}
7319 
7320 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7321 
7322 out:
7323 	put_mount(mount);
7324 	return status;
7325 }
7326 
7327 
7328 static status_t
7329 index_remove(dev_t mountID, const char* name, bool kernel)
7330 {
7331 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7332 		mountID, name, kernel));
7333 
7334 	struct fs_mount* mount;
7335 	status_t status = get_mount(mountID, &mount);
7336 	if (status != B_OK)
7337 		return status;
7338 
7339 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7340 		status = B_READ_ONLY_DEVICE;
7341 		goto out;
7342 	}
7343 
7344 	status = FS_MOUNT_CALL(mount, remove_index, name);
7345 
7346 out:
7347 	put_mount(mount);
7348 	return status;
7349 }
7350 
7351 
7352 /*!	TODO: the query FS API is still pretty much the same as in R5.
7353 		It would be nice if queries got some more kernel support;
7354 		for example, query parsing should be moved into the kernel.
7356 */
7357 static int
7358 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7359 	int32 token, bool kernel)
7360 {
7361 	struct fs_mount* mount;
7362 	void* cookie;
7363 
7364 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7365 		device, query, kernel));
7366 
7367 	status_t status = get_mount(device, &mount);
7368 	if (status != B_OK)
7369 		return status;
7370 
7371 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7372 		status = B_UNSUPPORTED;
7373 		goto error;
7374 	}
7375 
7376 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7377 		&cookie);
7378 	if (status != B_OK)
7379 		goto error;
7380 
7381 	// get fd for the query
7382 	int fd;
7383 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7384 	if (fd >= 0)
7385 		return fd;
7386 
7387 	status = fd;
7388 
7389 	// something went wrong
7390 	FS_MOUNT_CALL(mount, close_query, cookie);
7391 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7392 
7393 error:
7394 	put_mount(mount);
7395 	return status;
7396 }
7397 
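/*	A minimal usage sketch (hypothetical userland code, assuming the public
	fs_query API from <fs_query.h>): a live query hands a port/token pair
	down to the FS, which is what query_open() above forwards via
	open_query():

		DIR* query = fs_open_live_query(device, "name==\"*.cpp\"",
			B_LIVE_QUERY, port, token);
		// initial matches are read with fs_read_query(); later changes
		// arrive as messages on the given port
*/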
7398 
7399 static status_t
7400 query_close(struct file_descriptor* descriptor)
7401 {
7402 	struct fs_mount* mount = descriptor->u.mount;
7403 
7404 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7405 
7406 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7407 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7408 
7409 	return B_OK;
7410 }
7411 
7412 
7413 static void
7414 query_free_fd(struct file_descriptor* descriptor)
7415 {
7416 	struct fs_mount* mount = descriptor->u.mount;
7417 
7418 	if (mount != NULL) {
7419 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7420 		put_mount(mount);
7421 	}
7422 }
7423 
7424 
7425 static status_t
7426 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7427 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7428 {
7429 	struct fs_mount* mount = descriptor->u.mount;
7430 
7431 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7432 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7433 			bufferSize, _count);
7434 	}
7435 
7436 	return B_UNSUPPORTED;
7437 }
7438 
7439 
7440 static status_t
7441 query_rewind(struct file_descriptor* descriptor)
7442 {
7443 	struct fs_mount* mount = descriptor->u.mount;
7444 
7445 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7446 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7447 
7448 	return B_UNSUPPORTED;
7449 }
7450 
7451 
7452 //	#pragma mark - General File System functions
7453 
7454 
7455 static dev_t
7456 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7457 	const char* args, bool kernel)
7458 {
7459 	struct ::fs_mount* mount;
7460 	status_t status = B_OK;
7461 	fs_volume* volume = NULL;
7462 	int32 layer = 0;
7463 	Vnode* coveredNode = NULL;
7464 
7465 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7466 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7467 
7468 	// The path is always safe; we just have to make sure that fsName is
7469 	// at least minimally valid -- we can't make any assumptions about args,
7470 	// though. A NULL fsName is OK if a device was given and the FS is not
7471 	// virtual; we'll get the name from the DDM later.
7472 	if (fsName == NULL) {
7473 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7474 			return B_BAD_VALUE;
7475 	} else if (fsName[0] == '\0')
7476 		return B_BAD_VALUE;
7477 
7478 	RecursiveLocker mountOpLocker(sMountOpLock);
7479 
7480 	// Helper to delete a newly created file device on failure.
7481 	// Not exactly beautiful, but helps to keep the code below cleaner.
7482 	struct FileDeviceDeleter {
7483 		FileDeviceDeleter() : id(-1) {}
7484 		~FileDeviceDeleter()
7485 		{
7486 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7487 		}
7488 
7489 		partition_id id;
7490 	} fileDeviceDeleter;
7491 
7492 	// If the file system is not a "virtual" one, the device argument should
7493 	// point to a real file/device (if given at all).
7494 	// get the partition
7495 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7496 	KPartition* partition = NULL;
7497 	KPath normalizedDevice;
7498 	bool newlyCreatedFileDevice = false;
7499 
7500 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7501 		// normalize the device path
7502 		status = normalizedDevice.SetTo(device, true);
7503 		if (status != B_OK)
7504 			return status;
7505 
7506 		// get a corresponding partition from the DDM
7507 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7508 		if (partition == NULL) {
7509 			// Partition not found: this either means the user supplied
7510 			// an invalid path, or the path refers to an image file. We try
7511 			// to let the DDM create a file device for the path.
7512 			partition_id deviceID = ddm->CreateFileDevice(
7513 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7514 			if (deviceID >= 0) {
7515 				partition = ddm->RegisterPartition(deviceID);
7516 				if (newlyCreatedFileDevice)
7517 					fileDeviceDeleter.id = deviceID;
7518 			}
7519 		}
7520 
7521 		if (!partition) {
7522 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7523 				normalizedDevice.Path()));
7524 			return B_ENTRY_NOT_FOUND;
7525 		}
7526 
7527 		device = normalizedDevice.Path();
7528 			// correct path to file device
7529 	}
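	// (e.g. mounting the image file "/boot/home/image.bfs" takes the
	// CreateFileDevice() path above, so that the plain file can be treated
	// like any other partition)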
7530 	PartitionRegistrar partitionRegistrar(partition, true);
7531 
7532 	// Write lock the partition's device. For the time being, we keep the lock
7533 	// until we're done mounting -- not nice, but it ensures that no one
7534 	// interferes.
7535 	// TODO: Just mark the partition busy while mounting!
7536 	KDiskDevice* diskDevice = NULL;
7537 	if (partition) {
7538 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7539 		if (!diskDevice) {
7540 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7541 			return B_ERROR;
7542 		}
7543 	}
7544 
7545 	DeviceWriteLocker writeLocker(diskDevice, true);
7546 		// this takes over the write lock acquired before
7547 
7548 	if (partition != NULL) {
7549 		// make sure that the partition is not busy
7550 		if (partition->IsBusy()) {
7551 			TRACE(("fs_mount(): Partition is busy.\n"));
7552 			return B_BUSY;
7553 		}
7554 
7555 		// if no FS name has been supplied, we get it from the partition
7556 		if (fsName == NULL) {
7557 			KDiskSystem* diskSystem = partition->DiskSystem();
7558 			if (!diskSystem) {
7559 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7560 					"recognize it.\n"));
7561 				return B_BAD_VALUE;
7562 			}
7563 
7564 			if (!diskSystem->IsFileSystem()) {
7565 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7566 					"partitioning system.\n"));
7567 				return B_BAD_VALUE;
7568 			}
7569 
7570 			// The disk system name will not change, and the KDiskSystem
7571 			// object will not go away while the disk device is locked (and
7572 			// the partition has a reference to it), so this is safe.
7573 			fsName = diskSystem->Name();
7574 		}
7575 	}
7576 
7577 	mount = new(std::nothrow) (struct ::fs_mount);
7578 	if (mount == NULL)
7579 		return B_NO_MEMORY;
7580 
7581 	mount->device_name = strdup(device);
7582 		// "device" can be NULL
7583 
7584 	status = mount->entry_cache.Init();
7585 	if (status != B_OK)
7586 		goto err1;
7587 
7588 	// initialize structure
7589 	mount->id = sNextMountID++;
7590 	mount->partition = NULL;
7591 	mount->root_vnode = NULL;
7592 	mount->covers_vnode = NULL;
7593 	mount->unmounting = false;
7594 	mount->owns_file_device = false;
7595 	mount->volume = NULL;
7596 
7597 	// build up the volume(s)
7598 	while (true) {
7599 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7600 		if (layerFSName == NULL) {
7601 			if (layer == 0) {
7602 				status = B_NO_MEMORY;
7603 				goto err1;
7604 			}
7605 
7606 			break;
7607 		}
7608 		MemoryDeleter layerFSNameDeleter(layerFSName);
7609 
7610 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7611 		if (volume == NULL) {
7612 			status = B_NO_MEMORY;
7613 			goto err1;
7614 		}
7615 
7616 		volume->id = mount->id;
7617 		volume->partition = partition != NULL ? partition->ID() : -1;
7618 		volume->layer = layer++;
7619 		volume->private_volume = NULL;
7620 		volume->ops = NULL;
7621 		volume->sub_volume = NULL;
7622 		volume->super_volume = NULL;
7623 		volume->file_system = NULL;
7624 		volume->file_system_name = NULL;
7625 
7626 		volume->file_system_name = get_file_system_name(layerFSName);
7627 		if (volume->file_system_name == NULL) {
7628 			status = B_NO_MEMORY;
7629 			free(volume);
7630 			goto err1;
7631 		}
7632 
7633 		volume->file_system = get_file_system(layerFSName);
7634 		if (volume->file_system == NULL) {
7635 			status = B_DEVICE_NOT_FOUND;
7636 			free(volume->file_system_name);
7637 			free(volume);
7638 			goto err1;
7639 		}
7640 
7641 		if (mount->volume == NULL)
7642 			mount->volume = volume;
7643 		else {
7644 			volume->super_volume = mount->volume;
7645 			mount->volume->sub_volume = volume;
7646 			mount->volume = volume;
7647 		}
7648 	}
7649 
7650 	// insert mount struct into list before we call FS's mount() function
7651 	// so that vnodes can be created for this mount
7652 	rw_lock_write_lock(&sMountLock);
7653 	sMountsTable->Insert(mount);
7654 	rw_lock_write_unlock(&sMountLock);
7655 
7656 	ino_t rootID;
7657 
7658 	if (!sRoot) {
7659 		// we haven't mounted anything yet
7660 		if (strcmp(path, "/") != 0) {
7661 			status = B_ERROR;
7662 			goto err2;
7663 		}
7664 
7665 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7666 			args, &rootID);
7667 		if (status != B_OK || mount->volume->ops == NULL)
7668 			goto err2;
7669 	} else {
7670 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7671 		if (status != B_OK)
7672 			goto err2;
7673 
7674 		mount->covers_vnode = coveredNode;
7675 
7676 		// make sure covered_vnode is a directory
7677 		if (!S_ISDIR(coveredNode->Type())) {
7678 			status = B_NOT_A_DIRECTORY;
7679 			goto err3;
7680 		}
7681 
7682 		if (coveredNode->IsCovered()) {
7683 			// this is already a covered vnode
7684 			status = B_BUSY;
7685 			goto err3;
7686 		}
7687 
7688 		// mount it/them
7689 		fs_volume* volume = mount->volume;
7690 		while (volume) {
7691 			status = volume->file_system->mount(volume, device, flags, args,
7692 				&rootID);
7693 			if (status != B_OK || volume->ops == NULL) {
7694 				if (status == B_OK && volume->ops == NULL)
7695 					panic("fs_mount: mount() succeeded but ops is NULL!");
7696 				if (volume->sub_volume)
7697 					goto err4;
7698 				goto err3;
7699 			}
7700 
7701 			volume = volume->super_volume;
7702 		}
7703 
7704 		volume = mount->volume;
7705 		while (volume) {
7706 			if (volume->ops->all_layers_mounted != NULL)
7707 				volume->ops->all_layers_mounted(volume);
7708 			volume = volume->super_volume;
7709 		}
7710 	}
7711 
7712 	// the root node is supposed to be owned by the file system - it must
7713 	// exist at this point
7714 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7715 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7716 		panic("fs_mount: file system does not own its root node!\n");
7717 		status = B_ERROR;
7718 		goto err4;
7719 	}
7720 
7721 	// set up the links between the root vnode and the vnode it covers
7722 	rw_lock_write_lock(&sVnodeLock);
7723 	if (coveredNode != NULL) {
7724 		if (coveredNode->IsCovered()) {
7725 			// the vnode is covered now
7726 			status = B_BUSY;
7727 			rw_lock_write_unlock(&sVnodeLock);
7728 			goto err4;
7729 		}
7730 
7731 		mount->root_vnode->covers = coveredNode;
7732 		mount->root_vnode->SetCovering(true);
7733 
7734 		coveredNode->covered_by = mount->root_vnode;
7735 		coveredNode->SetCovered(true);
7736 	}
7737 	rw_lock_write_unlock(&sVnodeLock);
7738 
7739 	if (!sRoot) {
7740 		sRoot = mount->root_vnode;
7741 		mutex_lock(&sIOContextRootLock);
7742 		get_current_io_context(true)->root = sRoot;
7743 		mutex_unlock(&sIOContextRootLock);
7744 		inc_vnode_ref_count(sRoot);
7745 	}
7746 
7747 	// supply the partition (if any) with the mount cookie and mark it mounted
7748 	if (partition) {
7749 		partition->SetMountCookie(mount->volume->private_volume);
7750 		partition->SetVolumeID(mount->id);
7751 
7752 		// keep a partition reference as long as the partition is mounted
7753 		partitionRegistrar.Detach();
7754 		mount->partition = partition;
7755 		mount->owns_file_device = newlyCreatedFileDevice;
7756 		fileDeviceDeleter.id = -1;
7757 	}
7758 
7759 	notify_mount(mount->id,
7760 		coveredNode != NULL ? coveredNode->device : -1,
7761 		coveredNode ? coveredNode->id : -1);
7762 
7763 	return mount->id;
7764 
7765 err4:
7766 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7767 err3:
7768 	if (coveredNode != NULL)
7769 		put_vnode(coveredNode);
7770 err2:
7771 	rw_lock_write_lock(&sMountLock);
7772 	sMountsTable->Remove(mount);
7773 	rw_lock_write_unlock(&sMountLock);
7774 err1:
7775 	delete mount;
7776 
7777 	return status;
7778 }
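

/*!	Illustrative summary of the volume build-up loop in fs_mount() above
	(not part of the original source): each newly created layer becomes
	\c mount->volume, with its \c super_volume pointing at the previously
	created (lower) layer and that layer's \c sub_volume pointing back.

	\code
	// after two loop iterations (layers 0 and 1):
	//   mount->volume == layer1
	//   layer1->super_volume == layer0, layer1->sub_volume == NULL
	//   layer0->sub_volume == layer1,   layer0->super_volume == NULL
	\endcode
*/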
7779 
7780 
7781 static status_t
7782 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7783 {
7784 	struct fs_mount* mount;
7785 	status_t err;
7786 
7787 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7788 		mountID, kernel));
7789 
7790 	struct vnode* pathVnode = NULL;
7791 	if (path != NULL) {
7792 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7793 		if (err != B_OK)
7794 			return B_ENTRY_NOT_FOUND;
7795 	}
7796 
7797 	RecursiveLocker mountOpLocker(sMountOpLock);
7798 	ReadLocker mountLocker(sMountLock);
7799 
7800 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7801 	if (mount == NULL) {
7802 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7803 			pathVnode);
7804 	}
7805 
7806 	mountLocker.Unlock();
7807 
7808 	if (path != NULL) {
7809 		put_vnode(pathVnode);
7810 
7811 		if (mount->root_vnode != pathVnode) {
7812 			// not mountpoint
7813 			return B_BAD_VALUE;
7814 		}
7815 	}
7816 
7817 	// if the volume is associated with a partition, lock the device of the
7818 	// partition as long as we are unmounting
7819 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7820 	KPartition* partition = mount->partition;
7821 	KDiskDevice* diskDevice = NULL;
7822 	if (partition != NULL) {
7823 		if (partition->Device() == NULL) {
7824 			dprintf("fs_unmount(): There is no device!\n");
7825 			return B_ERROR;
7826 		}
7827 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7828 		if (!diskDevice) {
7829 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7830 			return B_ERROR;
7831 		}
7832 	}
7833 	DeviceWriteLocker writeLocker(diskDevice, true);
7834 
7835 	// make sure that the partition is not busy
7836 	if (partition != NULL) {
7837 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7838 			TRACE(("fs_unmount(): Partition is busy.\n"));
7839 			return B_BUSY;
7840 		}
7841 	}
7842 
7843 	// grab the vnode master mutex to keep someone from creating
7844 	// a vnode while we're figuring out if we can continue
7845 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7846 
7847 	bool disconnectedDescriptors = false;
7848 
7849 	while (true) {
7850 		bool busy = false;
7851 
7852 		// cycle through the list of vnodes associated with this mount and
7853 		// make sure none of them is busy or still referenced
7854 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7855 		while (struct vnode* vnode = iterator.Next()) {
7856 			if (vnode->IsBusy()) {
7857 				busy = true;
7858 				break;
7859 			}
7860 
7861 			// check the vnode's ref count -- subtract additional references for
7862 			// covering
7863 			int32 refCount = vnode->ref_count;
7864 			if (vnode->covers != NULL)
7865 				refCount--;
7866 			if (vnode->covered_by != NULL)
7867 				refCount--;
7868 
7869 			if (refCount != 0) {
7870 				// there are still vnodes in use on this mount, so we cannot
7871 				// unmount yet
7872 				busy = true;
7873 				break;
7874 			}
7875 		}
7876 
7877 		if (!busy)
7878 			break;
7879 
7880 		if ((flags & B_FORCE_UNMOUNT) == 0)
7881 			return B_BUSY;
7882 
7883 		if (disconnectedDescriptors) {
7884 			// wait a bit until the last access is finished, and then try again
7885 			vnodesWriteLocker.Unlock();
7886 			snooze(100000);
7887 			// TODO: if there is some kind of bug that prevents the ref counts
7888 			// from getting back to zero, this will fall into an endless loop...
7889 			vnodesWriteLocker.Lock();
7890 			continue;
7891 		}
7892 
7893 		// the file system is still busy - but we're forced to unmount it,
7894 		// so let's disconnect all open file descriptors
7895 
7896 		mount->unmounting = true;
7897 			// prevent new vnodes from being created
7898 
7899 		vnodesWriteLocker.Unlock();
7900 
7901 		disconnect_mount_or_vnode_fds(mount, NULL);
7902 		disconnectedDescriptors = true;
7903 
7904 		vnodesWriteLocker.Lock();
7905 	}
7906 
7907 	// We can safely continue. Mark all of the vnodes busy and put this
7908 	// mount structure into unmounting state. Also undo the vnode
7909 	// covers/covered_by links.
7910 	mount->unmounting = true;
7911 
7912 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7913 	while (struct vnode* vnode = iterator.Next()) {
7914 		// Remove all covers/covered_by links from other mounts' nodes to this
7915 		// vnode and adjust the node ref count accordingly. We will release the
7916 		// references to the external vnodes below.
7917 		if (Vnode* coveredNode = vnode->covers) {
7918 			if (Vnode* coveringNode = vnode->covered_by) {
7919 				// We have both covered and covering vnodes, so just remove us
7920 				// from the chain.
7921 				coveredNode->covered_by = coveringNode;
7922 				coveringNode->covers = coveredNode;
7923 				vnode->ref_count -= 2;
7924 
7925 				vnode->covered_by = NULL;
7926 				vnode->covers = NULL;
7927 				vnode->SetCovering(false);
7928 				vnode->SetCovered(false);
7929 			} else {
7930 				// We only have a covered vnode. Remove its link to us.
7931 				coveredNode->covered_by = NULL;
7932 				coveredNode->SetCovered(false);
7933 				vnode->ref_count--;
7934 
7935 				// If the other node is an external vnode, we keep its link
7936 				// around so we can put the reference later on. Otherwise we
7937 				// get rid of it right now.
7938 				if (coveredNode->mount == mount) {
7939 					vnode->covers = NULL;
7940 					coveredNode->ref_count--;
7941 				}
7942 			}
7943 		} else if (Vnode* coveringNode = vnode->covered_by) {
7944 			// We only have a covering vnode. Remove its link to us.
7945 			coveringNode->covers = NULL;
7946 			coveringNode->SetCovering(false);
7947 			vnode->ref_count--;
7948 
7949 			// If the other node is an external vnode, we keep its link
7950 			// around so we can put the reference later on. Otherwise
7951 			// we get rid of it right now.
7952 			if (coveringNode->mount == mount) {
7953 				vnode->covered_by = NULL;
7954 				coveringNode->ref_count--;
7955 			}
7956 		}
7957 
7958 		vnode->SetBusy(true);
7959 		vnode_to_be_freed(vnode);
7960 	}
7961 
7962 	vnodesWriteLocker.Unlock();
7963 
7964 	// Free all vnodes associated with this mount.
7965 	// They will be removed from the mount list by free_vnode(), so
7966 	// we don't have to do this.
7967 	while (struct vnode* vnode = mount->vnodes.Head()) {
7968 		// Put the references to external covered/covering vnodes we kept above.
7969 		if (Vnode* coveredNode = vnode->covers)
7970 			put_vnode(coveredNode);
7971 		if (Vnode* coveringNode = vnode->covered_by)
7972 			put_vnode(coveringNode);
7973 
7974 		free_vnode(vnode, false);
7975 	}
7976 
7977 	// remove the mount structure from the hash table
7978 	rw_lock_write_lock(&sMountLock);
7979 	sMountsTable->Remove(mount);
7980 	rw_lock_write_unlock(&sMountLock);
7981 
7982 	mountOpLocker.Unlock();
7983 
7984 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7985 	notify_unmount(mount->id);
7986 
7987 	// dereference the partition and mark it unmounted
7988 	if (partition) {
7989 		partition->SetVolumeID(-1);
7990 		partition->SetMountCookie(NULL);
7991 
7992 		if (mount->owns_file_device)
7993 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7994 		partition->Unregister();
7995 	}
7996 
7997 	delete mount;
7998 	return B_OK;
7999 }
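

/*!	Illustrative call sketch (the variables are hypothetical): a regular
	unmount by mount point versus a forced unmount by device ID. With
	\c B_FORCE_UNMOUNT set, still-open file descriptors are disconnected
	instead of the call failing with \c B_BUSY.

	\code
	status_t error = fs_unmount(mountPointPath, -1, 0, true);
	if (error == B_BUSY)
		error = fs_unmount(NULL, mountID, B_FORCE_UNMOUNT, true);
	\endcode
*/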
8000 
8001 
8002 static status_t
8003 fs_sync(dev_t device)
8004 {
8005 	struct fs_mount* mount;
8006 	status_t status = get_mount(device, &mount);
8007 	if (status != B_OK)
8008 		return status;
8009 
8010 	struct vnode marker;
8011 	memset(&marker, 0, sizeof(marker));
8012 	marker.SetBusy(true);
8013 	marker.SetRemoved(true);
8014 
8015 	// First, synchronize all file caches
8016 
8017 	while (true) {
8018 		WriteLocker locker(sVnodeLock);
8019 			// Note: That's the easy way, which is probably OK for sync(),
8020 			// since it's a relatively rare call and doesn't need to allow for
8021 			// a lot of concurrency. Using a read lock would be possible, but
8022 			// also more involved, since we would have to lock the individual
8023 			// nodes and take care of the locking order, which we might not
8024 			// want to do while holding fs_mount::lock.
8025 
8026 		// synchronize access to vnode list
8027 		mutex_lock(&mount->lock);
8028 
8029 		struct vnode* vnode;
8030 		if (!marker.IsRemoved()) {
8031 			vnode = mount->vnodes.GetNext(&marker);
8032 			mount->vnodes.Remove(&marker);
8033 			marker.SetRemoved(true);
8034 		} else
8035 			vnode = mount->vnodes.First();
8036 
8037 		while (vnode != NULL && (vnode->cache == NULL
8038 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8039 			// TODO: we could track writes (and writable mapped vnodes)
8040 			//	and have a simple flag that we could test for here
8041 			vnode = mount->vnodes.GetNext(vnode);
8042 		}
8043 
8044 		if (vnode != NULL) {
8045 			// insert marker vnode again
8046 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8047 			marker.SetRemoved(false);
8048 		}
8049 
8050 		mutex_unlock(&mount->lock);
8051 
8052 		if (vnode == NULL)
8053 			break;
8054 
8055 		vnode = lookup_vnode(mount->id, vnode->id);
8056 		if (vnode == NULL || vnode->IsBusy())
8057 			continue;
8058 
8059 		if (vnode->ref_count == 0) {
8060 			// this vnode has been unused before
8061 			vnode_used(vnode);
8062 		}
8063 		inc_vnode_ref_count(vnode);
8064 
8065 		locker.Unlock();
8066 
8067 		if (vnode->cache != NULL && !vnode->IsRemoved())
8068 			vnode->cache->WriteModified();
8069 
8070 		put_vnode(vnode);
8071 	}
8072 
8073 	// Let the file systems do their synchronizing work
8074 	if (HAS_FS_MOUNT_CALL(mount, sync))
8075 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8076 
8077 	// Finally, flush the underlying device's write cache (if possible).
8078 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8079 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8080 
8081 	put_mount(mount);
8082 	return status;
8083 }
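

/*!	The marker-vnode pattern used by fs_sync() above, in isolation: a
	zeroed dummy vnode is inserted into the mount's vnode list so that the
	iteration can resume after the locks have been dropped, even if
	neighboring vnodes disappear in the meantime.

	\code
	struct vnode marker;
	memset(&marker, 0, sizeof(marker));
	marker.SetBusy(true);
		// busy vnodes are skipped by the iteration itself
	marker.SetRemoved(true);
		// here "removed" doubles as "marker not currently in the list"
	\endcode
*/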
8084 
8085 
8086 static status_t
8087 fs_read_info(dev_t device, struct fs_info* info)
8088 {
8089 	struct fs_mount* mount;
8090 	status_t status = get_mount(device, &mount);
8091 	if (status != B_OK)
8092 		return status;
8093 
8094 	memset(info, 0, sizeof(struct fs_info));
8095 
8096 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8097 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8098 
8099 	// fill in info the file system doesn't (have to) know about
8100 	if (status == B_OK) {
8101 		info->dev = mount->id;
8102 		info->root = mount->root_vnode->id;
8103 
8104 		fs_volume* volume = mount->volume;
8105 		while (volume->super_volume != NULL)
8106 			volume = volume->super_volume;
8107 
8108 		strlcpy(info->fsh_name, volume->file_system_name,
8109 			sizeof(info->fsh_name));
8110 		if (mount->device_name != NULL) {
8111 			strlcpy(info->device_name, mount->device_name,
8112 				sizeof(info->device_name));
8113 		}
8114 	}
8115 
8116 	// if the call is not supported by the file system, there are still
8117 	// the parts that we filled out ourselves
8118 
8119 	put_mount(mount);
8120 	return status;
8121 }
8122 
8123 
8124 static status_t
8125 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8126 {
8127 	struct fs_mount* mount;
8128 	status_t status = get_mount(device, &mount);
8129 	if (status != B_OK)
8130 		return status;
8131 
8132 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8133 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8134 	else
8135 		status = B_READ_ONLY_DEVICE;
8136 
8137 	put_mount(mount);
8138 	return status;
8139 }
8140 
8141 
8142 static dev_t
8143 fs_next_device(int32* _cookie)
8144 {
8145 	struct fs_mount* mount = NULL;
8146 	dev_t device = *_cookie;
8147 
8148 	rw_lock_read_lock(&sMountLock);
8149 
8150 	// Since device IDs are assigned sequentially, this algorithm
8151 	// works well enough. It makes sure that the device list
8152 	// returned is sorted, and that no device is skipped when an
8153 	// already visited device gets unmounted.
8154 
8155 	while (device < sNextMountID) {
8156 		mount = find_mount(device++);
8157 		if (mount != NULL && mount->volume->private_volume != NULL)
8158 			break;
8159 	}
8160 
8161 	*_cookie = device;
8162 
8163 	if (mount != NULL)
8164 		device = mount->id;
8165 	else
8166 		device = B_BAD_VALUE;
8167 
8168 	rw_lock_read_unlock(&sMountLock);
8169 
8170 	return device;
8171 }
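

/*!	Illustrative usage of fs_next_device()'s cookie protocol: the cookie
	starts at 0 and is advanced past the last probed ID, so a device that
	is unmounted between calls does not cause the remaining ones to be
	skipped.

	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0) {
		// use device ...
	}
	\endcode
*/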
8172 
8173 
8174 ssize_t
8175 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8176 	void *buffer, size_t readBytes)
8177 {
8178 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8179 	if (attrFD < 0)
8180 		return attrFD;
8181 
8182 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8183 
8184 	_kern_close(attrFD);
8185 
8186 	return bytesRead;
8187 }
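

/*!	Illustrative usage of fs_read_attr() (the attribute name is
	hypothetical). Note that the \a type argument is not used by the
	implementation above; errors are returned as negative values.

	\code
	char buffer[B_FILE_NAME_LENGTH];
	ssize_t bytesRead = fs_read_attr(fd, "some:attribute", B_STRING_TYPE,
		0, buffer, sizeof(buffer));
	if (bytesRead < 0)
		; // error code
	\endcode
*/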
8188 
8189 
8190 static status_t
8191 get_cwd(char* buffer, size_t size, bool kernel)
8192 {
8193 	// Get current working directory from io context
8194 	struct io_context* context = get_current_io_context(kernel);
8195 	status_t status;
8196 
8197 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8198 
8199 	mutex_lock(&context->io_mutex);
8200 
8201 	struct vnode* vnode = context->cwd;
8202 	if (vnode)
8203 		inc_vnode_ref_count(vnode);
8204 
8205 	mutex_unlock(&context->io_mutex);
8206 
8207 	if (vnode) {
8208 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8209 		put_vnode(vnode);
8210 	} else
8211 		status = B_ERROR;
8212 
8213 	return status;
8214 }
8215 
8216 
8217 static status_t
8218 set_cwd(int fd, char* path, bool kernel)
8219 {
8220 	struct io_context* context;
8221 	struct vnode* vnode = NULL;
8222 	struct vnode* oldDirectory;
8223 	status_t status;
8224 
8225 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8226 
8227 	// Get vnode for passed path, and bail if it failed
8228 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8229 	if (status < 0)
8230 		return status;
8231 
8232 	if (!S_ISDIR(vnode->Type())) {
8233 		// nope, can't cwd to here
8234 		status = B_NOT_A_DIRECTORY;
8235 		goto err;
8236 	}
8237 
8238 	// We need to have the permission to enter the directory, too
8239 	if (HAS_FS_CALL(vnode, access)) {
8240 		status = FS_CALL(vnode, access, X_OK);
8241 		if (status != B_OK)
8242 			goto err;
8243 	}
8244 
8245 	// Get current io context and lock
8246 	context = get_current_io_context(kernel);
8247 	mutex_lock(&context->io_mutex);
8248 
8249 	// save the old current working directory first
8250 	oldDirectory = context->cwd;
8251 	context->cwd = vnode;
8252 
8253 	mutex_unlock(&context->io_mutex);
8254 
8255 	if (oldDirectory)
8256 		put_vnode(oldDirectory);
8257 
8258 	return B_NO_ERROR;
8259 
8260 err:
8261 	put_vnode(vnode);
8262 	return status;
8263 }
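

/*!	Reference accounting in set_cwd() above, summarized: exactly one vnode
	reference is held for the I/O context's cwd at any time.

	\code
	// fd_and_path_to_vnode()  -> +1 reference on the new cwd
	// context->cwd = vnode    -> that reference is handed to the context
	// put_vnode(oldDirectory) -> the previous cwd's reference is released,
	//                            only after io_mutex has been unlocked
	\endcode
*/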
8264 
8265 
8266 static status_t
8267 user_copy_name(char* to, const char* from, size_t length)
8268 {
8269 	ssize_t len = user_strlcpy(to, from, length);
8270 	if (len < 0)
8271 		return len;
8272 	if (len >= (ssize_t)length)
8273 		return B_NAME_TOO_LONG;
8274 	return B_OK;
8275 }
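

/*!	Behavior of user_copy_name() at a glance (follows directly from the
	code above): unlike plain user_strlcpy(), truncation is reported as an
	error instead of being silent.

	\code
	char name[B_FILE_NAME_LENGTH];
	status_t status = user_copy_name(name, userName, sizeof(name));
		// B_OK: copied completely
		// B_NAME_TOO_LONG: the source did not fit the buffer
		// otherwise: a negative user_strlcpy() error (e.g. B_BAD_ADDRESS)
	\endcode
*/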
8276 
8277 
8278 //	#pragma mark - kernel mirrored syscalls
8279 
8280 
8281 dev_t
8282 _kern_mount(const char* path, const char* device, const char* fsName,
8283 	uint32 flags, const char* args, size_t argsLength)
8284 {
8285 	KPath pathBuffer(path);
8286 	if (pathBuffer.InitCheck() != B_OK)
8287 		return B_NO_MEMORY;
8288 
8289 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8290 }
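

/*!	Illustrative kernel-side mount call (the paths are hypothetical).
	Passing \c NULL as the FS name lets the disk device manager identify
	the file system from the partition contents, as implemented in
	fs_mount() above.

	\code
	dev_t device = _kern_mount("/mnt", "/dev/disk/usb/0/0/raw", NULL, 0,
		NULL, 0);
	if (device < 0)
		; // error code
	\endcode
*/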
8291 
8292 
8293 status_t
8294 _kern_unmount(const char* path, uint32 flags)
8295 {
8296 	KPath pathBuffer(path);
8297 	if (pathBuffer.InitCheck() != B_OK)
8298 		return B_NO_MEMORY;
8299 
8300 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8301 }
8302 
8303 
8304 status_t
8305 _kern_read_fs_info(dev_t device, struct fs_info* info)
8306 {
8307 	if (info == NULL)
8308 		return B_BAD_VALUE;
8309 
8310 	return fs_read_info(device, info);
8311 }
8312 
8313 
8314 status_t
8315 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8316 {
8317 	if (info == NULL)
8318 		return B_BAD_VALUE;
8319 
8320 	return fs_write_info(device, info, mask);
8321 }
8322 
8323 
8324 status_t
8325 _kern_sync(void)
8326 {
8327 	// Note: _kern_sync() is also called from _user_sync()
8328 	int32 cookie = 0;
8329 	dev_t device;
8330 	while ((device = next_dev(&cookie)) >= 0) {
8331 		status_t status = fs_sync(device);
8332 		if (status != B_OK && status != B_BAD_VALUE) {
8333 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8334 				strerror(status));
8335 		}
8336 	}
8337 
8338 	return B_OK;
8339 }
8340 
8341 
8342 dev_t
8343 _kern_next_device(int32* _cookie)
8344 {
8345 	return fs_next_device(_cookie);
8346 }
8347 
8348 
8349 status_t
8350 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8351 	size_t infoSize)
8352 {
8353 	if (infoSize != sizeof(fd_info))
8354 		return B_BAD_VALUE;
8355 
8356 	// get the team
8357 	Team* team = Team::Get(teamID);
8358 	if (team == NULL)
8359 		return B_BAD_TEAM_ID;
8360 	BReference<Team> teamReference(team, true);
8361 
8362 	// now that we have a team reference, its I/O context won't go away
8363 	io_context* context = team->io_context;
8364 	MutexLocker contextLocker(context->io_mutex);
8365 
8366 	uint32 slot = *_cookie;
8367 
8368 	struct file_descriptor* descriptor;
8369 	while (slot < context->table_size
8370 		&& (descriptor = context->fds[slot]) == NULL) {
8371 		slot++;
8372 	}
8373 
8374 	if (slot >= context->table_size)
8375 		return B_ENTRY_NOT_FOUND;
8376 
8377 	info->number = slot;
8378 	info->open_mode = descriptor->open_mode;
8379 
8380 	struct vnode* vnode = fd_vnode(descriptor);
8381 	if (vnode != NULL) {
8382 		info->device = vnode->device;
8383 		info->node = vnode->id;
8384 	} else if (descriptor->u.mount != NULL) {
8385 		info->device = descriptor->u.mount->id;
8386 		info->node = -1;
8387 	}
8388 
8389 	*_cookie = slot + 1;
8390 	return B_OK;
8391 }
8392 
8393 
8394 int
8395 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8396 	int perms)
8397 {
8398 	if ((openMode & O_CREAT) != 0) {
8399 		return file_create_entry_ref(device, inode, name, openMode, perms,
8400 			true);
8401 	}
8402 
8403 	return file_open_entry_ref(device, inode, name, openMode, true);
8404 }
8405 
8406 
8407 /*!	\brief Opens a node specified by a FD + path pair.
8408 
8409 	At least one of \a fd and \a path must be specified.
8410 	If only \a fd is given, the function opens the node identified by this
8411 	FD. If only a path is given, this path is opened. If both are given and
8412 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8413 	of the directory (!) identified by \a fd.
8414 
8415 	\param fd The FD. May be < 0.
8416 	\param path The absolute or relative path. May be \c NULL.
8417 	\param openMode The open mode.
8418 	\return A FD referring to the newly opened node, or an error code,
8419 			if an error occurs.
8420 */
8421 int
8422 _kern_open(int fd, const char* path, int openMode, int perms)
8423 {
8424 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8425 	if (pathBuffer.InitCheck() != B_OK)
8426 		return B_NO_MEMORY;
8427 
8428 	if ((openMode & O_CREAT) != 0)
8429 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8430 
8431 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8432 }
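

/*!	Illustrative use of the FD + path rules documented above (the paths
	are hypothetical): a relative path is resolved against the directory
	FD, while an absolute path would make the FD irrelevant.

	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	int fd = _kern_open(dirFD, "config/settings", O_RDONLY, 0);
		// opens the same node as
		// _kern_open(-1, "/boot/home/config/settings", O_RDONLY, 0)
	_kern_close(fd);
	_kern_close(dirFD);
	\endcode
*/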
8433 
8434 
8435 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8436 
8437 	The supplied name may be \c NULL, in which case the directory identified
8438 	by \a device and \a inode will be opened. Otherwise \a device and
8439 	\a inode identify the parent directory of the directory to be opened
8440 	and \a name its entry name.
8441 
8442 	\param device If \a name is specified the ID of the device the parent
8443 		   directory of the directory to be opened resides on, otherwise
8444 		   the device of the directory itself.
8445 	\param inode If \a name is specified the node ID of the parent
8446 		   directory of the directory to be opened, otherwise node ID of the
8447 		   directory itself.
8448 	\param name The entry name of the directory to be opened. If \c NULL,
8449 		   the \a device + \a inode pair identify the node to be opened.
8450 	\return The FD of the newly opened directory or an error code, if
8451 			something went wrong.
8452 */
8453 int
8454 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8455 {
8456 	return dir_open_entry_ref(device, inode, name, true);
8457 }
8458 
8459 
8460 /*!	\brief Opens a directory specified by a FD + path pair.
8461 
8462 	At least one of \a fd and \a path must be specified.
8463 	If only \a fd is given, the function opens the directory identified by this
8464 	FD. If only a path is given, this path is opened. If both are given and
8465 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8466 	of the directory (!) identified by \a fd.
8467 
8468 	\param fd The FD. May be < 0.
8469 	\param path The absolute or relative path. May be \c NULL.
8470 	\return A FD referring to the newly opened directory, or an error code,
8471 			if an error occurs.
8472 */
8473 int
8474 _kern_open_dir(int fd, const char* path)
8475 {
8476 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8477 	if (pathBuffer.InitCheck() != B_OK)
8478 		return B_NO_MEMORY;
8479 
8480 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8481 }
8482 
8483 
8484 status_t
8485 _kern_fcntl(int fd, int op, size_t argument)
8486 {
8487 	return common_fcntl(fd, op, argument, true);
8488 }
8489 
8490 
8491 status_t
8492 _kern_fsync(int fd)
8493 {
8494 	return common_sync(fd, true);
8495 }
8496 
8497 
8498 status_t
8499 _kern_lock_node(int fd)
8500 {
8501 	return common_lock_node(fd, true);
8502 }
8503 
8504 
8505 status_t
8506 _kern_unlock_node(int fd)
8507 {
8508 	return common_unlock_node(fd, true);
8509 }
8510 
8511 
8512 status_t
8513 _kern_preallocate(int fd, off_t offset, off_t length)
8514 {
8515 	return common_preallocate(fd, offset, length, true);
8516 }
8517 
8518 
8519 status_t
8520 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8521 	int perms)
8522 {
8523 	return dir_create_entry_ref(device, inode, name, perms, true);
8524 }
8525 
8526 
8527 /*!	\brief Creates a directory specified by a FD + path pair.
8528 
8529 	\a path must always be specified (it contains the name of the new directory
8530 	at least). If only a path is given, this path identifies the location at
8531 	which the directory shall be created. If both \a fd and \a path are given
8532 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8533 	of the directory (!) identified by \a fd.
8534 
8535 	\param fd The FD. May be < 0.
8536 	\param path The absolute or relative path. Must not be \c NULL.
8537 	\param perms The access permissions the new directory shall have.
8538 	\return \c B_OK, if the directory has been created successfully, another
8539 			error code otherwise.
8540 */
8541 status_t
8542 _kern_create_dir(int fd, const char* path, int perms)
8543 {
8544 	KPath pathBuffer(path, KPath::DEFAULT);
8545 	if (pathBuffer.InitCheck() != B_OK)
8546 		return B_NO_MEMORY;
8547 
8548 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8549 }
8550 
8551 
8552 status_t
8553 _kern_remove_dir(int fd, const char* path)
8554 {
8555 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8556 	if (pathBuffer.InitCheck() != B_OK)
8557 		return B_NO_MEMORY;
8558 
8559 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8560 }
8561 
8562 
8563 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8564 
8565 	At least one of \a fd and \a path must be specified.
8566 	If only \a fd is given, the symlink to be read is the node
8567 	identified by this FD. If only a path is given, this path identifies the
8568 	symlink to be read. If both are given and the path is absolute, \a fd is
8569 	ignored; a relative path is reckoned off of the directory (!) identified
8570 	by \a fd.
8571 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8572 	will still be updated to reflect the required buffer size.
8573 
8574 	\param fd The FD. May be < 0.
8575 	\param path The absolute or relative path. May be \c NULL.
8576 	\param buffer The buffer into which the contents of the symlink shall be
8577 		   written.
8578 	\param _bufferSize A pointer to the size of the supplied buffer.
8579 	\return The length of the link on success or an appropriate error code.
8580 */
8581 status_t
8582 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8583 {
8584 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8585 	if (pathBuffer.InitCheck() != B_OK)
8586 		return B_NO_MEMORY;
8587 
8588 	return common_read_link(fd, pathBuffer.LockBuffer(),
8589 		buffer, _bufferSize, true);
8590 }
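

/*!	Illustrative two-step use of the \c B_BUFFER_OVERFLOW contract
	documented above (the link path is hypothetical): since \a _bufferSize
	is updated even on failure, a caller can retry with a buffer of the
	required size.

	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t bufferSize = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/some/link", buffer,
		&bufferSize);
		// on B_BUFFER_OVERFLOW, bufferSize now holds the required size
	\endcode
*/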
8591 
8592 
8593 /*!	\brief Creates a symlink specified by a FD + path pair.
8594 
8595 	\a path must always be specified (it contains the name of the new symlink
8596 	at least). If only a path is given, this path identifies the location at
8597 	which the symlink shall be created. If both \a fd and \a path are given and
8598 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8599 	of the directory (!) identified by \a fd.
8600 
8601 	\param fd The FD. May be < 0.
8602 	\param path The absolute or relative path. Must not be \c NULL.
8603 	\param mode The access permissions the new symlink shall have.
8604 	\return \c B_OK, if the symlink has been created successfully, another
8605 			error code otherwise.
8606 */
8607 status_t
8608 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8609 {
8610 	KPath pathBuffer(path);
8611 	if (pathBuffer.InitCheck() != B_OK)
8612 		return B_NO_MEMORY;
8613 
8614 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8615 		toPath, mode, true);
8616 }
8617 
8618 
8619 status_t
8620 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8621 	bool traverseLeafLink)
8622 {
8623 	KPath pathBuffer(path);
8624 	KPath toPathBuffer(toPath);
8625 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8626 		return B_NO_MEMORY;
8627 
8628 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8629 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8630 }
8631 
8632 
8633 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8634 
8635 	\a path must always be specified (it contains at least the name of the entry
8636 	to be deleted). If only a path is given, this path identifies the entry
8637 	directly. If both \a fd and \a path are given and the path is absolute,
8638 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8639 	identified by \a fd.
8640 
8641 	\param fd The FD. May be < 0.
8642 	\param path The absolute or relative path. Must not be \c NULL.
8643 	\return \c B_OK, if the entry has been removed successfully, another
8644 			error code otherwise.
8645 */
8646 status_t
8647 _kern_unlink(int fd, const char* path)
8648 {
8649 	KPath pathBuffer(path);
8650 	if (pathBuffer.InitCheck() != B_OK)
8651 		return B_NO_MEMORY;
8652 
8653 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8654 }
8655 
8656 
8657 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8658 		   by another FD + path pair.
8659 
8660 	\a oldPath and \a newPath must always be specified (they contain at least
8661 	the name of the entry). If only a path is given, this path identifies the
8662 	entry directly. If both a FD and a path are given and the path is absolute,
8663 	the FD is ignored; a relative path is reckoned off of the directory (!)
8664 	identified by the respective FD.
8665 
8666 	\param oldFD The FD of the old location. May be < 0.
8667 	\param oldPath The absolute or relative path of the old location. Must not
8668 		   be \c NULL.
8669 	\param newFD The FD of the new location. May be < 0.
8670 	\param newPath The absolute or relative path of the new location. Must not
8671 		   be \c NULL.
8672 	\return \c B_OK, if the entry has been moved successfully, another
8673 			error code otherwise.
8674 */
8675 status_t
8676 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8677 {
8678 	KPath oldPathBuffer(oldPath);
8679 	KPath newPathBuffer(newPath);
8680 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8681 		return B_NO_MEMORY;
8682 
8683 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8684 		newFD, newPathBuffer.LockBuffer(), true);
8685 }
8686 
8687 
8688 status_t
8689 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8690 {
8691 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8692 	if (pathBuffer.InitCheck() != B_OK)
8693 		return B_NO_MEMORY;
8694 
8695 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8696 		true);
8697 }
8698 
8699 
8700 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8701 
8702 	If only \a fd is given, the stat operation associated with the type
8703 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8704 	given, this path identifies the entry for whose node to retrieve the
8705 	stat data. If both \a fd and \a path are given and the path is absolute,
8706 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8707 	identified by \a fd and specifies the entry whose stat data shall be
8708 	retrieved.
8709 
8710 	\param fd The FD. May be < 0.
8711 	\param path The absolute or relative path. May be \c NULL.
8712 	\param traverseLeafLink If \a path is given, \c true specifies that the
8713 		   function shall not stick to symlinks, but traverse them.
8714 	\param stat The buffer the stat data shall be written into.
8715 	\param statSize The size of the supplied stat buffer.
8716 	\return \c B_OK, if the stat data have been read successfully, another
8717 			error code otherwise.
8718 */
8719 status_t
8720 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8721 	struct stat* stat, size_t statSize)
8722 {
8723 	struct stat completeStat;
8724 	struct stat* originalStat = NULL;
8725 	status_t status;
8726 
8727 	if (statSize > sizeof(struct stat))
8728 		return B_BAD_VALUE;
8729 
8730 	// this supports different stat extensions
8731 	if (statSize < sizeof(struct stat)) {
8732 		originalStat = stat;
8733 		stat = &completeStat;
8734 	}
8735 
8736 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8737 
8738 	if (status == B_OK && originalStat != NULL)
8739 		memcpy(originalStat, stat, statSize);
8740 
8741 	return status;
8742 }
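

/*!	How the stat-size compatibility shim above plays out (illustrative): a
	caller compiled against an older, smaller \c struct \c stat still gets
	a correctly filled prefix, because the data is read into a complete
	structure first and truncated only when copied back.

	\code
	// hypothetical caller with statSize < sizeof(struct stat):
	//   vfs_read_stat() fills completeStat entirely,
	//   memcpy(originalStat, stat, statSize) then copies just the prefix
	\endcode
*/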
8743 
8744 
8745 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8746 
8747 	If only \a fd is given, the stat operation associated with the type
8748 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8749 	given, this path identifies the entry for whose node to write the
8750 	stat data. If both \a fd and \a path are given and the path is absolute,
8751 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8752 	identified by \a fd and specifies the entry whose stat data shall be
8753 	written.
8754 
8755 	\param fd The FD. May be < 0.
8756 	\param path The absolute or relative path. May be \c NULL.
8757 	\param traverseLeafLink If \a path is given, \c true specifies that the
8758 		   function shall not stick to symlinks, but traverse them.
8759 	\param stat The buffer containing the stat data to be written.
8760 	\param statSize The size of the supplied stat buffer.
8761 	\param statMask A mask specifying which parts of the stat data shall be
8762 		   written.
8763 	\return \c B_OK, if the stat data have been written successfully,
8764 			another error code otherwise.
8765 */
8766 status_t
8767 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8768 	const struct stat* stat, size_t statSize, int statMask)
8769 {
8770 	struct stat completeStat;
8771 
8772 	if (statSize > sizeof(struct stat))
8773 		return B_BAD_VALUE;
8774 
8775 	// this supports different stat extensions
8776 	if (statSize < sizeof(struct stat)) {
8777 		memset((uint8*)&completeStat + statSize, 0,
8778 			sizeof(struct stat) - statSize);
8779 		memcpy(&completeStat, stat, statSize);
8780 		stat = &completeStat;
8781 	}
8782 
8783 	status_t status;
8784 
8785 	if (path != NULL) {
8786 		// path given: write the stat of the node referred to by (fd, path)
8787 		KPath pathBuffer(path);
8788 		if (pathBuffer.InitCheck() != B_OK)
8789 			return B_NO_MEMORY;
8790 
8791 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8792 			traverseLeafLink, stat, statMask, true);
8793 	} else {
8794 		// no path given: get the FD and use the FD operation
8795 		struct file_descriptor* descriptor
8796 			= get_fd(get_current_io_context(true), fd);
8797 		if (descriptor == NULL)
8798 			return B_FILE_ERROR;
8799 
8800 		if (descriptor->ops->fd_write_stat)
8801 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8802 		else
8803 			status = B_UNSUPPORTED;
8804 
8805 		put_fd(descriptor);
8806 	}
8807 
8808 	return status;
8809 }
8810 
8811 
8812 int
8813 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8814 {
8815 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8816 	if (pathBuffer.InitCheck() != B_OK)
8817 		return B_NO_MEMORY;
8818 
8819 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8820 }
8821 
8822 
8823 int
8824 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8825 	int openMode)
8826 {
8827 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8828 	if (pathBuffer.InitCheck() != B_OK)
8829 		return B_NO_MEMORY;
8830 
8831 	if ((openMode & O_CREAT) != 0) {
8832 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8833 			true);
8834 	}
8835 
8836 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8837 }
8838 
8839 
8840 status_t
8841 _kern_remove_attr(int fd, const char* name)
8842 {
8843 	return attr_remove(fd, name, true);
8844 }
8845 
8846 
8847 status_t
8848 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8849 	const char* toName)
8850 {
8851 	return attr_rename(fromFile, fromName, toFile, toName, true);
8852 }
8853 
8854 
8855 int
8856 _kern_open_index_dir(dev_t device)
8857 {
8858 	return index_dir_open(device, true);
8859 }
8860 
8861 
8862 status_t
8863 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8864 {
8865 	return index_create(device, name, type, flags, true);
8866 }
8867 
8868 
8869 status_t
8870 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8871 {
8872 	return index_name_read_stat(device, name, stat, true);
8873 }
8874 
8875 
8876 status_t
8877 _kern_remove_index(dev_t device, const char* name)
8878 {
8879 	return index_remove(device, name, true);
8880 }
8881 
8882 
8883 status_t
8884 _kern_getcwd(char* buffer, size_t size)
8885 {
8886 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8887 
8888 	// Call vfs to get current working directory
8889 	return get_cwd(buffer, size, true);
8890 }
8891 
8892 
8893 status_t
8894 _kern_setcwd(int fd, const char* path)
8895 {
8896 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8897 	if (pathBuffer.InitCheck() != B_OK)
8898 		return B_NO_MEMORY;
8899 
8900 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8901 }
8902 
8903 
8904 //	#pragma mark - userland syscalls
8905 
8906 
8907 dev_t
8908 _user_mount(const char* userPath, const char* userDevice,
8909 	const char* userFileSystem, uint32 flags, const char* userArgs,
8910 	size_t argsLength)
8911 {
8912 	char fileSystem[B_FILE_NAME_LENGTH];
8913 	KPath path, device;
8914 	char* args = NULL;
8915 	status_t status;
8916 
8917 	if (!IS_USER_ADDRESS(userPath))
8918 		return B_BAD_ADDRESS;
8919 
8920 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8921 		return B_NO_MEMORY;
8922 
8923 	status = user_copy_name(path.LockBuffer(), userPath,
8924 		B_PATH_NAME_LENGTH);
8925 	if (status != B_OK)
8926 		return status;
8927 	path.UnlockBuffer();
8928 
8929 	if (userFileSystem != NULL) {
8930 		if (!IS_USER_ADDRESS(userFileSystem))
8931 			return B_BAD_ADDRESS;
8932 
8933 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8934 		if (status != B_OK)
8935 			return status;
8936 	}
8937 
8938 	if (userDevice != NULL) {
8939 		if (!IS_USER_ADDRESS(userDevice))
8940 			return B_BAD_ADDRESS;
8941 
8942 		status = user_copy_name(device.LockBuffer(), userDevice,
8943 			B_PATH_NAME_LENGTH);
8944 		if (status != B_OK)
8945 			return status;
8946 		device.UnlockBuffer();
8947 	}
8948 
8949 	if (userArgs != NULL && argsLength > 0) {
8950 		if (!IS_USER_ADDRESS(userArgs))
8951 			return B_BAD_ADDRESS;
8952 
8953 		// this is a safety restriction
8954 		if (argsLength >= 65536)
8955 			return B_NAME_TOO_LONG;
8956 
8957 		args = (char*)malloc(argsLength + 1);
8958 		if (args == NULL)
8959 			return B_NO_MEMORY;
8960 
8961 		status = user_copy_name(args, userArgs, argsLength + 1);
8962 		if (status != B_OK) {
8963 			free(args);
8964 			return status;
8965 		}
8966 	}
8967 
8968 	status = fs_mount(path.LockBuffer(),
8969 		userDevice != NULL ? device.Path() : NULL,
8970 		userFileSystem ? fileSystem : NULL, flags, args, false);
8971 
8972 	free(args);
8973 	return status;
8974 }
8975 
8976 
8977 status_t
8978 _user_unmount(const char* userPath, uint32 flags)
8979 {
8980 	if (!IS_USER_ADDRESS(userPath))
8981 		return B_BAD_ADDRESS;
8982 
8983 	KPath pathBuffer;
8984 	if (pathBuffer.InitCheck() != B_OK)
8985 		return B_NO_MEMORY;
8986 
8987 	char* path = pathBuffer.LockBuffer();
8988 
8989 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8990 	if (status != B_OK)
8991 		return status;
8992 
8993 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8994 }
8995 
8996 
8997 status_t
8998 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8999 {
9000 	struct fs_info info;
9001 	status_t status;
9002 
9003 	if (userInfo == NULL)
9004 		return B_BAD_VALUE;
9005 
9006 	if (!IS_USER_ADDRESS(userInfo))
9007 		return B_BAD_ADDRESS;
9008 
9009 	status = fs_read_info(device, &info);
9010 	if (status != B_OK)
9011 		return status;
9012 
9013 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
9014 		return B_BAD_ADDRESS;
9015 
9016 	return B_OK;
9017 }
9018 
9019 
9020 status_t
9021 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
9022 {
9023 	struct fs_info info;
9024 
9025 	if (userInfo == NULL)
9026 		return B_BAD_VALUE;
9027 
9028 	if (!IS_USER_ADDRESS(userInfo)
9029 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
9030 		return B_BAD_ADDRESS;
9031 
9032 	return fs_write_info(device, &info, mask);
9033 }
9034 
9035 
9036 dev_t
9037 _user_next_device(int32* _userCookie)
9038 {
9039 	int32 cookie;
9040 	dev_t device;
9041 
9042 	if (!IS_USER_ADDRESS(_userCookie)
9043 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9044 		return B_BAD_ADDRESS;
9045 
9046 	device = fs_next_device(&cookie);
9047 
9048 	if (device >= B_OK) {
9049 		// update user cookie
9050 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9051 			return B_BAD_ADDRESS;
9052 	}
9053 
9054 	return device;
9055 }
9056 
9057 
9058 status_t
9059 _user_sync(void)
9060 {
9061 	return _kern_sync();
9062 }
9063 
9064 
9065 status_t
9066 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9067 	size_t infoSize)
9068 {
9069 	struct fd_info info;
9070 	uint32 cookie;
9071 
9072 	// only root can do this
9073 	if (geteuid() != 0)
9074 		return B_NOT_ALLOWED;
9075 
9076 	if (infoSize != sizeof(fd_info))
9077 		return B_BAD_VALUE;
9078 
9079 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9080 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9081 		return B_BAD_ADDRESS;
9082 
9083 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9084 	if (status != B_OK)
9085 		return status;
9086 
9087 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9088 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9089 		return B_BAD_ADDRESS;
9090 
9091 	return status;
9092 }
9093 
9094 
9095 status_t
9096 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9097 	char* userPath, size_t pathLength)
9098 {
9099 	if (!IS_USER_ADDRESS(userPath))
9100 		return B_BAD_ADDRESS;
9101 
9102 	KPath path;
9103 	if (path.InitCheck() != B_OK)
9104 		return B_NO_MEMORY;
9105 
9106 	// copy the leaf name onto the stack
9107 	char stackLeaf[B_FILE_NAME_LENGTH];
9108 	if (leaf != NULL) {
9109 		if (!IS_USER_ADDRESS(leaf))
9110 			return B_BAD_ADDRESS;
9111 
9112 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9113 		if (status != B_OK)
9114 			return status;
9115 
9116 		leaf = stackLeaf;
9117 	}
9118 
9119 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9120 		false, path.LockBuffer(), path.BufferSize());
9121 	if (status != B_OK)
9122 		return status;
9123 
9124 	path.UnlockBuffer();
9125 
9126 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9127 	if (length < 0)
9128 		return length;
9129 	if (length >= (int)pathLength)
9130 		return B_BUFFER_OVERFLOW;
9131 
9132 	return B_OK;
9133 }
9134 
9135 
9136 status_t
9137 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9138 {
9139 	if (userPath == NULL || buffer == NULL)
9140 		return B_BAD_VALUE;
9141 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9142 		return B_BAD_ADDRESS;
9143 
9144 	// copy path from userland
9145 	KPath pathBuffer;
9146 	if (pathBuffer.InitCheck() != B_OK)
9147 		return B_NO_MEMORY;
9148 	char* path = pathBuffer.LockBuffer();
9149 
9150 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9151 	if (status != B_OK)
9152 		return status;
9153 
9154 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9155 		false);
9156 	if (error != B_OK)
9157 		return error;
9158 
9159 	// copy back to userland
9160 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9161 	if (len < 0)
9162 		return len;
9163 	if (len >= B_PATH_NAME_LENGTH)
9164 		return B_BUFFER_OVERFLOW;
9165 
9166 	return B_OK;
9167 }
9168 
9169 
9170 int
9171 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9172 	int openMode, int perms)
9173 {
9174 	char name[B_FILE_NAME_LENGTH];
9175 
9176 	if (userName == NULL || device < 0 || inode < 0)
9177 		return B_BAD_VALUE;
9178 	if (!IS_USER_ADDRESS(userName))
9179 		return B_BAD_ADDRESS;
9180 	status_t status = user_copy_name(name, userName, sizeof(name));
9181 	if (status != B_OK)
9182 		return status;
9183 
9184 	if ((openMode & O_CREAT) != 0) {
9185 		return file_create_entry_ref(device, inode, name, openMode, perms,
9186 			false);
9187 	}
9188 
9189 	return file_open_entry_ref(device, inode, name, openMode, false);
9190 }
9191 
9192 
9193 int
9194 _user_open(int fd, const char* userPath, int openMode, int perms)
9195 {
9196 	KPath path;
9197 	if (path.InitCheck() != B_OK)
9198 		return B_NO_MEMORY;
9199 
9200 	char* buffer = path.LockBuffer();
9201 
9202 	if (!IS_USER_ADDRESS(userPath))
9203 		return B_BAD_ADDRESS;
9204 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9205 	if (status != B_OK)
9206 		return status;
9207 
9208 	if ((openMode & O_CREAT) != 0)
9209 		return file_create(fd, buffer, openMode, perms, false);
9210 
9211 	return file_open(fd, buffer, openMode, false);
9212 }
9213 
9214 
9215 int
9216 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9217 {
9218 	if (userName != NULL) {
9219 		char name[B_FILE_NAME_LENGTH];
9220 
9221 		if (!IS_USER_ADDRESS(userName))
9222 			return B_BAD_ADDRESS;
9223 		status_t status = user_copy_name(name, userName, sizeof(name));
9224 		if (status != B_OK)
9225 			return status;
9226 
9227 		return dir_open_entry_ref(device, inode, name, false);
9228 	}
9229 	return dir_open_entry_ref(device, inode, NULL, false);
9230 }
9231 
9232 
9233 int
9234 _user_open_dir(int fd, const char* userPath)
9235 {
9236 	if (userPath == NULL)
9237 		return dir_open(fd, NULL, false);
9238 
9239 	KPath path;
9240 	if (path.InitCheck() != B_OK)
9241 		return B_NO_MEMORY;
9242 
9243 	char* buffer = path.LockBuffer();
9244 
9245 	if (!IS_USER_ADDRESS(userPath))
9246 		return B_BAD_ADDRESS;
9247 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9248 	if (status != B_OK)
9249 		return status;
9250 
9251 	return dir_open(fd, buffer, false);
9252 }
9253 
9254 
9255 /*!	\brief Opens a directory's parent directory and returns the entry name
9256 		   of the former.
9257 
9258 	Aside from also returning the directory's entry name, this method is
9259 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9260 	equivalent if \a userName is \c NULL.
9261 
9262 	If a name buffer is supplied and the name does not fit the buffer, the
9263 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9264 
9265 	\param fd A FD referring to a directory.
9266 	\param userName Buffer the directory's entry name shall be written into.
9267 		   May be \c NULL.
9268 	\param nameLength Size of the name buffer.
9269 	\return The file descriptor of the opened parent directory, if everything
9270 			went fine, an error code otherwise.
9271 */
9272 int
9273 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9274 {
9275 	bool kernel = false;
9276 
9277 	if (userName && !IS_USER_ADDRESS(userName))
9278 		return B_BAD_ADDRESS;
9279 
9280 	// open the parent dir
9281 	int parentFD = dir_open(fd, (char*)"..", kernel);
9282 	if (parentFD < 0)
9283 		return parentFD;
9284 	FDCloser fdCloser(parentFD, kernel);
9285 
9286 	if (userName) {
9287 		// get the vnodes
9288 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9289 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9290 		VNodePutter parentVNodePutter(parentVNode);
9291 		VNodePutter dirVNodePutter(dirVNode);
9292 		if (!parentVNode || !dirVNode)
9293 			return B_FILE_ERROR;
9294 
9295 		// get the vnode name
9296 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9297 		struct dirent* buffer = (struct dirent*)_buffer;
9298 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9299 			sizeof(_buffer), get_current_io_context(false));
9300 		if (status != B_OK)
9301 			return status;
9302 
9303 		// copy the name to the userland buffer
9304 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9305 		if (len < 0)
9306 			return len;
9307 		if (len >= (int)nameLength)
9308 			return B_BUFFER_OVERFLOW;
9309 	}
9310 
9311 	return fdCloser.Detach();
9312 }
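

/*!	Illustrative userland-side use of _user_open_parent_dir() (\c dirFD is
	hypothetical): walking one level up while learning the current
	directory's entry name.

	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
	if (parentFD >= 0) {
		// name now holds the entry name of dirFD within its parent
	}
	\endcode
*/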
9313 
9314 
9315 status_t
9316 _user_fcntl(int fd, int op, size_t argument)
9317 {
9318 	status_t status = common_fcntl(fd, op, argument, false);
9319 	if (op == F_SETLKW)
9320 		syscall_restart_handle_post(status);
9321 
9322 	return status;
9323 }
9324 
9325 
9326 status_t
9327 _user_fsync(int fd)
9328 {
9329 	return common_sync(fd, false);
9330 }
9331 
9332 
9333 status_t
9334 _user_flock(int fd, int operation)
9335 {
9336 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9337 
9338 	// Check if the operation is valid
9339 	switch (operation & ~LOCK_NB) {
9340 		case LOCK_UN:
9341 		case LOCK_SH:
9342 		case LOCK_EX:
9343 			break;
9344 
9345 		default:
9346 			return B_BAD_VALUE;
9347 	}
9348 
9349 	struct file_descriptor* descriptor;
9350 	struct vnode* vnode;
9351 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9352 	if (descriptor == NULL)
9353 		return B_FILE_ERROR;
9354 
9355 	if (descriptor->type != FDTYPE_FILE) {
9356 		put_fd(descriptor);
9357 		return B_BAD_VALUE;
9358 	}
9359 
9360 	struct flock flock;
9361 	flock.l_start = 0;
9362 	flock.l_len = OFF_MAX;
9363 	flock.l_whence = 0;
9364 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9365 
9366 	status_t status;
9367 	if ((operation & LOCK_UN) != 0) {
9368 		if (HAS_FS_CALL(vnode, release_lock))
9369 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9370 		else
9371 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9372 	} else {
9373 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9374 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9375 				(operation & LOCK_NB) == 0);
9376 		} else {
9377 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9378 				(operation & LOCK_NB) == 0);
9379 		}
9380 	}
9381 
9382 	syscall_restart_handle_post(status);
9383 
9384 	put_fd(descriptor);
9385 	return status;
9386 }
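

/*!	The flock() to advisory-lock translation used above, summarized: every
	flock() request is mapped onto a whole-file POSIX-style lock before it
	is handed to the file system or the generic advisory-lock code.

	\code
	// LOCK_SH          -> l_type = F_RDLCK
	// LOCK_EX, LOCK_UN -> l_type = F_WRLCK
	// l_start = 0, l_len = OFF_MAX  (the entire file)
	// LOCK_NB clear    -> wait until the lock becomes available
	\endcode
*/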
9387 
9388 
9389 status_t
9390 _user_lock_node(int fd)
9391 {
9392 	return common_lock_node(fd, false);
9393 }
9394 
9395 
9396 status_t
9397 _user_unlock_node(int fd)
9398 {
9399 	return common_unlock_node(fd, false);
9400 }
9401 
9402 
9403 status_t
9404 _user_preallocate(int fd, off_t offset, off_t length)
9405 {
9406 	return common_preallocate(fd, offset, length, false);
9407 }
9408 
9409 
9410 status_t
9411 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9412 	int perms)
9413 {
9414 	char name[B_FILE_NAME_LENGTH];
9415 	status_t status;
9416 
9417 	if (!IS_USER_ADDRESS(userName))
9418 		return B_BAD_ADDRESS;
9419 
9420 	status = user_copy_name(name, userName, sizeof(name));
9421 	if (status != B_OK)
9422 		return status;
9423 
9424 	return dir_create_entry_ref(device, inode, name, perms, false);
9425 }
9426 
9427 
9428 status_t
9429 _user_create_dir(int fd, const char* userPath, int perms)
9430 {
9431 	KPath pathBuffer;
9432 	if (pathBuffer.InitCheck() != B_OK)
9433 		return B_NO_MEMORY;
9434 
9435 	char* path = pathBuffer.LockBuffer();
9436 
9437 	if (!IS_USER_ADDRESS(userPath))
9438 		return B_BAD_ADDRESS;
9439 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9440 	if (status != B_OK)
9441 		return status;
9442 
9443 	return dir_create(fd, path, perms, false);
9444 }
9445 
9446 
9447 status_t
9448 _user_remove_dir(int fd, const char* userPath)
9449 {
9450 	KPath pathBuffer;
9451 	if (pathBuffer.InitCheck() != B_OK)
9452 		return B_NO_MEMORY;
9453 
9454 	char* path = pathBuffer.LockBuffer();
9455 
9456 	if (userPath != NULL) {
9457 		if (!IS_USER_ADDRESS(userPath))
9458 			return B_BAD_ADDRESS;
9459 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9460 		if (status != B_OK)
9461 			return status;
9462 	}
9463 
9464 	return dir_remove(fd, userPath ? path : NULL, false);
9465 }
9466 
9467 
9468 status_t
9469 _user_read_link(int fd, const char* userPath, char* userBuffer,
9470 	size_t* userBufferSize)
9471 {
9472 	KPath pathBuffer, linkBuffer;
9473 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9474 		return B_NO_MEMORY;
9475 
9476 	size_t bufferSize;
9477 
9478 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9479 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9480 		return B_BAD_ADDRESS;
9481 
9482 	char* path = pathBuffer.LockBuffer();
9483 	char* buffer = linkBuffer.LockBuffer();
9484 
9485 	if (userPath) {
9486 		if (!IS_USER_ADDRESS(userPath))
9487 			return B_BAD_ADDRESS;
9488 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9489 		if (status != B_OK)
9490 			return status;
9491 
9492 		if (bufferSize > B_PATH_NAME_LENGTH)
9493 			bufferSize = B_PATH_NAME_LENGTH;
9494 	}
9495 
9496 	size_t newBufferSize = bufferSize;
9497 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9498 		&newBufferSize, false);
9499 
9500 	// we also update the bufferSize in case of errors
9501 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9502 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9503 		return B_BAD_ADDRESS;
9504 
9505 	if (status != B_OK)
9506 		return status;
9507 
9508 	bufferSize = min_c(newBufferSize, bufferSize);
9509 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9510 		return B_BAD_ADDRESS;
9511 
9512 	return B_OK;
9513 }
9514 
9515 
9516 status_t
9517 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9518 	int mode)
9519 {
9520 	KPath pathBuffer;
9521 	KPath toPathBuffer;
9522 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9523 		return B_NO_MEMORY;
9524 
9525 	char* path = pathBuffer.LockBuffer();
9526 	char* toPath = toPathBuffer.LockBuffer();
9527 
9528 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9529 		return B_BAD_ADDRESS;
9530 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9531 	if (status != B_OK)
9532 		return status;
9533 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9534 	if (status != B_OK)
9535 		return status;
9536 
9537 	return common_create_symlink(fd, path, toPath, mode, false);
9538 }
9539 
9540 
9541 status_t
9542 _user_create_link(int pathFD, const char* userPath, int toFD,
9543 	const char* userToPath, bool traverseLeafLink)
9544 {
9545 	KPath pathBuffer;
9546 	KPath toPathBuffer;
9547 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9548 		return B_NO_MEMORY;
9549 
9550 	char* path = pathBuffer.LockBuffer();
9551 	char* toPath = toPathBuffer.LockBuffer();
9552 
9553 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9554 		return B_BAD_ADDRESS;
9555 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9556 	if (status != B_OK)
9557 		return status;
9558 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9559 	if (status != B_OK)
9560 		return status;
9561 
9562 	status = check_path(toPath);
9563 	if (status != B_OK)
9564 		return status;
9565 
9566 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9567 		false);
9568 }
9569 
9570 
9571 status_t
9572 _user_unlink(int fd, const char* userPath)
9573 {
9574 	KPath pathBuffer;
9575 	if (pathBuffer.InitCheck() != B_OK)
9576 		return B_NO_MEMORY;
9577 
9578 	char* path = pathBuffer.LockBuffer();
9579 
9580 	if (!IS_USER_ADDRESS(userPath))
9581 		return B_BAD_ADDRESS;
9582 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9583 	if (status != B_OK)
9584 		return status;
9585 
9586 	return common_unlink(fd, path, false);
9587 }
9588 
9589 
9590 status_t
9591 _user_rename(int oldFD, const char* userOldPath, int newFD,
9592 	const char* userNewPath)
9593 {
9594 	KPath oldPathBuffer;
9595 	KPath newPathBuffer;
9596 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9597 		return B_NO_MEMORY;
9598 
9599 	char* oldPath = oldPathBuffer.LockBuffer();
9600 	char* newPath = newPathBuffer.LockBuffer();
9601 
9602 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9603 		return B_BAD_ADDRESS;
9604 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9605 	if (status != B_OK)
9606 		return status;
9607 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9608 	if (status != B_OK)
9609 		return status;
9610 
9611 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9612 }
9613 
9614 
9615 status_t
9616 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9617 {
9618 	KPath pathBuffer;
9619 	if (pathBuffer.InitCheck() != B_OK)
9620 		return B_NO_MEMORY;
9621 
9622 	char* path = pathBuffer.LockBuffer();
9623 
9624 	if (!IS_USER_ADDRESS(userPath))
9625 		return B_BAD_ADDRESS;
9626 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9627 	if (status != B_OK)
9628 		return status;
9629 
9630 	// split into directory vnode and filename path
9631 	char filename[B_FILE_NAME_LENGTH];
9632 	struct vnode* dir;
9633 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9634 	if (status != B_OK)
9635 		return status;
9636 
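	// make sure the reference to the directory vnode acquired by
	// fd_and_path_to_dir_vnode() is released when we return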
9637 	VNodePutter _(dir);
9638 
9639 	// the underlying FS needs to support creating FIFOs
9640 	if (!HAS_FS_CALL(dir, create_special_node))
9641 		return B_UNSUPPORTED;
9642 
9643 	// create the entry -- the FIFO sub node is set up automatically
9644 	fs_vnode superVnode;
9645 	ino_t nodeID;
9646 	status = FS_CALL(dir, create_special_node, filename, NULL,
9647 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9648 
9649 	// create_special_node() acquired a reference for us that we don't need.
9650 	if (status == B_OK)
9651 		put_vnode(dir->mount->volume, nodeID);
9652 
9653 	return status;
9654 }
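
// Illustrative sketch, not part of this file: POSIX mkfifo() is assumed to
// map onto this syscall, e.g.
//
//	#include <sys/stat.h>
//
//	if (mkfifo("/tmp/my_fifo", 0600) != 0)
//		perror("mkfifo");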
9655 
9656 
9657 status_t
9658 _user_create_pipe(int* userFDs)
9659 {
9660 	// rootfs should support creating FIFOs, but let's be sure
9661 	if (!HAS_FS_CALL(sRoot, create_special_node))
9662 		return B_UNSUPPORTED;
9663 
9664 	// create the node -- the FIFO sub node is set up automatically
9665 	fs_vnode superVnode;
9666 	ino_t nodeID;
9667 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9668 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9669 	if (status != B_OK)
9670 		return status;
9671 
9672 	// We've got one reference to the node and need another one.
9673 	struct vnode* vnode;
9674 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9675 	if (status != B_OK) {
9676 		// that should not happen
9677 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9678 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9679 		return status;
9680 	}
9681 
9682 	// Everything looks good so far. Open two FDs, one for reading and one
9683 	// for writing.
9684 	int fds[2];
9685 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9686 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9687 
9688 	FDCloser closer0(fds[0], false);
9689 	FDCloser closer1(fds[1], false);
9690 
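	// open_vnode() returns a negative error code on failure; keep the first
	// error encountered, if any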
9691 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9692 
9693 	// copy FDs to userland
9694 	if (status == B_OK) {
9695 		if (!IS_USER_ADDRESS(userFDs)
9696 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9697 			status = B_BAD_ADDRESS;
9698 		}
9699 	}
9700 
9701 	// keep FDs, if everything went fine
9702 	if (status == B_OK) {
9703 		closer0.Detach();
9704 		closer1.Detach();
9705 	}
9706 
9707 	return status;
9708 }
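
// Illustrative sketch, not part of this file: this backs the POSIX pipe()
// wrapper. A minimal userland round trip:
//
//	#include <unistd.h>
//
//	int fds[2];
//	if (pipe(fds) == 0) {
//		char c = 'x';
//		write(fds[1], &c, 1);	// fds[1] is the write end
//		read(fds[0], &c, 1);	// fds[0] is the read end
//		close(fds[0]);
//		close(fds[1]);
//	}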
9709 
9710 
9711 status_t
9712 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9713 {
9714 	KPath pathBuffer;
9715 	if (pathBuffer.InitCheck() != B_OK)
9716 		return B_NO_MEMORY;
9717 
9718 	char* path = pathBuffer.LockBuffer();
9719 
9720 	if (!IS_USER_ADDRESS(userPath))
9721 		return B_BAD_ADDRESS;
9722 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9723 	if (status != B_OK)
9724 		return status;
9725 
9726 	return common_access(fd, path, mode, effectiveUserGroup, false);
9727 }
9728 
9729 
9730 status_t
9731 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9732 	struct stat* userStat, size_t statSize)
9733 {
9734 	struct stat stat = {0};
9735 	status_t status;
9736 
9737 	if (statSize > sizeof(struct stat))
9738 		return B_BAD_VALUE;
9739 
9740 	if (!IS_USER_ADDRESS(userStat))
9741 		return B_BAD_ADDRESS;
9742 
9743 	if (userPath != NULL) {
9744 		// path given: get the stat of the node referred to by (fd, path)
9745 		if (!IS_USER_ADDRESS(userPath))
9746 			return B_BAD_ADDRESS;
9747 
9748 		KPath pathBuffer;
9749 		if (pathBuffer.InitCheck() != B_OK)
9750 			return B_NO_MEMORY;
9751 
9752 		char* path = pathBuffer.LockBuffer();
9753 
9754 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9755 		if (status != B_OK)
9756 			return status;
9757 
9758 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9759 	} else {
9760 		// no path given: get the FD and use the FD operation
9761 		struct file_descriptor* descriptor
9762 			= get_fd(get_current_io_context(false), fd);
9763 		if (descriptor == NULL)
9764 			return B_FILE_ERROR;
9765 
9766 		if (descriptor->ops->fd_read_stat)
9767 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9768 		else
9769 			status = B_UNSUPPORTED;
9770 
9771 		put_fd(descriptor);
9772 	}
9773 
9774 	if (status != B_OK)
9775 		return status;
9776 
9777 	return user_memcpy(userStat, &stat, statSize);
9778 }
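
// Note: statSize exists for binary compatibility -- callers compiled against
// an older, smaller struct stat keep working, since only the first statSize
// bytes are copied back. libroot's stat() is assumed to pass the full size,
// roughly:
//
//	struct stat st;
//	status_t status = _kern_read_stat(-1, path, true, &st,
//		sizeof(struct stat));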
9779 
9780 
9781 status_t
9782 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9783 	const struct stat* userStat, size_t statSize, int statMask)
9784 {
9785 	if (statSize > sizeof(struct stat))
9786 		return B_BAD_VALUE;
9787 
9788 	struct stat stat;
9789 
9790 	if (!IS_USER_ADDRESS(userStat)
9791 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9792 		return B_BAD_ADDRESS;
9793 
9794 	// clear additional stat fields
9795 	if (statSize < sizeof(struct stat))
9796 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9797 
9798 	status_t status;
9799 
9800 	if (userPath != NULL) {
9801 		// path given: write the stat of the node referred to by (fd, path)
9802 		if (!IS_USER_ADDRESS(userPath))
9803 			return B_BAD_ADDRESS;
9804 
9805 		KPath pathBuffer;
9806 		if (pathBuffer.InitCheck() != B_OK)
9807 			return B_NO_MEMORY;
9808 
9809 		char* path = pathBuffer.LockBuffer();
9810 
9811 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9812 		if (status != B_OK)
9813 			return status;
9814 
9815 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9816 			statMask, false);
9817 	} else {
9818 		// no path given: get the FD and use the FD operation
9819 		struct file_descriptor* descriptor
9820 			= get_fd(get_current_io_context(false), fd);
9821 		if (descriptor == NULL)
9822 			return B_FILE_ERROR;
9823 
9824 		if (descriptor->ops->fd_write_stat) {
9825 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9826 				statMask);
9827 		} else
9828 			status = B_UNSUPPORTED;
9829 
9830 		put_fd(descriptor);
9831 	}
9832 
9833 	return status;
9834 }
9835 
9836 
9837 int
9838 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9839 {
9840 	KPath pathBuffer;
9841 	if (pathBuffer.InitCheck() != B_OK)
9842 		return B_NO_MEMORY;
9843 
9844 	char* path = pathBuffer.LockBuffer();
9845 
9846 	if (userPath != NULL) {
9847 		if (!IS_USER_ADDRESS(userPath))
9848 			return B_BAD_ADDRESS;
9849 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9850 		if (status != B_OK)
9851 			return status;
9852 	}
9853 
9854 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9855 }
9856 
9857 
9858 ssize_t
9859 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9860 	size_t readBytes)
9861 {
9862 	char attribute[B_FILE_NAME_LENGTH];
9863 
9864 	if (userAttribute == NULL)
9865 		return B_BAD_VALUE;
9866 	if (!IS_USER_ADDRESS(userAttribute))
9867 		return B_BAD_ADDRESS;
9868 	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9869 	if (status != B_OK)
9870 		return status;
9871 
9872 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9873 	if (attr < 0)
9874 		return attr;
9875 
9876 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9877 	_user_close(attr);
9878 
9879 	return bytes;
9880 }
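
// Illustrative sketch, not part of this file: the public fs_read_attr() API
// from <fs_attr.h> is assumed to map onto this syscall (which, as above, is
// just attr_open() plus read plus close), e.g.
//
//	char subject[256];
//	ssize_t bytes = fs_read_attr(fd, "MAIL:subject", B_STRING_TYPE, 0,
//		subject, sizeof(subject));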
9881 
9882 
9883 ssize_t
9884 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9885 	const void* buffer, size_t writeBytes)
9886 {
9887 	char attribute[B_FILE_NAME_LENGTH];
9888 
9889 	if (userAttribute == NULL)
9890 		return B_BAD_VALUE;
9891 	if (!IS_USER_ADDRESS(userAttribute))
9892 		return B_BAD_ADDRESS;
9893 	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9894 	if (status != B_OK)
9895 		return status;
9896 
9897 	// Try to support the BeOS-typical truncation (a write at offset 0
9898 	// truncates the attribute first) as well as the position argument
9899 	int attr = attr_create(fd, NULL, attribute, type,
9900 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9901 	if (attr < 0)
9902 		return attr;
9903 
9904 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9905 	_user_close(attr);
9906 
9907 	return bytes;
9908 }
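
// Illustrative sketch, not part of this file: the matching public API is
// fs_write_attr() from <fs_attr.h>. Note the semantics implemented above:
// a write starting at position 0 truncates the attribute first, while a
// nonzero position writes in place:
//
//	const char* value = "urgent";
//	fs_write_attr(fd, "MAIL:priority", B_STRING_TYPE, 0, value,
//		strlen(value) + 1);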
9909 
9910 
9911 status_t
9912 _user_stat_attr(int fd, const char* userAttribute,
9913 	struct attr_info* userAttrInfo)
9914 {
9915 	char attribute[B_FILE_NAME_LENGTH];
9916 
9917 	if (userAttribute == NULL || userAttrInfo == NULL)
9918 		return B_BAD_VALUE;
9919 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9920 		return B_BAD_ADDRESS;
9921 	status_t status = user_copy_name(attribute, userAttribute,
9922 		sizeof(attribute));
9923 	if (status != B_OK)
9924 		return status;
9925 
9926 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9927 	if (attr < 0)
9928 		return attr;
9929 
9930 	struct file_descriptor* descriptor
9931 		= get_fd(get_current_io_context(false), attr);
9932 	if (descriptor == NULL) {
9933 		_user_close(attr);
9934 		return B_FILE_ERROR;
9935 	}
9936 
9937 	struct stat stat;
9938 	if (descriptor->ops->fd_read_stat)
9939 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9940 	else
9941 		status = B_UNSUPPORTED;
9942 
9943 	put_fd(descriptor);
9944 	_user_close(attr);
9945 
9946 	if (status == B_OK) {
9947 		attr_info info;
9948 		info.type = stat.st_type;
9949 		info.size = stat.st_size;
9950 
9951 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9952 			return B_BAD_ADDRESS;
9953 	}
9954 
9955 	return status;
9956 }
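
// Illustrative sketch, not part of this file: userland's fs_stat_attr() from
// <fs_attr.h> is assumed to use this syscall to learn an attribute's type
// and size before reading it:
//
//	attr_info info;
//	if (fs_stat_attr(fd, "MAIL:subject", &info) == 0)
//		printf("type %#" B_PRIx32 ", size %" B_PRIdOFF "\n", info.type,
//			info.size);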
9957 
9958 
9959 int
9960 _user_open_attr(int fd, const char* userPath, const char* userName,
9961 	uint32 type, int openMode)
9962 {
9963 	char name[B_FILE_NAME_LENGTH];
9964 
9965 	if (!IS_USER_ADDRESS(userName))
9966 		return B_BAD_ADDRESS;
9967 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9968 	if (status != B_OK)
9969 		return status;
9970 
9971 	KPath pathBuffer;
9972 	if (pathBuffer.InitCheck() != B_OK)
9973 		return B_NO_MEMORY;
9974 
9975 	char* path = pathBuffer.LockBuffer();
9976 
9977 	if (userPath != NULL) {
9978 		if (!IS_USER_ADDRESS(userPath))
9979 			return B_BAD_ADDRESS;
9980 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9981 		if (status != B_OK)
9982 			return status;
9983 	}
9984 
9985 	if ((openMode & O_CREAT) != 0) {
9986 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9987 			false);
9988 	}
9989 
9990 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9991 }
9992 
9993 
9994 status_t
9995 _user_remove_attr(int fd, const char* userName)
9996 {
9997 	char name[B_FILE_NAME_LENGTH];
9998 
9999 	if (!IS_USER_ADDRESS(userName))
10000 		return B_BAD_ADDRESS;
10001 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10002 	if (status != B_OK)
10003 		return status;
10004 
10005 	return attr_remove(fd, name, false);
10006 }
10007 
10008 
10009 status_t
10010 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
10011 	const char* userToName)
10012 {
10013 	if (!IS_USER_ADDRESS(userFromName)
10014 		|| !IS_USER_ADDRESS(userToName))
10015 		return B_BAD_ADDRESS;
10016 
10017 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
10018 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
10019 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
10020 		return B_NO_MEMORY;
10021 
10022 	char* fromName = fromNameBuffer.LockBuffer();
10023 	char* toName = toNameBuffer.LockBuffer();
10024 
10025 	status_t status = user_copy_name(fromName, userFromName,
		B_FILE_NAME_LENGTH);
10026 	if (status != B_OK)
10027 		return status;
10028 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
10029 	if (status != B_OK)
10030 		return status;
10031 
10032 	return attr_rename(fromFile, fromName, toFile, toName, false);
10033 }
10034 
10035 
10036 int
10037 _user_open_index_dir(dev_t device)
10038 {
10039 	return index_dir_open(device, false);
10040 }
10041 
10042 
10043 status_t
10044 _user_create_index(dev_t device, const char* userName, uint32 type,
10045 	uint32 flags)
10046 {
10047 	char name[B_FILE_NAME_LENGTH];
10048 
10049 	if (!IS_USER_ADDRESS(userName))
10050 		return B_BAD_ADDRESS;
10051 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10052 	if (status != B_OK)
10053 		return status;
10054 
10055 	return index_create(device, name, type, flags, false);
10056 }
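
// Illustrative sketch, not part of this file: the public counterpart is
// fs_create_index() from <fs_index.h>, e.g. to make an attribute queryable
// on a volume:
//
//	dev_t device = dev_for_path("/boot");
//	if (device >= 0)
//		fs_create_index(device, "MAIL:subject", B_STRING_TYPE, 0);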
10057 
10058 
10059 status_t
10060 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
10061 {
10062 	char name[B_FILE_NAME_LENGTH];
10063 	struct stat stat = {0};
10064 	status_t status;
10065 
10066 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
10067 		return B_BAD_ADDRESS;
10068 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10069 	if (status != B_OK)
10070 		return status;
10071 
10072 	status = index_name_read_stat(device, name, &stat, false);
10073 	if (status == B_OK) {
10074 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
10075 			return B_BAD_ADDRESS;
10076 	}
10077 
10078 	return status;
10079 }
10080 
10081 
10082 status_t
10083 _user_remove_index(dev_t device, const char* userName)
10084 {
10085 	char name[B_FILE_NAME_LENGTH];
10086 
10087 	if (!IS_USER_ADDRESS(userName))
10088 		return B_BAD_ADDRESS;
10089 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10090 	if (status != B_OK)
10091 		return status;
10092 
10093 	return index_remove(device, name, false);
10094 }
10095 
10096 
10097 status_t
10098 _user_getcwd(char* userBuffer, size_t size)
10099 {
10100 	if (size == 0)
10101 		return B_BAD_VALUE;
10102 	if (!IS_USER_ADDRESS(userBuffer))
10103 		return B_BAD_ADDRESS;
10104 
10105 	if (size > kMaxPathLength)
10106 		size = kMaxPathLength;
10107 
10108 	KPath pathBuffer(size);
10109 	if (pathBuffer.InitCheck() != B_OK)
10110 		return B_NO_MEMORY;
10111 
10112 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10113 
10114 	char* path = pathBuffer.LockBuffer();
10115 
10116 	status_t status = get_cwd(path, size, false);
10117 	if (status != B_OK)
10118 		return status;
10119 
10120 	// Copy back the result
10121 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10122 		return B_BAD_ADDRESS;
10123 
10124 	return status;
10125 }
10126 
10127 
10128 status_t
10129 _user_setcwd(int fd, const char* userPath)
10130 {
10131 	TRACE(("user_setcwd: path = %p\n", userPath));
10132 
10133 	KPath pathBuffer;
10134 	if (pathBuffer.InitCheck() != B_OK)
10135 		return B_NO_MEMORY;
10136 
10137 	char* path = pathBuffer.LockBuffer();
10138 
10139 	if (userPath != NULL) {
10140 		if (!IS_USER_ADDRESS(userPath))
10141 			return B_BAD_ADDRESS;
10142 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10143 		if (status != B_OK)
10144 			return status;
10145 	}
10146 
10147 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10148 }
10149 
10150 
10151 status_t
10152 _user_change_root(const char* userPath)
10153 {
10154 	// only root is allowed to chroot()
10155 	if (geteuid() != 0)
10156 		return B_NOT_ALLOWED;
10157 
10158 	// alloc path buffer
10159 	KPath pathBuffer;
10160 	if (pathBuffer.InitCheck() != B_OK)
10161 		return B_NO_MEMORY;
10162 
10163 	// copy userland path to kernel
10164 	char* path = pathBuffer.LockBuffer();
10165 	if (userPath != NULL) {
10166 		if (!IS_USER_ADDRESS(userPath))
10167 			return B_BAD_ADDRESS;
10168 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10169 		if (status != B_OK)
10170 			return status;
10171 	}
10172 
10173 	// get the vnode
10174 	struct vnode* vnode;
10175 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
10176 	if (status != B_OK)
10177 		return status;
10178 
10179 	// set the new root
10180 	struct io_context* context = get_current_io_context(false);
10181 	mutex_lock(&sIOContextRootLock);
10182 	struct vnode* oldRoot = context->root;
10183 	context->root = vnode;
10184 	mutex_unlock(&sIOContextRootLock);
10185 
10186 	put_vnode(oldRoot);
10187 
10188 	return B_OK;
10189 }
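
// Note: only the calling team's io_context root is swapped above, and the
// swap happens under sIOContextRootLock, so concurrent path resolution sees
// either the old or the new root, never a torn state.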
10190 
10191 
10192 int
10193 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10194 	uint32 flags, port_id port, int32 token)
10195 {
10196 	if (device < 0 || userQuery == NULL || queryLength == 0)
10197 		return B_BAD_VALUE;
10198 
10199 	if (!IS_USER_ADDRESS(userQuery))
10200 		return B_BAD_ADDRESS;
10201 
10202 	// safety restriction: reject overlong queries to bound the allocation
10203 	if (queryLength >= 65536)
10204 		return B_NAME_TOO_LONG;
10205 
10206 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10207 	if (!query.IsValid())
10208 		return B_NO_MEMORY;
10209 
10210 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10211 		return B_BAD_ADDRESS;
10212 
10213 	return query_open(device, query, flags, port, token, false);
10214 }
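
// Illustrative sketch, not part of this file: queries are normally issued
// through fs_open_query() from <fs_query.h>, which is assumed to funnel
// into this syscall:
//
//	dev_t device = dev_for_path("/boot");
//	DIR* query = fs_open_query(device, "MAIL:subject == *urgent*", 0);
//	if (query != NULL) {
//		while (struct dirent* entry = fs_read_query(query))
//			puts(entry->d_name);
//		fs_close_query(query);
//	}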
10215 
10216 
10217 #include "vfs_request_io.cpp"
10218