xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 52c4471a3024d2eb81fe88e2c3982b9f8daa5e56)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <block_cache.h>
36 #include <boot/kernel_args.h>
37 #include <debug_heap.h>
38 #include <disk_device_manager/KDiskDevice.h>
39 #include <disk_device_manager/KDiskDeviceManager.h>
40 #include <disk_device_manager/KDiskDeviceUtils.h>
41 #include <disk_device_manager/KDiskSystem.h>
42 #include <fd.h>
43 #include <file_cache.h>
44 #include <fs/node_monitor.h>
45 #include <KPath.h>
46 #include <lock.h>
47 #include <low_resource_manager.h>
48 #include <slab/Slab.h>
49 #include <StackOrHeapArray.h>
50 #include <syscalls.h>
51 #include <syscall_restart.h>
52 #include <tracing.h>
53 #include <util/atomic.h>
54 #include <util/AutoLock.h>
55 #include <util/ThreadAutoLock.h>
56 #include <util/DoublyLinkedList.h>
57 #include <vfs.h>
58 #include <vm/vm.h>
59 #include <vm/VMCache.h>
60 #include <wait_for_objects.h>
61 
62 #include "EntryCache.h"
63 #include "fifo.h"
64 #include "IORequest.h"
65 #include "unused_vnodes.h"
66 #include "vfs_tracing.h"
67 #include "Vnode.h"
68 #include "../cache/vnode_store.h"
69 
70 
71 //#define TRACE_VFS
72 #ifdef TRACE_VFS
73 #	define TRACE(x) dprintf x
74 #	define FUNCTION(x) dprintf x
75 #else
76 #	define TRACE(x) ;
77 #	define FUNCTION(x) ;
78 #endif
79 
80 #define ADD_DEBUGGER_COMMANDS
81 
82 
83 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
84 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
85 
86 #if KDEBUG
87 #	define FS_CALL(vnode, op, params...) \
88 		( HAS_FS_CALL(vnode, op) ? \
89 			vnode->ops->op(vnode->mount->volume, vnode, params) \
90 			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
91 #	define FS_CALL_NO_PARAMS(vnode, op) \
92 		( HAS_FS_CALL(vnode, op) ? \
93 			vnode->ops->op(vnode->mount->volume, vnode) \
94 			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
95 #	define FS_MOUNT_CALL(mount, op, params...) \
96 		( HAS_FS_MOUNT_CALL(mount, op) ? \
97 			mount->volume->ops->op(mount->volume, params) \
98 			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
99 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
100 		( HAS_FS_MOUNT_CALL(mount, op) ? \
101 			mount->volume->ops->op(mount->volume) \
102 			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
103 #else
104 #	define FS_CALL(vnode, op, params...) \
105 			vnode->ops->op(vnode->mount->volume, vnode, params)
106 #	define FS_CALL_NO_PARAMS(vnode, op) \
107 			vnode->ops->op(vnode->mount->volume, vnode)
108 #	define FS_MOUNT_CALL(mount, op, params...) \
109 			mount->volume->ops->op(mount->volume, params)
110 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
111 			mount->volume->ops->op(mount->volume)
112 #endif
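
// Illustrative expansion (not part of the original source): in the
// non-KDEBUG case, a call such as
//
//	FS_CALL(vnode, read, cookie, pos, buffer, &length)
//
// simply expands to the indirect hook invocation
//
//	vnode->ops->read(vnode->mount->volume, vnode, cookie, pos, buffer,
//		&length);
//
// while the KDEBUG variant first checks HAS_FS_CALL() and panics on a
// missing hook instead of jumping through a NULL pointer.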
113 
114 
115 const static size_t kMaxPathLength = 65536;
116 	// The absolute maximum path length (for getcwd()); this does not depend
117 	// on PATH_MAX.
118 
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it is ensured that it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount), (read) access
128 	to those fields is always safe, even without additional locking. Moreover,
129 	while mounted, the mount holds a reference to the root_vnode->covers vnode,
130 	thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe if a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		mutex_init(&lock, "mount lock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		mutex_destroy(&lock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	mutex			lock;	// guards the vnodes list
165 	struct vnode*	root_vnode;
166 	struct vnode*	covers_vnode;	// immutable
167 	KPartition*		partition;
168 	VnodeList		vnodes;
169 	EntryCache		entry_cache;
170 	bool			unmounting;
171 	bool			owns_file_device;
172 };
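
// A minimal sketch (assumption: the caller holds a reference to 'vnode')
// showing why the immutability guarantees documented above matter -- the
// whole access path is safe without taking any lock:
//
//	static const char*
//	device_name_for(struct vnode* vnode)
//	{
//		// mount, root_vnode and device_name are immutable while mounted
//		return vnode->mount->device_name;
//	}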
173 
174 
175 namespace {
176 
177 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
178 	list_link		link;
179 	void*			bound_to;
180 	team_id			team;
181 	pid_t			session;
182 	off_t			start;
183 	off_t			end;
184 	bool			shared;
185 };
186 
187 typedef DoublyLinkedList<advisory_lock> LockList;
188 
189 } // namespace
190 
191 
192 struct advisory_locking {
193 	sem_id			lock;
194 	sem_id			wait_sem;
195 	LockList		locks;
196 
197 	advisory_locking()
198 		:
199 		lock(-1),
200 		wait_sem(-1)
201 	{
202 	}
203 
204 	~advisory_locking()
205 	{
206 		if (lock >= 0)
207 			delete_sem(lock);
208 		if (wait_sem >= 0)
209 			delete_sem(wait_sem);
210 	}
211 };
212 
213 /*!	\brief Guards sMountsTable.
214 
215 	The holder is allowed read/write access to sMountsTable.
216 	Manipulation of the fs_mount structures themselves
217 	(and their destruction) requires different locks though.
218 */
219 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
220 
221 /*!	\brief Guards mount/unmount operations.
222 
223 	fs_mount() and fs_unmount() hold the lock during their whole operation.
224 	That is, locking the lock ensures that no FS is mounted/unmounted. In
225 	particular this means that:
226 	- sMountsTable will not be modified,
227 	- the fields of the fs_mount structures in sMountsTable that are
228 	  immutable after initialization will not be modified.
229 
230 	The thread trying to lock the lock must not hold sVnodeLock or
231 	sMountLock.
232 */
233 static recursive_lock sMountOpLock;
234 
235 /*!	\brief Guards sVnodeTable.
236 
237 	The holder is allowed read/write access to sVnodeTable and to
238 	any unbusy vnode in that table, save for the immutable fields (device, id,
239 	private_node, mount), to which only read-only access is allowed.
240 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
241 	well as the busy, removed, and unused flags and the vnode's type, can also
242 	be write accessed when holding a read lock to sVnodeLock *and* having the
243 	vnode locked. Write access to covered_by and covers requires write locking
244 	sVnodeLock.
245 
246 	The thread trying to acquire the lock must not hold sMountLock.
247 	You must not hold this lock when calling create_sem(), as this might call
248 	vfs_free_unused_vnodes() and thus cause a deadlock.
249 */
250 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
251 
252 /*!	\brief Guards io_context::root.
253 
254 	Must be held when setting or getting the io_context::root field.
255 	The only operation allowed while holding this lock besides getting or
256 	setting the field is inc_vnode_ref_count() on io_context::root.
257 */
258 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
259 
260 
261 namespace {
262 
263 struct vnode_hash_key {
264 	dev_t	device;
265 	ino_t	vnode;
266 };
267 
268 struct VnodeHash {
269 	typedef vnode_hash_key	KeyType;
270 	typedef	struct vnode	ValueType;
271 
272 #define VHASH(mountid, vnodeid) \
273 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
274 
275 	size_t HashKey(KeyType key) const
276 	{
277 		return VHASH(key.device, key.vnode);
278 	}
279 
280 	size_t Hash(ValueType* vnode) const
281 	{
282 		return VHASH(vnode->device, vnode->id);
283 	}
284 
285 #undef VHASH
286 
287 	bool Compare(KeyType key, ValueType* vnode) const
288 	{
289 		return vnode->device == key.device && vnode->id == key.vnode;
290 	}
291 
292 	ValueType*& GetLink(ValueType* value) const
293 	{
294 		return value->next;
295 	}
296 };
297 
298 typedef BOpenHashTable<VnodeHash> VnodeTable;
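
// Worked example (illustrative): VHASH folds the 64-bit vnode ID and XORs in
// the mount ID, so for key { device = 3, vnode = 0x100000002 } the hash is
// ((uint32)0x1 + (uint32)0x2) ^ (uint32)0x3 == 0.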
299 
300 
301 struct MountHash {
302 	typedef dev_t			KeyType;
303 	typedef	struct fs_mount	ValueType;
304 
305 	size_t HashKey(KeyType key) const
306 	{
307 		return key;
308 	}
309 
310 	size_t Hash(ValueType* mount) const
311 	{
312 		return mount->id;
313 	}
314 
315 	bool Compare(KeyType key, ValueType* mount) const
316 	{
317 		return mount->id == key;
318 	}
319 
320 	ValueType*& GetLink(ValueType* value) const
321 	{
322 		return value->next;
323 	}
324 };
325 
326 typedef BOpenHashTable<MountHash> MountTable;
327 
328 } // namespace
329 
330 
331 object_cache* sPathNameCache;
332 object_cache* sVnodeCache;
333 object_cache* sFileDescriptorCache;
334 
335 #define VNODE_HASH_TABLE_SIZE 1024
336 static VnodeTable* sVnodeTable;
337 static struct vnode* sRoot;
338 
339 #define MOUNTS_HASH_TABLE_SIZE 16
340 static MountTable* sMountsTable;
341 static dev_t sNextMountID = 1;
342 
343 #define MAX_TEMP_IO_VECS 8
344 
345 // How long to wait for busy vnodes (2000 retries * 5ms delay = 10s total)
346 #define BUSY_VNODE_RETRIES 2000
347 #define BUSY_VNODE_DELAY 5000
348 
349 mode_t __gUmask = 022;
350 
351 /* function declarations */
352 
353 static void free_unused_vnodes();
354 
355 // file descriptor operation prototypes
356 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
357 	void* buffer, size_t* _bytes);
358 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
359 	const void* buffer, size_t* _bytes);
360 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
361 	int seekType);
362 static void file_free_fd(struct file_descriptor* descriptor);
363 static status_t file_close(struct file_descriptor* descriptor);
364 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
365 	struct selectsync* sync);
366 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
367 	struct selectsync* sync);
368 static status_t dir_read(struct io_context* context,
369 	struct file_descriptor* descriptor, struct dirent* buffer,
370 	size_t bufferSize, uint32* _count);
371 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
372 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
373 static status_t dir_rewind(struct file_descriptor* descriptor);
374 static void dir_free_fd(struct file_descriptor* descriptor);
375 static status_t dir_close(struct file_descriptor* descriptor);
376 static status_t attr_dir_read(struct io_context* context,
377 	struct file_descriptor* descriptor, struct dirent* buffer,
378 	size_t bufferSize, uint32* _count);
379 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
380 static void attr_dir_free_fd(struct file_descriptor* descriptor);
381 static status_t attr_dir_close(struct file_descriptor* descriptor);
382 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
383 	void* buffer, size_t* _bytes);
384 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
385 	const void* buffer, size_t* _bytes);
386 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
387 	int seekType);
388 static void attr_free_fd(struct file_descriptor* descriptor);
389 static status_t attr_close(struct file_descriptor* descriptor);
390 static status_t attr_read_stat(struct file_descriptor* descriptor,
391 	struct stat* statData);
392 static status_t attr_write_stat(struct file_descriptor* descriptor,
393 	const struct stat* stat, int statMask);
394 static status_t index_dir_read(struct io_context* context,
395 	struct file_descriptor* descriptor, struct dirent* buffer,
396 	size_t bufferSize, uint32* _count);
397 static status_t index_dir_rewind(struct file_descriptor* descriptor);
398 static void index_dir_free_fd(struct file_descriptor* descriptor);
399 static status_t index_dir_close(struct file_descriptor* descriptor);
400 static status_t query_read(struct io_context* context,
401 	struct file_descriptor* descriptor, struct dirent* buffer,
402 	size_t bufferSize, uint32* _count);
403 static status_t query_rewind(struct file_descriptor* descriptor);
404 static void query_free_fd(struct file_descriptor* descriptor);
405 static status_t query_close(struct file_descriptor* descriptor);
406 
407 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
408 	void* buffer, size_t length);
409 static status_t common_read_stat(struct file_descriptor* descriptor,
410 	struct stat* statData);
411 static status_t common_write_stat(struct file_descriptor* descriptor,
412 	const struct stat* statData, int statMask);
413 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
414 	struct stat* stat, bool kernel);
415 
416 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
417 	bool traverseLeafLink, int count, bool kernel,
418 	struct vnode** _vnode, ino_t* _parentID);
419 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
420 	size_t bufferSize, bool kernel);
421 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
422 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
423 static void inc_vnode_ref_count(struct vnode* vnode);
424 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
425 	bool reenter);
426 static inline void put_vnode(struct vnode* vnode);
427 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
428 	bool kernel);
429 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
430 
431 
432 static struct fd_ops sFileOps = {
433 	file_read,
434 	file_write,
435 	file_seek,
436 	common_ioctl,
437 	NULL,		// set_flags
438 	file_select,
439 	file_deselect,
440 	NULL,		// read_dir()
441 	NULL,		// rewind_dir()
442 	common_read_stat,
443 	common_write_stat,
444 	file_close,
445 	file_free_fd
446 };
447 
448 static struct fd_ops sDirectoryOps = {
449 	NULL,		// read()
450 	NULL,		// write()
451 	NULL,		// seek()
452 	common_ioctl,
453 	NULL,		// set_flags
454 	NULL,		// select()
455 	NULL,		// deselect()
456 	dir_read,
457 	dir_rewind,
458 	common_read_stat,
459 	common_write_stat,
460 	dir_close,
461 	dir_free_fd
462 };
463 
464 static struct fd_ops sAttributeDirectoryOps = {
465 	NULL,		// read()
466 	NULL,		// write()
467 	NULL,		// seek()
468 	common_ioctl,
469 	NULL,		// set_flags
470 	NULL,		// select()
471 	NULL,		// deselect()
472 	attr_dir_read,
473 	attr_dir_rewind,
474 	common_read_stat,
475 	common_write_stat,
476 	attr_dir_close,
477 	attr_dir_free_fd
478 };
479 
480 static struct fd_ops sAttributeOps = {
481 	attr_read,
482 	attr_write,
483 	attr_seek,
484 	common_ioctl,
485 	NULL,		// set_flags
486 	NULL,		// select()
487 	NULL,		// deselect()
488 	NULL,		// read_dir()
489 	NULL,		// rewind_dir()
490 	attr_read_stat,
491 	attr_write_stat,
492 	attr_close,
493 	attr_free_fd
494 };
495 
496 static struct fd_ops sIndexDirectoryOps = {
497 	NULL,		// read()
498 	NULL,		// write()
499 	NULL,		// seek()
500 	NULL,		// ioctl()
501 	NULL,		// set_flags
502 	NULL,		// select()
503 	NULL,		// deselect()
504 	index_dir_read,
505 	index_dir_rewind,
506 	NULL,		// read_stat()
507 	NULL,		// write_stat()
508 	index_dir_close,
509 	index_dir_free_fd
510 };
511 
512 #if 0
513 static struct fd_ops sIndexOps = {
514 	NULL,		// read()
515 	NULL,		// write()
516 	NULL,		// seek()
517 	NULL,		// ioctl()
518 	NULL,		// set_flags
519 	NULL,		// select()
520 	NULL,		// deselect()
521 	NULL,		// dir_read()
522 	NULL,		// dir_rewind()
523 	index_read_stat,	// read_stat()
524 	NULL,		// write_stat()
525 	NULL,		// dir_close()
526 	NULL		// free_fd()
527 };
528 #endif
529 
530 static struct fd_ops sQueryOps = {
531 	NULL,		// read()
532 	NULL,		// write()
533 	NULL,		// seek()
534 	NULL,		// ioctl()
535 	NULL,		// set_flags
536 	NULL,		// select()
537 	NULL,		// deselect()
538 	query_read,
539 	query_rewind,
540 	NULL,		// read_stat()
541 	NULL,		// write_stat()
542 	query_close,
543 	query_free_fd
544 };
545 
546 
547 namespace {
548 
549 class VNodePutter {
550 public:
551 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
552 
553 	~VNodePutter()
554 	{
555 		Put();
556 	}
557 
558 	void SetTo(struct vnode* vnode)
559 	{
560 		Put();
561 		fVNode = vnode;
562 	}
563 
564 	void Put()
565 	{
566 		if (fVNode) {
567 			put_vnode(fVNode);
568 			fVNode = NULL;
569 		}
570 	}
571 
572 	struct vnode* Detach()
573 	{
574 		struct vnode* vnode = fVNode;
575 		fVNode = NULL;
576 		return vnode;
577 	}
578 
579 private:
580 	struct vnode* fVNode;
581 };
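
// Usage sketch (hypothetical caller): VNodePutter ties put_vnode() to scope
// exit, so early returns cannot leak the reference:
//
//	struct vnode* vnode;
//	if (get_vnode(mountID, vnodeID, &vnode, true, 0) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... any number of early returns is fine here ...
//		// call putter.Detach() instead to hand the reference out
//	}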
582 
583 
584 class FDCloser {
585 public:
586 	FDCloser() : fFD(-1), fKernel(true) {}
587 
588 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
589 
590 	~FDCloser()
591 	{
592 		Close();
593 	}
594 
595 	void SetTo(int fd, bool kernel)
596 	{
597 		Close();
598 		fFD = fd;
599 		fKernel = kernel;
600 	}
601 
602 	void Close()
603 	{
604 		if (fFD >= 0) {
605 			if (fKernel)
606 				_kern_close(fFD);
607 			else
608 				_user_close(fFD);
609 			fFD = -1;
610 		}
611 	}
612 
613 	int Detach()
614 	{
615 		int fd = fFD;
616 		fFD = -1;
617 		return fd;
618 	}
619 
620 private:
621 	int		fFD;
622 	bool	fKernel;
623 };
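
// Usage sketch (hypothetical caller): FDCloser closes the right flavor of
// descriptor on error paths, and Detach() transfers ownership on success:
//
//	FDCloser fdCloser(fd, kernel);
//	if (someSetupFailed)
//		return B_ERROR;		// fd is closed automatically
//	return fdCloser.Detach();	// success: hand the fd to the caller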
624 
625 } // namespace
626 
627 
628 #if VFS_PAGES_IO_TRACING
629 
630 namespace VFSPagesIOTracing {
631 
632 class PagesIOTraceEntry : public AbstractTraceEntry {
633 protected:
634 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
635 		const generic_io_vec* vecs, uint32 count, uint32 flags,
636 		generic_size_t bytesRequested, status_t status,
637 		generic_size_t bytesTransferred)
638 		:
639 		fVnode(vnode),
640 		fMountID(vnode->mount->id),
641 		fNodeID(vnode->id),
642 		fCookie(cookie),
643 		fPos(pos),
644 		fCount(count),
645 		fFlags(flags),
646 		fBytesRequested(bytesRequested),
647 		fStatus(status),
648 		fBytesTransferred(bytesTransferred)
649 	{
650 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
651 			sizeof(generic_io_vec) * count, false);
652 	}
653 
654 	void AddDump(TraceOutput& out, const char* mode)
655 	{
656 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
657 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
658 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
659 			(uint64)fBytesRequested);
660 
661 		if (fVecs != NULL) {
662 			for (uint32 i = 0; i < fCount; i++) {
663 				if (i > 0)
664 					out.Print(", ");
665 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
666 					(uint64)fVecs[i].length);
667 			}
668 		}
669 
670 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
671 			"transferred: %" B_PRIu64, fFlags, fStatus,
672 			(uint64)fBytesTransferred);
673 	}
674 
675 protected:
676 	struct vnode*	fVnode;
677 	dev_t			fMountID;
678 	ino_t			fNodeID;
679 	void*			fCookie;
680 	off_t			fPos;
681 	generic_io_vec*	fVecs;
682 	uint32			fCount;
683 	uint32			fFlags;
684 	generic_size_t	fBytesRequested;
685 	status_t		fStatus;
686 	generic_size_t	fBytesTransferred;
687 };
688 
689 
690 class ReadPages : public PagesIOTraceEntry {
691 public:
692 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
693 		const generic_io_vec* vecs, uint32 count, uint32 flags,
694 		generic_size_t bytesRequested, status_t status,
695 		generic_size_t bytesTransferred)
696 		:
697 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
698 			bytesRequested, status, bytesTransferred)
699 	{
700 		Initialized();
701 	}
702 
703 	virtual void AddDump(TraceOutput& out)
704 	{
705 		PagesIOTraceEntry::AddDump(out, "read");
706 	}
707 };
708 
709 
710 class WritePages : public PagesIOTraceEntry {
711 public:
712 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
713 		const generic_io_vec* vecs, uint32 count, uint32 flags,
714 		generic_size_t bytesRequested, status_t status,
715 		generic_size_t bytesTransferred)
716 		:
717 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
718 			bytesRequested, status, bytesTransferred)
719 	{
720 		Initialized();
721 	}
722 
723 	virtual void AddDump(TraceOutput& out)
724 	{
725 		PagesIOTraceEntry::AddDump(out, "write");
726 	}
727 };
728 
729 }	// namespace VFSPagesIOTracing
730 
731 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
732 #else
733 #	define TPIO(x) ;
734 #endif	// VFS_PAGES_IO_TRACING
735 
736 
737 /*! Finds the mounted device (the fs_mount structure) with the given ID.
738 	Note, you must hold the sMountLock lock when you call this function.
739 */
740 static struct fs_mount*
741 find_mount(dev_t id)
742 {
743 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
744 
745 	return sMountsTable->Lookup(id);
746 }
747 
748 
749 static status_t
750 get_mount(dev_t id, struct fs_mount** _mount)
751 {
752 	struct fs_mount* mount;
753 
754 	ReadLocker nodeLocker(sVnodeLock);
755 	ReadLocker mountLocker(sMountLock);
756 
757 	mount = find_mount(id);
758 	if (mount == NULL)
759 		return B_BAD_VALUE;
760 
761 	struct vnode* rootNode = mount->root_vnode;
762 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
763 		|| rootNode->ref_count == 0) {
764 		// might have been called during a mount/unmount operation
765 		return B_BUSY;
766 	}
767 
768 	inc_vnode_ref_count(rootNode);
769 	*_mount = mount;
770 	return B_OK;
771 }
772 
773 
774 static void
775 put_mount(struct fs_mount* mount)
776 {
777 	if (mount)
778 		put_vnode(mount->root_vnode);
779 }
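
// Usage sketch: get_mount()/put_mount() bracket accesses to a mount; the
// reference held on its root vnode keeps the mount from being unmounted:
//
//	struct fs_mount* mount;
//	if (get_mount(id, &mount) == B_OK) {
//		// ... safely use 'mount' here ...
//		put_mount(mount);
//	}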
780 
781 
782 /*!	Tries to open the specified file system module.
783 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
784 	Returns a pointer to the file system module interface, or NULL if it
785 	could not open the module.
786 */
787 static file_system_module_info*
788 get_file_system(const char* fsName)
789 {
790 	char name[B_FILE_NAME_LENGTH];
791 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
792 		// construct module name if we didn't get one
793 		// (we currently support only one API)
794 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
795 		fsName = NULL;
796 	}
797 
798 	file_system_module_info* info;
799 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
800 		return NULL;
801 
802 	return info;
803 }
804 
805 
806 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
807 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
808 	The name is allocated for you, and you have to free() it when you're
809 	done with it.
810 	Returns NULL if the required memory is not available.
811 */
812 static char*
813 get_file_system_name(const char* fsName)
814 {
815 	const size_t length = strlen("file_systems/");
816 
817 	if (strncmp(fsName, "file_systems/", length)) {
818 		// the name already seems to be the module's file name
819 		return strdup(fsName);
820 	}
821 
822 	fsName += length;
823 	const char* end = strchr(fsName, '/');
824 	if (end == NULL) {
825 		// this doesn't seem to be a valid name, but well...
826 		return strdup(fsName);
827 	}
828 
829 	// cut off the trailing /v1
830 
831 	char* name = (char*)malloc(end + 1 - fsName);
832 	if (name == NULL)
833 		return NULL;
834 
835 	strlcpy(name, fsName, end + 1 - fsName);
836 	return name;
837 }
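
// Examples: both get_file_system_name("file_systems/bfs/v1") and
// get_file_system_name("bfs") return a malloc()ed copy of "bfs".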
838 
839 
840 /*!	Accepts a list of file system names separated by a colon, one for each
841 	layer and returns the file system name for the specified layer.
842 	The name is allocated for you, and you have to free() it when you're
843 	done with it.
844 	Returns NULL if the required memory is not available or if there is no
845 	name for the specified layer.
846 */
847 static char*
848 get_file_system_name_for_layer(const char* fsNames, int32 layer)
849 {
850 	while (layer >= 0) {
851 		const char* end = strchr(fsNames, ':');
852 		if (end == NULL) {
853 			if (layer == 0)
854 				return strdup(fsNames);
855 			return NULL;
856 		}
857 
858 		if (layer == 0) {
859 			size_t length = end - fsNames + 1;
860 			char* result = (char*)malloc(length);
861 			strlcpy(result, fsNames, length);
862 			return result;
863 		}
864 
865 		fsNames = end + 1;
866 		layer--;
867 	}
868 
869 	return NULL;
870 }
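
// Example: for the layered name "ntfs:write_overlay",
// get_file_system_name_for_layer(names, 0) returns "ntfs",
// get_file_system_name_for_layer(names, 1) returns "write_overlay",
// and layer 2 yields NULL.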
871 
872 
873 static void
874 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
875 {
876 	MutexLocker _(mount->lock);
877 	mount->vnodes.Add(vnode);
878 }
879 
880 
881 static void
882 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
883 {
884 	MutexLocker _(mount->lock);
885 	mount->vnodes.Remove(vnode);
886 }
887 
888 
889 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
890 
891 	The caller must hold the sVnodeLock (read lock at least).
892 
893 	\param mountID the mount ID.
894 	\param vnodeID the node ID.
895 
896 	\return The vnode structure, if it was found in the hash table, \c NULL
897 			otherwise.
898 */
899 static struct vnode*
900 lookup_vnode(dev_t mountID, ino_t vnodeID)
901 {
902 	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);
903 
904 	struct vnode_hash_key key;
905 
906 	key.device = mountID;
907 	key.vnode = vnodeID;
908 
909 	return sVnodeTable->Lookup(key);
910 }
911 
912 
913 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
914 
915 	This will also wait for BUSY_VNODE_DELAY before returning, if one should
916 	still wait for the vnode to become unbusy.
917 
918 	\return \c true if one should retry, \c false if not.
919 */
920 static bool
921 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
922 {
923 	if (--tries < 0) {
924 		// vnode doesn't seem to become unbusy
925 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
926 			" is not becoming unbusy!\n", mountID, vnodeID);
927 		return false;
928 	}
929 	snooze(BUSY_VNODE_DELAY);
930 	return true;
931 }
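
// Typical retry loop (sketch, mirroring the use in get_vnode() below;
// 'nodeIsStillBusy' stands in for the caller's actual re-check):
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (nodeIsStillBusy) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;	// gave up after ~10 seconds
//	}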
932 
933 
934 /*!	Creates a new vnode with the given mount and node ID.
935 	If the node already exists, it is returned instead and no new node is
936 	created. In either case -- but not if an error occurs -- the function write
937 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
938 	error the lock is not held on return.
939 
940 	\param mountID The mount ID.
941 	\param vnodeID The vnode ID.
942 	\param _vnode Will be set to the new vnode on success.
943 	\param _nodeCreated Will be set to \c true when the returned vnode has
944 		been newly created, \c false when it already existed. Will not be
945 		changed on error.
946 	\return \c B_OK, when the vnode was successfully created and inserted or
947 		a node with the given ID was found, \c B_NO_MEMORY or
948 		\c B_ENTRY_NOT_FOUND on error.
949 */
950 static status_t
951 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
952 	bool& _nodeCreated)
953 {
954 	FUNCTION(("create_new_vnode_and_lock()\n"));
955 
956 	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
957 	if (vnode == NULL)
958 		return B_NO_MEMORY;
959 
960 	// initialize basic values
961 	memset(vnode, 0, sizeof(struct vnode));
962 	vnode->device = mountID;
963 	vnode->id = vnodeID;
964 	vnode->ref_count = 1;
965 	vnode->SetBusy(true);
966 
967 	// look up the node -- it might have been added by someone else in the
968 	// meantime
969 	rw_lock_write_lock(&sVnodeLock);
970 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
971 	if (existingVnode != NULL) {
972 		object_cache_free(sVnodeCache, vnode, 0);
973 		_vnode = existingVnode;
974 		_nodeCreated = false;
975 		return B_OK;
976 	}
977 
978 	// get the mount structure
979 	rw_lock_read_lock(&sMountLock);
980 	vnode->mount = find_mount(mountID);
981 	if (!vnode->mount || vnode->mount->unmounting) {
982 		rw_lock_read_unlock(&sMountLock);
983 		rw_lock_write_unlock(&sVnodeLock);
984 		object_cache_free(sVnodeCache, vnode, 0);
985 		return B_ENTRY_NOT_FOUND;
986 	}
987 
988 	// add the vnode to the mount's node list and the hash table
989 	sVnodeTable->Insert(vnode);
990 	add_vnode_to_mount_list(vnode, vnode->mount);
991 
992 	rw_lock_read_unlock(&sMountLock);
993 
994 	_vnode = vnode;
995 	_nodeCreated = true;
996 
997 	// keep the vnode lock locked
998 	return B_OK;
999 }
1000 
1001 
1002 /*!	Frees the vnode and all resources it has acquired, and removes
1003 	it from the vnode hash as well as from its mount structure.
1004 	Will also make sure that any cache modifications are written back.
1005 */
1006 static void
1007 free_vnode(struct vnode* vnode, bool reenter)
1008 {
1009 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1010 		vnode);
1011 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1012 
1013 	// write back any changes in this vnode's cache -- but only
1014 	// if the vnode won't be deleted, in which case the changes
1015 	// will be discarded
1016 
1017 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1018 		FS_CALL_NO_PARAMS(vnode, fsync);
1019 
1020 	// Note: If this vnode has a cache attached, there will still be two
1021 	// references to that cache at this point. The last one belongs to the vnode
1022 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1023 	// cache. Each but the last reference to a cache also includes a reference
1024 	// to the vnode. The file cache, however, released its reference (cf.
1025 	// file_cache_create()), so that this vnode's ref count has the chance to
1026 	// ever drop to 0. Deleting the file cache now, will cause the next to last
1027 	// cache reference to be released, which will also release a (no longer
1028 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1029 	// count, so that it will neither become negative nor 0.
1030 	vnode->ref_count = 2;
1031 
1032 	if (!vnode->IsUnpublished()) {
1033 		if (vnode->IsRemoved())
1034 			FS_CALL(vnode, remove_vnode, reenter);
1035 		else
1036 			FS_CALL(vnode, put_vnode, reenter);
1037 	}
1038 
1039 	// If the vnode has a VMCache attached, make sure that it won't try to get
1040 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1041 	// long as the vnode is busy and in the hash, that won't happen, but as
1042 	// soon as we've removed it from the hash, it could reload the vnode -- with
1043 	// a new cache attached!
1044 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1045 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1046 
1047 	// The file system has removed the resources of the vnode now, so we can
1048 	// make it available again (by removing the busy vnode from the hash).
1049 	rw_lock_write_lock(&sVnodeLock);
1050 	sVnodeTable->Remove(vnode);
1051 	rw_lock_write_unlock(&sVnodeLock);
1052 
1053 	// if we have a VMCache attached, remove it
1054 	if (vnode->cache)
1055 		vnode->cache->ReleaseRef();
1056 
1057 	vnode->cache = NULL;
1058 
1059 	remove_vnode_from_mount_list(vnode, vnode->mount);
1060 
1061 	object_cache_free(sVnodeCache, vnode, 0);
1062 }
1063 
1064 
1065 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1066 	if the counter dropped to 0.
1067 
1068 	The caller must, of course, own a reference to the vnode to call this
1069 	function.
1070 	The caller must not hold the sVnodeLock or the sMountLock.
1071 
1072 	\param vnode the vnode.
1073 	\param alwaysFree don't move this vnode into the unused list, but really
1074 		   delete it if possible.
1075 	\param reenter \c true, if this function is called (indirectly) from within
1076 		   a file system. This will be passed to file system hooks only.
1077 	\return \c B_OK, if everything went fine, an error code otherwise.
1078 */
1079 static status_t
1080 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1081 {
1082 	ReadLocker locker(sVnodeLock);
1083 	AutoLocker<Vnode> nodeLocker(vnode);
1084 
1085 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1086 
1087 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1088 
1089 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1090 		vnode->ref_count));
1091 
1092 	if (oldRefCount != 1)
1093 		return B_OK;
1094 
1095 	if (vnode->IsBusy())
1096 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1097 
1098 	bool freeNode = false;
1099 	bool freeUnusedNodes = false;
1100 
1101 	// Just insert the vnode into an unused list if we don't need
1102 	// to delete it
1103 	if (vnode->IsRemoved() || alwaysFree) {
1104 		vnode_to_be_freed(vnode);
1105 		vnode->SetBusy(true);
1106 		freeNode = true;
1107 	} else
1108 		freeUnusedNodes = vnode_unused(vnode);
1109 
1110 	nodeLocker.Unlock();
1111 	locker.Unlock();
1112 
1113 	if (freeNode)
1114 		free_vnode(vnode, reenter);
1115 	else if (freeUnusedNodes)
1116 		free_unused_vnodes();
1117 
1118 	return B_OK;
1119 }
1120 
1121 
1122 /*!	\brief Increments the reference counter of the given vnode.
1123 
1124 	The caller must make sure that the node isn't deleted while this function
1125 	is called. This can be done either:
1126 	- by ensuring that a reference to the node exists and remains in existence,
1127 	  or
1128 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1129 	  or by holding sVnodeLock write locked.
1130 
1131 	In the second case the caller is responsible for dealing with the ref count
1132 	0 -> 1 transition. That is: 1. this function must not be invoked when the
1133 	node is busy in the first place, and 2. vnode_used() must be called for
1134 	the node.
1135 
1136 	\param vnode the vnode.
1137 */
1138 static void
1139 inc_vnode_ref_count(struct vnode* vnode)
1140 {
1141 	atomic_add(&vnode->ref_count, 1);
1142 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1143 		vnode->ref_count));
1144 }
1145 
1146 
1147 static bool
1148 is_special_node_type(int type)
1149 {
1150 	// at the moment only FIFOs are supported
1151 	return S_ISFIFO(type);
1152 }
1153 
1154 
1155 static status_t
1156 create_special_sub_node(struct vnode* vnode, uint32 flags)
1157 {
1158 	if (S_ISFIFO(vnode->Type()))
1159 		return create_fifo_vnode(vnode->mount->volume, vnode);
1160 
1161 	return B_BAD_VALUE;
1162 }
1163 
1164 
1165 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1166 
1167 	If the node is not yet in memory, it will be loaded.
1168 
1169 	The caller must not hold the sVnodeLock or the sMountLock.
1170 
1171 	\param mountID the mount ID.
1172 	\param vnodeID the node ID.
1173 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1174 		   retrieved vnode structure shall be written.
1175 	\param reenter \c true, if this function is called (indirectly) from within
1176 		   a file system.
1177 	\return \c B_OK, if everything went fine, an error code otherwise.
1178 */
1179 static status_t
1180 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1181 	int reenter)
1182 {
1183 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1184 		mountID, vnodeID, _vnode));
1185 
1186 	rw_lock_read_lock(&sVnodeLock);
1187 
1188 	int32 tries = BUSY_VNODE_RETRIES;
1189 restart:
1190 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1191 	AutoLocker<Vnode> nodeLocker(vnode);
1192 
1193 	if (vnode && vnode->IsBusy()) {
1194 		// vnodes in the Removed state (except ones still Unpublished)
1195 		// which are also Busy will disappear soon, so we do not wait for them.
1196 		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();
1197 
1198 		nodeLocker.Unlock();
1199 		rw_lock_read_unlock(&sVnodeLock);
1200 		if (!canWait) {
1201 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1202 				mountID, vnodeID);
1203 			return B_BUSY;
1204 		}
1205 		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
1206 			return B_BUSY;
1207 
1208 		rw_lock_read_lock(&sVnodeLock);
1209 		goto restart;
1210 	}
1211 
1212 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1213 
1214 	status_t status;
1215 
1216 	if (vnode) {
1217 		if (vnode->ref_count == 0) {
1218 			// this vnode has been unused before
1219 			vnode_used(vnode);
1220 		}
1221 		inc_vnode_ref_count(vnode);
1222 
1223 		nodeLocker.Unlock();
1224 		rw_lock_read_unlock(&sVnodeLock);
1225 	} else {
1226 		// we need to create a new vnode and read it in
1227 		rw_lock_read_unlock(&sVnodeLock);
1228 			// unlock -- create_new_vnode_and_lock() write-locks on success
1229 		bool nodeCreated;
1230 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1231 			nodeCreated);
1232 		if (status != B_OK)
1233 			return status;
1234 
1235 		if (!nodeCreated) {
1236 			rw_lock_read_lock(&sVnodeLock);
1237 			rw_lock_write_unlock(&sVnodeLock);
1238 			goto restart;
1239 		}
1240 
1241 		rw_lock_write_unlock(&sVnodeLock);
1242 
1243 		int type;
1244 		uint32 flags;
1245 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1246 			&flags, reenter);
1247 		if (status == B_OK && vnode->private_node == NULL)
1248 			status = B_BAD_VALUE;
1249 
1250 		bool gotNode = status == B_OK;
1251 		bool publishSpecialSubNode = false;
1252 		if (gotNode) {
1253 			vnode->SetType(type);
1254 			publishSpecialSubNode = is_special_node_type(type)
1255 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1256 		}
1257 
1258 		if (gotNode && publishSpecialSubNode)
1259 			status = create_special_sub_node(vnode, flags);
1260 
1261 		if (status != B_OK) {
1262 			if (gotNode)
1263 				FS_CALL(vnode, put_vnode, reenter);
1264 
1265 			rw_lock_write_lock(&sVnodeLock);
1266 			sVnodeTable->Remove(vnode);
1267 			remove_vnode_from_mount_list(vnode, vnode->mount);
1268 			rw_lock_write_unlock(&sVnodeLock);
1269 
1270 			object_cache_free(sVnodeCache, vnode, 0);
1271 			return status;
1272 		}
1273 
1274 		rw_lock_read_lock(&sVnodeLock);
1275 		vnode->Lock();
1276 
1277 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1278 		vnode->SetBusy(false);
1279 
1280 		vnode->Unlock();
1281 		rw_lock_read_unlock(&sVnodeLock);
1282 	}
1283 
1284 	TRACE(("get_vnode: returning %p\n", vnode));
1285 
1286 	*_vnode = vnode;
1287 	return B_OK;
1288 }
1289 
1290 
1291 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1292 	if the counter dropped to 0.
1293 
1294 	The caller must, of course, own a reference to the vnode to call this
1295 	function.
1296 	The caller must not hold the sVnodeLock or the sMountLock.
1297 
1298 	\param vnode the vnode.
1299 */
1300 static inline void
1301 put_vnode(struct vnode* vnode)
1302 {
1303 	dec_vnode_ref_count(vnode, false, false);
1304 }
1305 
1306 
1307 static void
1308 free_unused_vnodes(int32 level)
1309 {
1310 	unused_vnodes_check_started();
1311 
1312 	if (level == B_NO_LOW_RESOURCE) {
1313 		unused_vnodes_check_done();
1314 		return;
1315 	}
1316 
1317 	flush_hot_vnodes();
1318 
1319 	// determine how many nodes to free
1320 	uint32 count = 1;
1321 	{
1322 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1323 
1324 		switch (level) {
1325 			case B_LOW_RESOURCE_NOTE:
1326 				count = sUnusedVnodes / 100;
1327 				break;
1328 			case B_LOW_RESOURCE_WARNING:
1329 				count = sUnusedVnodes / 10;
1330 				break;
1331 			case B_LOW_RESOURCE_CRITICAL:
1332 				count = sUnusedVnodes;
1333 				break;
1334 		}
1335 
1336 		if (count > sUnusedVnodes)
1337 			count = sUnusedVnodes;
1338 	}
1339 
1340 	// Write back the modified pages of some unused vnodes and free them.
1341 
1342 	for (uint32 i = 0; i < count; i++) {
1343 		ReadLocker vnodesReadLocker(sVnodeLock);
1344 
1345 		// get the first node
1346 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1347 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1348 			&sUnusedVnodeList);
1349 		unusedVnodesLocker.Unlock();
1350 
1351 		if (vnode == NULL)
1352 			break;
1353 
1354 		// lock the node
1355 		AutoLocker<Vnode> nodeLocker(vnode);
1356 
1357 		// Check whether the node is still unused -- since we only append to the
1358 		// tail of the unused queue, the vnode should still be at its head.
1359 		// Alternatively we could check its ref count for 0 and its busy flag,
1360 		// but if the node is no longer at the head of the queue, it means it
1361 		// has been touched in the meantime, i.e. it is no longer the least
1362 		// recently used unused vnode, so we'd rather not free it.
1363 		unusedVnodesLocker.Lock();
1364 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1365 			continue;
1366 		unusedVnodesLocker.Unlock();
1367 
1368 		ASSERT(!vnode->IsBusy());
1369 
1370 		// grab a reference
1371 		inc_vnode_ref_count(vnode);
1372 		vnode_used(vnode);
1373 
1374 		// write back changes and free the node
1375 		nodeLocker.Unlock();
1376 		vnodesReadLocker.Unlock();
1377 
1378 		if (vnode->cache != NULL)
1379 			vnode->cache->WriteModified();
1380 
1381 		dec_vnode_ref_count(vnode, true, false);
1382 			// this should free the vnode when it's still unused
1383 	}
1384 
1385 	unused_vnodes_check_done();
1386 }
1387 
1388 
1389 /*!	Gets the vnode the given vnode is covering.
1390 
1391 	The caller must have \c sVnodeLock read-locked at least.
1392 
1393 	The function returns a reference to the retrieved vnode (if any); the
1394 	caller is responsible for releasing it.
1395 
1396 	\param vnode The vnode whose covered node shall be returned.
1397 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1398 		vnode.
1399 */
1400 static inline Vnode*
1401 get_covered_vnode_locked(Vnode* vnode)
1402 {
1403 	if (Vnode* coveredNode = vnode->covers) {
1404 		while (coveredNode->covers != NULL)
1405 			coveredNode = coveredNode->covers;
1406 
1407 		inc_vnode_ref_count(coveredNode);
1408 		return coveredNode;
1409 	}
1410 
1411 	return NULL;
1412 }
1413 
1414 
1415 /*!	Gets the vnode the given vnode is covering.
1416 
1417 	The caller must not hold \c sVnodeLock. Note that this implies a race
1418 	condition, since the situation can change at any time.
1419 
1420 	The function returns a reference to the retrieved vnode (if any); the
1421 	caller is responsible for releasing it.
1422 
1423 	\param vnode The vnode whose covered node shall be returned.
1424 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1425 		vnode.
1426 */
1427 static inline Vnode*
1428 get_covered_vnode(Vnode* vnode)
1429 {
1430 	if (!vnode->IsCovering())
1431 		return NULL;
1432 
1433 	ReadLocker vnodeReadLocker(sVnodeLock);
1434 	return get_covered_vnode_locked(vnode);
1435 }
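
// Sketch: when a volume is mounted on a directory, the directory vnode is
// "covered" by the new volume's root vnode. Resolving ".." across that
// boundary conceptually does:
//
//	Vnode* covered = get_covered_vnode(mountRoot);
//	if (covered != NULL) {
//		// 'covered' is the mount point directory; put it when done
//		put_vnode(covered);
//	}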
1436 
1437 
1438 /*!	Gets the vnode the given vnode is covered by.
1439 
1440 	The caller must have \c sVnodeLock read-locked at least.
1441 
1442 	The function returns a reference to the retrieved vnode (if any); the
1443 	caller is responsible for releasing it.
1444 
1445 	\param vnode The vnode whose covering node shall be returned.
1446 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1447 		any vnode.
1448 */
1449 static Vnode*
1450 get_covering_vnode_locked(Vnode* vnode)
1451 {
1452 	if (Vnode* coveringNode = vnode->covered_by) {
1453 		while (coveringNode->covered_by != NULL)
1454 			coveringNode = coveringNode->covered_by;
1455 
1456 		inc_vnode_ref_count(coveringNode);
1457 		return coveringNode;
1458 	}
1459 
1460 	return NULL;
1461 }
1462 
1463 
1464 /*!	Gets the vnode the given vnode is covered by.
1465 
1466 	The caller must not hold \c sVnodeLock. Note that this implies a race
1467 	condition, since the situation can change at any time.
1468 
1469 	The function returns a reference to the retrieved vnode (if any); the
1470 	caller is responsible for releasing it.
1471 
1472 	\param vnode The vnode whose covering node shall be returned.
1473 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1474 		any vnode.
1475 */
1476 static inline Vnode*
1477 get_covering_vnode(Vnode* vnode)
1478 {
1479 	if (!vnode->IsCovered())
1480 		return NULL;
1481 
1482 	ReadLocker vnodeReadLocker(sVnodeLock);
1483 	return get_covering_vnode_locked(vnode);
1484 }
1485 
1486 
1487 static void
1488 free_unused_vnodes()
1489 {
1490 	free_unused_vnodes(
1491 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1492 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1493 }
1494 
1495 
1496 static void
1497 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1498 {
1499 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1500 
1501 	free_unused_vnodes(level);
1502 }
1503 
1504 
1505 static inline void
1506 put_advisory_locking(struct advisory_locking* locking)
1507 {
1508 	release_sem(locking->lock);
1509 }
1510 
1511 
1512 /*!	Returns the advisory_locking object of the \a vnode in case it
1513 	has one, and locks it.
1514 	You have to call put_advisory_locking() when you're done with
1515 	it.
1516 	Note: you must not have the vnode mutex locked when calling
1517 	this function.
1518 */
1519 static struct advisory_locking*
1520 get_advisory_locking(struct vnode* vnode)
1521 {
1522 	rw_lock_read_lock(&sVnodeLock);
1523 	vnode->Lock();
1524 
1525 	struct advisory_locking* locking = vnode->advisory_locking;
1526 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1527 
1528 	vnode->Unlock();
1529 	rw_lock_read_unlock(&sVnodeLock);
1530 
1531 	if (lock >= 0)
1532 		lock = acquire_sem(lock);
1533 	if (lock < 0) {
1534 		// This means the locking has been deleted in the mean time
1535 		// or had never existed in the first place - otherwise, we
1536 		// would get the lock at some point.
1537 		return NULL;
1538 	}
1539 
1540 	return locking;
1541 }
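
// Usage sketch: the returned object comes locked (its 'lock' semaphore is
// held); put_advisory_locking() releases it again:
//
//	struct advisory_locking* locking = get_advisory_locking(vnode);
//	if (locking != NULL) {
//		// ... inspect or modify locking->locks ...
//		put_advisory_locking(locking);
//	}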
1542 
1543 
1544 /*!	Creates a locked advisory_locking object, and attaches it to the
1545 	given \a vnode.
1546 	Returns B_OK in case of success - even if the vnode got such an
1547 	object from someone else in the meantime, you'll still get that
1548 	one locked then.
1549 */
1550 static status_t
1551 create_advisory_locking(struct vnode* vnode)
1552 {
1553 	if (vnode == NULL)
1554 		return B_FILE_ERROR;
1555 
1556 	ObjectDeleter<advisory_locking> lockingDeleter;
1557 	struct advisory_locking* locking = NULL;
1558 
1559 	while (get_advisory_locking(vnode) == NULL) {
1560 		// no locking object set on the vnode yet, create one
1561 		if (locking == NULL) {
1562 			locking = new(std::nothrow) advisory_locking;
1563 			if (locking == NULL)
1564 				return B_NO_MEMORY;
1565 			lockingDeleter.SetTo(locking);
1566 
1567 			locking->wait_sem = create_sem(0, "advisory lock");
1568 			if (locking->wait_sem < 0)
1569 				return locking->wait_sem;
1570 
1571 			locking->lock = create_sem(0, "advisory locking");
1572 			if (locking->lock < 0)
1573 				return locking->lock;
1574 		}
1575 
1576 		// set our newly created locking object
1577 		ReadLocker _(sVnodeLock);
1578 		AutoLocker<Vnode> nodeLocker(vnode);
1579 		if (vnode->advisory_locking == NULL) {
1580 			vnode->advisory_locking = locking;
1581 			lockingDeleter.Detach();
1582 			return B_OK;
1583 		}
1584 	}
1585 
1586 	// The vnode already had a locking object. That's just as well.
1587 
1588 	return B_OK;
1589 }
1590 
1591 
1592 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1593 	with the advisory_lock \a lock.
1594 */
1595 static bool
1596 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1597 {
1598 	if (flock == NULL)
1599 		return true;
1600 
1601 	return lock->start <= flock->l_start - 1 + flock->l_len
1602 		&& lock->end >= flock->l_start;
1603 }
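
// Worked example: a lock covering [0, 99] and an flock with l_start = 50,
// l_len = 100 (i.e. the region [50, 149]) intersect: 0 <= 149 and 99 >= 50.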
1604 
1605 
1606 /*!	Tests whether acquiring a lock would block.
1607 */
1608 static status_t
1609 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1610 {
1611 	const int requestedType = flock->l_type;
1612 	flock->l_type = F_UNLCK;
1613 	struct advisory_locking* locking = get_advisory_locking(vnode);
1614 	if (locking == NULL)
1615 		return B_OK;
1616 
1617 	team_id team = team_get_current_team_id();
1618 
1619 	LockList::Iterator iterator = locking->locks.GetIterator();
1620 	while (iterator.HasNext()) {
1621 		struct advisory_lock* lock = iterator.Next();
1622 
1623 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1624 			// locks do overlap
1625 			if (requestedType != F_RDLCK || !lock->shared) {
1626 				// collision
1627 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1628 				flock->l_whence = SEEK_SET;
1629 				flock->l_start = lock->start;
1630 				flock->l_len = lock->end - lock->start + 1;
1631 				flock->l_pid = lock->team;
1632 				break;
1633 			}
1634 		}
1635 	}
1636 
1637 	put_advisory_locking(locking);
1638 	return B_OK;
1639 }
1640 
1641 
1642 /*!	Removes the specified lock, or all locks of the calling team
1643 	if \a flock is NULL.
1644 */
1645 static status_t
1646 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1647 	struct file_descriptor* descriptor, struct flock* flock)
1648 {
1649 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1650 
1651 	struct advisory_locking* locking = get_advisory_locking(vnode);
1652 	if (locking == NULL)
1653 		return B_OK;
1654 
1655 	// find matching lock entries
1656 
1657 	LockList::Iterator iterator = locking->locks.GetIterator();
1658 	while (iterator.HasNext()) {
1659 		struct advisory_lock* lock = iterator.Next();
1660 		bool removeLock = false;
1661 
1662 		if (descriptor != NULL && lock->bound_to == descriptor) {
1663 			// Remove flock() locks
1664 			removeLock = true;
1665 		} else if (lock->bound_to == context
1666 				&& advisory_lock_intersects(lock, flock)) {
1667 			// Remove POSIX locks
1668 			bool endsBeyond = false;
1669 			bool startsBefore = false;
1670 			if (flock != NULL) {
1671 				startsBefore = lock->start < flock->l_start;
1672 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1673 			}
1674 
1675 			if (!startsBefore && !endsBeyond) {
1676 				// lock is completely contained in flock
1677 				removeLock = true;
1678 			} else if (startsBefore && !endsBeyond) {
1679 				// cut the end of the lock
1680 				lock->end = flock->l_start - 1;
1681 			} else if (!startsBefore && endsBeyond) {
1682 				// cut the start of the lock
1683 				lock->start = flock->l_start + flock->l_len;
1684 			} else {
1685 				// divide the lock into two locks
1686 				struct advisory_lock* secondLock = new(std::nothrow) advisory_lock;
1687 				if (secondLock == NULL) {
1688 					// TODO: we should probably revert the locks we already
1689 					// changed... (ie. allocate upfront)
1690 					put_advisory_locking(locking);
1691 					return B_NO_MEMORY;
1692 				}
1693 
1694 				// set up the second lock before truncating the first one
1695 				secondLock->bound_to = context;
1696 				secondLock->team = lock->team;
1697 				secondLock->session = lock->session;
1698 				// values must already be normalized when getting here
1699 				secondLock->start = flock->l_start + flock->l_len;
1700 				secondLock->end = lock->end;
1701 				secondLock->shared = lock->shared;
1702 				lock->end = flock->l_start - 1;
1703 
1704 				locking->locks.Add(secondLock);
1705 			}
1706 		}
1707 
1708 		if (removeLock) {
1709 			// this lock is no longer used
1710 			iterator.Remove();
1711 			free(lock);
1712 		}
1713 	}
1714 
1715 	bool removeLocking = locking->locks.IsEmpty();
1716 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1717 
1718 	put_advisory_locking(locking);
1719 
1720 	if (removeLocking) {
1721 		// We can remove the whole advisory locking structure; it's no
1722 		// longer used
1723 		locking = get_advisory_locking(vnode);
1724 		if (locking != NULL) {
1725 			ReadLocker locker(sVnodeLock);
1726 			AutoLocker<Vnode> nodeLocker(vnode);
1727 
1728 			// the locking could have been changed in the mean time
1729 			if (locking->locks.IsEmpty()) {
1730 				vnode->advisory_locking = NULL;
1731 				nodeLocker.Unlock();
1732 				locker.Unlock();
1733 
1734 				// we've detached the locking from the vnode, so we can
1735 				// safely delete it
1736 				delete locking;
1737 			} else {
1738 				// the locking is in use again
1739 				nodeLocker.Unlock();
1740 				locker.Unlock();
1741 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1742 			}
1743 		}
1744 	}
1745 
1746 	return B_OK;
1747 }
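
// Worked example of the splitting case above: a POSIX lock covering [0, 199]
// from which [50, 149] is released (l_start = 50, l_len = 100) both starts
// before and ends beyond the flock region, so it is divided into [0, 49]
// and [150, 199].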
1748 
1749 
1750 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1751 	will wait for the lock to become available, if there are any collisions
1752 	(if \a wait is \c false, it returns B_WOULD_BLOCK or B_PERMISSION_DENIED).
1753 
1754 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1755 	BSD flock() semantics are used, that is, all children can unlock the file
1756 	in question (we even allow parents to remove the lock, though, but that
1757 	seems to be in line with what the BSDs are doing).
1758 */
1759 static status_t
1760 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1761 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1762 {
1763 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1764 		vnode, flock, wait ? "yes" : "no"));
1765 
1766 	bool shared = flock->l_type == F_RDLCK;
1767 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1768 	status_t status = B_OK;
1769 
1770 	// TODO: do deadlock detection!
1771 
1772 	struct advisory_locking* locking;
1773 
1774 	while (true) {
1775 		// if this vnode has an advisory_locking structure attached,
1776 		// lock that one and search for any colliding file lock
1777 		status = create_advisory_locking(vnode);
1778 		if (status != B_OK)
1779 			return status;
1780 
1781 		locking = vnode->advisory_locking;
1782 		team_id team = team_get_current_team_id();
1783 		sem_id waitForLock = -1;
1784 
1785 		// test for collisions
1786 		LockList::Iterator iterator = locking->locks.GetIterator();
1787 		while (iterator.HasNext()) {
1788 			struct advisory_lock* lock = iterator.Next();
1789 
1790 			// TODO: locks from the same team might be joinable!
1791 			if ((lock->team != team || lock->bound_to != boundTo)
1792 					&& advisory_lock_intersects(lock, flock)) {
1793 				// locks do overlap
1794 				if (!shared || !lock->shared) {
1795 					// we need to wait
1796 					waitForLock = locking->wait_sem;
1797 					break;
1798 				}
1799 			}
1800 		}
1801 
1802 		if (waitForLock < 0)
1803 			break;
1804 
1805 		// We need to wait. Do that or fail now, if we've been asked not to.
1806 
1807 		if (!wait) {
1808 			put_advisory_locking(locking);
1809 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1810 		}
1811 
1812 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1813 			B_CAN_INTERRUPT, 0);
1814 		if (status != B_OK && status != B_BAD_SEM_ID)
1815 			return status;
1816 
1817 		// We have been notified, but we need to re-lock the locking object. So
1818 		// go another round...
1819 	}
1820 
1821 	// install new lock
1822 
1823 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1824 		sizeof(struct advisory_lock));
1825 	if (lock == NULL) {
1826 		put_advisory_locking(locking);
1827 		return B_NO_MEMORY;
1828 	}
1829 
1830 	lock->bound_to = boundTo;
1831 	lock->team = team_get_current_team_id();
1832 	lock->session = thread_get_current_thread()->team->session_id;
1833 	// values must already be normalized when getting here
1834 	lock->start = flock->l_start;
1835 	lock->end = flock->l_start - 1 + flock->l_len;
1836 	lock->shared = shared;
1837 
1838 	locking->locks.Add(lock);
1839 	put_advisory_locking(locking);
1840 
1841 	return status;
1842 }
1843 
1844 
1845 /*!	Normalizes the \a flock structure to make it easier to compare the
1846 	structure with others. The l_start and l_len fields are set to absolute
1847 	values according to the l_whence field.
1848 */
1849 static status_t
1850 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1851 {
1852 	switch (flock->l_whence) {
1853 		case SEEK_SET:
1854 			break;
1855 		case SEEK_CUR:
1856 			flock->l_start += descriptor->pos;
1857 			break;
1858 		case SEEK_END:
1859 		{
1860 			struct vnode* vnode = descriptor->u.vnode;
1861 			struct stat stat;
1862 			status_t status;
1863 
1864 			if (!HAS_FS_CALL(vnode, read_stat))
1865 				return B_UNSUPPORTED;
1866 
1867 			status = FS_CALL(vnode, read_stat, &stat);
1868 			if (status != B_OK)
1869 				return status;
1870 
1871 			flock->l_start += stat.st_size;
1872 			break;
1873 		}
1874 		default:
1875 			return B_BAD_VALUE;
1876 	}
1877 
1878 	if (flock->l_start < 0)
1879 		flock->l_start = 0;
1880 	if (flock->l_len == 0)
1881 		flock->l_len = OFF_MAX;
1882 
1883 	// don't let the offset and length overflow
1884 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1885 		flock->l_len = OFF_MAX - flock->l_start;
1886 
1887 	if (flock->l_len < 0) {
1888 		// a negative length reverses the region
1889 		flock->l_start += flock->l_len;
1890 		flock->l_len = -flock->l_len;
1891 	}
1892 
1893 	return B_OK;
1894 }
1895 
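/*	Worked example (illustrative): for a descriptor positioned at offset 100,
	a request with l_whence == SEEK_CUR, l_start == -200 and l_len == 0 is
	normalized to l_start == 0 (clamped up from -100) and l_len == OFF_MAX,
	i.e. "from the start of the file to its end". A negative length such as
	l_start == 500, l_len == -100 becomes the equivalent forward region
	l_start == 400, l_len == 100.
*/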
1896 
1897 static void
1898 replace_vnode_if_disconnected(struct fs_mount* mount,
1899 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1900 	struct vnode* fallBack, bool lockRootLock)
1901 {
1902 	struct vnode* givenVnode = vnode;
1903 	bool vnodeReplaced = false;
1904 
1905 	ReadLocker vnodeReadLocker(sVnodeLock);
1906 
1907 	if (lockRootLock)
1908 		mutex_lock(&sIOContextRootLock);
1909 
1910 	while (vnode != NULL && vnode->mount == mount
1911 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1912 		if (vnode->covers != NULL) {
1913 			// redirect the vnode to the covered vnode
1914 			vnode = vnode->covers;
1915 		} else
1916 			vnode = fallBack;
1917 
1918 		vnodeReplaced = true;
1919 	}
1920 
1921 	// If we've replaced the node, grab a reference for the new one.
1922 	if (vnodeReplaced && vnode != NULL)
1923 		inc_vnode_ref_count(vnode);
1924 
1925 	if (lockRootLock)
1926 		mutex_unlock(&sIOContextRootLock);
1927 
1928 	vnodeReadLocker.Unlock();
1929 
1930 	if (vnodeReplaced)
1931 		put_vnode(givenVnode);
1932 }
1933 
1934 
1935 /*!	Disconnects all file descriptors that are associated with the
1936 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1937 	\a mount object.
1938 
1939 	Note, after you've called this function, there might still be ongoing
1940 	accesses - they won't be interrupted if they were already in progress.
1941 	However, any subsequent access will fail.
1942 
1943 	This is not a cheap function and should be used with care and rarely.
1944 	TODO: there is currently no means to stop a blocking read/write!
1945 */
1946 static void
1947 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1948 	struct vnode* vnodeToDisconnect)
1949 {
1950 	// iterate over all teams and peek into their file descriptors
1951 	TeamListIterator teamIterator;
1952 	while (Team* team = teamIterator.Next()) {
1953 		BReference<Team> teamReference(team, true);
1954 		TeamLocker teamLocker(team);
1955 
1956 		// lock the I/O context
1957 		io_context* context = team->io_context;
1958 		if (context == NULL)
1959 			continue;
1960 		MutexLocker contextLocker(context->io_mutex);
1961 
1962 		teamLocker.Unlock();
1963 
1964 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1965 			sRoot, true);
1966 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1967 			sRoot, false);
1968 
1969 		for (uint32 i = 0; i < context->table_size; i++) {
1970 			struct file_descriptor* descriptor = context->fds[i];
1971 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1972 				continue;
1973 
1974 			inc_fd_ref_count(descriptor);
1975 
1976 			// if this descriptor points at this mount, we
1977 			// need to disconnect it to be able to unmount
1978 			struct vnode* vnode = fd_vnode(descriptor);
1979 			if (vnodeToDisconnect != NULL) {
1980 				if (vnode == vnodeToDisconnect)
1981 					disconnect_fd(descriptor);
1982 			} else if ((vnode != NULL && vnode->mount == mount)
1983 				|| (vnode == NULL && descriptor->u.mount == mount))
1984 				disconnect_fd(descriptor);
1985 
1986 			put_fd(descriptor);
1987 		}
1988 	}
1989 }
1990 
1991 
1992 /*!	\brief Gets the root node of the current IO context.
1993 	If \a kernel is \c true, the kernel IO context will be used.
1994 	The caller obtains a reference to the returned node.
1995 */
1996 struct vnode*
1997 get_root_vnode(bool kernel)
1998 {
1999 	if (!kernel) {
2000 		// Get current working directory from io context
2001 		struct io_context* context = get_current_io_context(kernel);
2002 
2003 		mutex_lock(&sIOContextRootLock);
2004 
2005 		struct vnode* root = context->root;
2006 		if (root != NULL)
2007 			inc_vnode_ref_count(root);
2008 
2009 		mutex_unlock(&sIOContextRootLock);
2010 
2011 		if (root != NULL)
2012 			return root;
2013 
2014 		// That should never happen.
2015 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2016 			"have a root\n", team_get_current_team_id());
2017 	}
2018 
2019 	inc_vnode_ref_count(sRoot);
2020 	return sRoot;
2021 }
2022 
2023 
2024 /*!	\brief Gets the directory path and leaf name for a given path.
2025 
2026 	The supplied \a path is transformed to refer to the directory part of
2027 	the entry identified by the original path, and the leaf name of the
2028 	original entry is written into the buffer \a filename.
2029 	Neither the returned path nor the leaf name can be expected to be
2030 	canonical.
2031 
2032 	\param path The path to be analyzed. Must be able to store at least one
2033 		   additional character.
2034 	\param filename The buffer into which the leaf name will be written.
2035 		   Must be of size B_FILE_NAME_LENGTH at least.
2036 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2037 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2038 		   if the given path name is empty.
2039 */
2040 static status_t
2041 get_dir_path_and_leaf(char* path, char* filename)
2042 {
2043 	if (*path == '\0')
2044 		return B_ENTRY_NOT_FOUND;
2045 
2046 	char* last = strrchr(path, '/');
2047 		// '/' are not allowed in file names!
2048 
2049 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2050 
2051 	if (last == NULL) {
2052 		// this path is single segment with no '/' in it
2053 		// ex. "foo"
2054 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2055 			return B_NAME_TOO_LONG;
2056 
2057 		strcpy(path, ".");
2058 	} else {
2059 		last++;
2060 		if (last[0] == '\0') {
2061 			// special case: the path ends in one or more '/' - remove them
2062 			while (*--last == '/' && last != path);
2063 			last[1] = '\0';
2064 
2065 			if (last == path && last[0] == '/') {
2066 				// This path points to the root of the file system
2067 				strcpy(filename, ".");
2068 				return B_OK;
2069 			}
2070 			for (; last != path && *(last - 1) != '/'; last--);
2071 				// rewind to the start of the leaf before the '/'
2072 		}
2073 
2074 		// normal leaf: replace the leaf portion of the path with a '.'
2075 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2076 			return B_NAME_TOO_LONG;
2077 
2078 		last[0] = '.';
2079 		last[1] = '\0';
2080 	}
2081 	return B_OK;
2082 }
2083 
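/*	Examples (illustrative) of the transformation performed above:

		"/a/b/c"   -> path "/a/b/.", filename "c"
		"/a/b/c//" -> path "/a/b/.", filename "c"
		"foo"      -> path ".",      filename "foo"
		"/"        -> path "/",      filename "."
*/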
2084 
2085 static status_t
2086 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2087 	bool traverse, bool kernel, struct vnode** _vnode)
2088 {
2089 	char clonedName[B_FILE_NAME_LENGTH + 1];
2090 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2091 		return B_NAME_TOO_LONG;
2092 
2093 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2094 	struct vnode* directory;
2095 
2096 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2097 	if (status < 0)
2098 		return status;
2099 
2100 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2101 		_vnode, NULL);
2102 }
2103 
2104 
2105 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2106 	and returns the respective vnode.
2107 	On success a reference to the vnode is acquired for the caller.
2108 */
2109 static status_t
2110 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2111 {
2112 	ino_t id;
2113 	bool missing;
2114 
2115 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2116 		return missing ? B_ENTRY_NOT_FOUND
2117 			: get_vnode(dir->device, id, _vnode, true, false);
2118 	}
2119 
2120 	status_t status = FS_CALL(dir, lookup, name, &id);
2121 	if (status != B_OK)
2122 		return status;
2123 
2124 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2125 	// have a reference and just need to look the node up.
2126 	rw_lock_read_lock(&sVnodeLock);
2127 	*_vnode = lookup_vnode(dir->device, id);
2128 	rw_lock_read_unlock(&sVnodeLock);
2129 
2130 	if (*_vnode == NULL) {
2131 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2132 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2133 		return B_ENTRY_NOT_FOUND;
2134 	}
2135 
2136 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2137 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2138 //		(*_vnode)->mount->id, (*_vnode)->id);
2139 
2140 	return B_OK;
2141 }
2142 
2143 
2144 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2145 	\a path must not be NULL.
2146 	If it returns successfully, \a path contains the name of the last path
2147 	component. This function clobbers the buffer pointed to by \a path only
2148 	if it contains more than one component.
2149 	Note, this reduces the ref_count of the starting \a vnode, whether it
2150 	succeeds or not!
2151 */
2152 static status_t
2153 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2154 	int count, struct io_context* ioContext, struct vnode** _vnode,
2155 	ino_t* _parentID)
2156 {
2157 	status_t status = B_OK;
2158 	ino_t lastParentID = vnode->id;
2159 
2160 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2161 
2162 	if (path == NULL) {
2163 		put_vnode(vnode);
2164 		return B_BAD_VALUE;
2165 	}
2166 
2167 	if (*path == '\0') {
2168 		put_vnode(vnode);
2169 		return B_ENTRY_NOT_FOUND;
2170 	}
2171 
2172 	while (true) {
2173 		struct vnode* nextVnode;
2174 		char* nextPath;
2175 
2176 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2177 			path));
2178 
2179 		// done?
2180 		if (path[0] == '\0')
2181 			break;
2182 
2183 		// walk to find the next path component ("path" will point to a single
2184 		// path component), and filter out multiple slashes
2185 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2186 				nextPath++);
2187 
2188 		bool directoryFound = false;
2189 		if (*nextPath == '/') {
2190 			directoryFound = true;
2191 			*nextPath = '\0';
2192 			do
2193 				nextPath++;
2194 			while (*nextPath == '/');
2195 		}
2196 
2197 		// If the '..' is at a covering vnode, move to the covered vnode,
2198 		// so we pass the '..' path to the underlying file system.
2199 		// Also prevent breaking out of the root of the IO context.
2200 		if (strcmp("..", path) == 0) {
2201 			if (vnode == ioContext->root) {
2202 				// Attempted prison break! Keep it contained.
2203 				path = nextPath;
2204 				continue;
2205 			}
2206 
2207 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2208 				nextVnode = coveredVnode;
2209 				put_vnode(vnode);
2210 				vnode = nextVnode;
2211 			}
2212 		}
2213 
2214 		// check if vnode is really a directory
2215 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2216 			status = B_NOT_A_DIRECTORY;
2217 
2218 		// Check if we have the right to search the current directory vnode.
2219 		// If a file system doesn't have the access() function, we assume that
2220 		// searching a directory is always allowed
2221 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2222 			status = FS_CALL(vnode, access, X_OK);
2223 
2224 		// Tell the filesystem to get the vnode of this path component (if we
2225 		// got the permission from the call above)
2226 		if (status == B_OK)
2227 			status = lookup_dir_entry(vnode, path, &nextVnode);
2228 
2229 		if (status != B_OK) {
2230 			put_vnode(vnode);
2231 			return status;
2232 		}
2233 
2234 		// If the new node is a symbolic link, resolve it (if we've been told
2235 		// to do it)
2236 		if (S_ISLNK(nextVnode->Type())
2237 			&& (traverseLeafLink || directoryFound)) {
2238 			size_t bufferSize;
2239 			char* buffer;
2240 
2241 			TRACE(("traverse link\n"));
2242 
2243 			// it's not exactly nice style using goto in this way, but hey,
2244 			// it works :-/
2245 			if (count + 1 > B_MAX_SYMLINKS) {
2246 				status = B_LINK_LIMIT;
2247 				goto resolve_link_error;
2248 			}
2249 
2250 			bufferSize = B_PATH_NAME_LENGTH;
2251 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2252 			if (buffer == NULL) {
2253 				status = B_NO_MEMORY;
2254 				goto resolve_link_error;
2255 			}
2256 
2257 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2258 				bufferSize--;
2259 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2260 				// null-terminate
2261 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2262 					buffer[bufferSize] = '\0';
2263 			} else
2264 				status = B_BAD_VALUE;
2265 
2266 			if (status != B_OK) {
2267 				free(buffer);
2268 
2269 		resolve_link_error:
2270 				put_vnode(vnode);
2271 				put_vnode(nextVnode);
2272 
2273 				return status;
2274 			}
2275 			put_vnode(nextVnode);
2276 
2277 			// Check if we start from the root directory or the current
2278 			// directory ("vnode" still points to that one).
2279 			// Cut off all leading slashes if it's the root directory
2280 			path = buffer;
2281 			bool absoluteSymlink = false;
2282 			if (path[0] == '/') {
2283 				// we don't need the old directory anymore
2284 				put_vnode(vnode);
2285 
2286 				while (*++path == '/')
2287 					;
2288 
2289 				mutex_lock(&sIOContextRootLock);
2290 				vnode = ioContext->root;
2291 				inc_vnode_ref_count(vnode);
2292 				mutex_unlock(&sIOContextRootLock);
2293 
2294 				absoluteSymlink = true;
2295 			}
2296 
2297 			inc_vnode_ref_count(vnode);
2298 				// balance the next recursion - we will decrement the
2299 				// ref_count of the vnode, no matter if we succeeded or not
2300 
2301 			if (absoluteSymlink && *path == '\0') {
2302 				// symlink was just "/"
2303 				nextVnode = vnode;
2304 			} else {
2305 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2306 					ioContext, &nextVnode, &lastParentID);
2307 			}
2308 
2309 			object_cache_free(sPathNameCache, buffer, 0);
2310 
2311 			if (status != B_OK) {
2312 				put_vnode(vnode);
2313 				return status;
2314 			}
2315 		} else
2316 			lastParentID = vnode->id;
2317 
2318 		// decrease the ref count on the old dir we just looked up into
2319 		put_vnode(vnode);
2320 
2321 		path = nextPath;
2322 		vnode = nextVnode;
2323 
2324 		// see if we hit a covered node
2325 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2326 			put_vnode(vnode);
2327 			vnode = coveringNode;
2328 		}
2329 	}
2330 
2331 	*_vnode = vnode;
2332 	if (_parentID)
2333 		*_parentID = lastParentID;
2334 
2335 	return B_OK;
2336 }
2337 
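/*	Usage sketch (illustrative): the caller must pass a mutable buffer and
	always loses its reference to the starting vnode, so it has to acquire
	an extra reference if it wants to keep using that vnode afterwards:

		char path[B_PATH_NAME_LENGTH];
		strlcpy(path, "home/config/settings", sizeof(path));

		inc_vnode_ref_count(dir);
			// vnode_path_to_vnode() consumes one reference to "dir"

		struct vnode* vnode;
		status_t status = vnode_path_to_vnode(dir, path, true, 0, kernel,
			&vnode, NULL);
		if (status == B_OK)
			put_vnode(vnode);
*/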
2338 
2339 static status_t
2340 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2341 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2342 {
2343 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2344 		get_current_io_context(kernel), _vnode, _parentID);
2345 }
2346 
2347 
2348 static status_t
2349 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2350 	ino_t* _parentID, bool kernel)
2351 {
2352 	struct vnode* start = NULL;
2353 
2354 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2355 
2356 	if (!path)
2357 		return B_BAD_VALUE;
2358 
2359 	if (*path == '\0')
2360 		return B_ENTRY_NOT_FOUND;
2361 
2362 	// figure out if we need to start at root or at cwd
2363 	if (*path == '/') {
2364 		if (sRoot == NULL) {
2365 			// we're a bit early, aren't we?
2366 			return B_ERROR;
2367 		}
2368 
2369 		while (*++path == '/')
2370 			;
2371 		start = get_root_vnode(kernel);
2372 
2373 		if (*path == '\0') {
2374 			*_vnode = start;
2375 			return B_OK;
2376 		}
2377 
2378 	} else {
2379 		struct io_context* context = get_current_io_context(kernel);
2380 
2381 		mutex_lock(&context->io_mutex);
2382 		start = context->cwd;
2383 		if (start != NULL)
2384 			inc_vnode_ref_count(start);
2385 		mutex_unlock(&context->io_mutex);
2386 
2387 		if (start == NULL)
2388 			return B_ERROR;
2389 	}
2390 
2391 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2392 		_parentID);
2393 }
2394 
2395 
2396 /*! Returns the vnode of the next-to-last segment of the path, and returns
2397 	the last path component in \a filename.
2398 	The path buffer must be able to store at least one additional character.
2399 */
2400 static status_t
2401 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2402 	bool kernel)
2403 {
2404 	status_t status = get_dir_path_and_leaf(path, filename);
2405 	if (status != B_OK)
2406 		return status;
2407 
2408 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2409 }
2410 
2411 
2412 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2413 		   to by a FD + path pair.
2414 
2415 	\a path must be given in either case. \a fd might be omitted, in which
2416 	case \a path is either an absolute path or one relative to the current
2417 	directory. If both are supplied and \a path is relative, it is reckoned
2418 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2419 	is ignored.
2420 
2421 	The caller has the responsibility to call put_vnode() on the returned
2422 	directory vnode.
2423 
2424 	\param fd The FD. May be < 0.
2425 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2426 	       is modified by this function. It must have at least room for a
2427 	       string one character longer than the path it contains.
2428 	\param _vnode A pointer to a variable the directory vnode shall be written
2429 		   into.
2430 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2431 		   the leaf name of the specified entry will be written.
2432 	\param kernel \c true, if invoked from inside the kernel, \c false if
2433 		   invoked from userland.
2434 	\return \c B_OK, if everything went fine, another error code otherwise.
2435 */
2436 static status_t
2437 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2438 	char* filename, bool kernel)
2439 {
2440 	if (!path)
2441 		return B_BAD_VALUE;
2442 	if (*path == '\0')
2443 		return B_ENTRY_NOT_FOUND;
2444 	if (fd < 0)
2445 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2446 
2447 	status_t status = get_dir_path_and_leaf(path, filename);
2448 	if (status != B_OK)
2449 		return status;
2450 
2451 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2452 }
2453 
2454 
2455 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2456 		   to by a vnode + path pair.
2457 
2458 	\a path must be given in either case. \a vnode might be omitted, in which
2459 	case \a path is either an absolute path or one relative to the current
2460 	directory. If both are supplied and \a path is relative, it is reckoned
2461 	off of the directory referred to by \a vnode. If \a path is absolute,
2462 	\a vnode is ignored.
2463 
2464 	The caller has the responsibility to call put_vnode() on the returned
2465 	directory vnode.
2466 
2467 	\param vnode The vnode. May be \c NULL.
2468 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2469 	       is modified by this function. It must have at least room for a
2470 	       string one character longer than the path it contains.
2471 	\param _vnode A pointer to a variable the directory vnode shall be written
2472 		   into.
2473 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2474 		   the leaf name of the specified entry will be written.
2475 	\param kernel \c true, if invoked from inside the kernel, \c false if
2476 		   invoked from userland.
2477 	\return \c B_OK, if everything went fine, another error code otherwise.
2478 */
2479 static status_t
2480 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2481 	struct vnode** _vnode, char* filename, bool kernel)
2482 {
2483 	if (!path)
2484 		return B_BAD_VALUE;
2485 	if (*path == '\0')
2486 		return B_ENTRY_NOT_FOUND;
2487 	if (vnode == NULL || path[0] == '/')
2488 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2489 
2490 	status_t status = get_dir_path_and_leaf(path, filename);
2491 	if (status != B_OK)
2492 		return status;
2493 
2494 	inc_vnode_ref_count(vnode);
2495 		// vnode_path_to_vnode() always decrements the ref count
2496 
2497 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2498 }
2499 
2500 
2501 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2502 */
2503 static status_t
2504 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2505 	size_t bufferSize, struct io_context* ioContext)
2506 {
2507 	if (bufferSize < sizeof(struct dirent))
2508 		return B_BAD_VALUE;
2509 
2510 	// See if the vnode is covering another vnode and move to the covered
2511 	// vnode so we get the underlying file system
2512 	VNodePutter vnodePutter;
2513 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2514 		vnode = coveredVnode;
2515 		vnodePutter.SetTo(vnode);
2516 	}
2517 
2518 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2519 		// The FS supports getting the name of a vnode.
2520 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2521 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2522 			return B_OK;
2523 	}
2524 
2525 	// The FS doesn't support getting the name of a vnode. So we search the
2526 	// parent directory for the vnode, if the caller let us.
2527 
2528 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2529 		return B_UNSUPPORTED;
2530 
2531 	void* cookie;
2532 
2533 	status_t status = FS_CALL(parent, open_dir, &cookie);
2534 	if (status >= B_OK) {
2535 		while (true) {
2536 			uint32 num = 1;
2537 			// We use the FS hook directly instead of dir_read(), since we don't
2538 			// want the entries to be fixed up. We have already resolved vnode to
2539 			// the covered node.
2540 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2541 				&num);
2542 			if (status != B_OK)
2543 				break;
2544 			if (num == 0) {
2545 				status = B_ENTRY_NOT_FOUND;
2546 				break;
2547 			}
2548 
2549 			if (vnode->id == buffer->d_ino) {
2550 				// found correct entry!
2551 				break;
2552 			}
2553 		}
2554 
2555 		FS_CALL(parent, close_dir, cookie);
2556 		FS_CALL(parent, free_dir_cookie, cookie);
2557 	}
2558 	return status;
2559 }
2560 
2561 
2562 static status_t
2563 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2564 	size_t nameSize, bool kernel)
2565 {
2566 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2567 	struct dirent* dirent = (struct dirent*)buffer;
2568 
2569 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2570 		get_current_io_context(kernel));
2571 	if (status != B_OK)
2572 		return status;
2573 
2574 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2575 		return B_BUFFER_OVERFLOW;
2576 
2577 	return B_OK;
2578 }
2579 
2580 
2581 /*!	Gets the full path to a given directory vnode.
2582 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2583 	file system doesn't support this call, it will fall back to iterating
2584 	through the parent directory to get the name of the child.
2585 
2586 	To protect against circular loops, it supports a maximum tree depth
2587 	of 256 levels.
2588 
2589 	Note that the path may no longer be correct by the time this function
2590 	returns! No locking is used to ensure the returned path stays correct,
2591 	as paths aren't stable anyway: the path to a file can change at any time.
2592 
2593 	It might be a good idea, though, to check if the returned path exists
2594 	in the calling function (it's not done here for efficiency reasons).
2595 */
2596 static status_t
2597 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2598 	bool kernel)
2599 {
2600 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2601 
2602 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2603 		return B_BAD_VALUE;
2604 
2605 	if (!S_ISDIR(vnode->Type()))
2606 		return B_NOT_A_DIRECTORY;
2607 
2608 	char* path = buffer;
2609 	int32 insert = bufferSize;
2610 	int32 maxLevel = 256;
2611 	int32 length;
2612 	status_t status = B_OK;
2613 	struct io_context* ioContext = get_current_io_context(kernel);
2614 
2615 	// we don't use get_vnode() here because this call is more
2616 	// efficient and does all we need from get_vnode()
2617 	inc_vnode_ref_count(vnode);
2618 
2619 	path[--insert] = '\0';
2620 		// the path is filled right to left
2621 
2622 	while (true) {
2623 		// If the node is the context's root, bail out. Otherwise resolve mount
2624 		// points.
2625 		if (vnode == ioContext->root)
2626 			break;
2627 
2628 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2629 			put_vnode(vnode);
2630 			vnode = coveredVnode;
2631 		}
2632 
2633 		// lookup the parent vnode
2634 		struct vnode* parentVnode;
2635 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2636 		if (status != B_OK)
2637 			goto out;
2638 
2639 		if (parentVnode == vnode) {
2640 			// The caller apparently got their hands on a node outside of their
2641 			// context's root. Now we've hit the global root.
2642 			put_vnode(parentVnode);
2643 			break;
2644 		}
2645 
2646 		// get the node's name
2647 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2648 			// also used for fs_read_dir()
2649 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2650 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2651 			sizeof(nameBuffer), ioContext);
2652 
2653 		// release the current vnode, we only need its parent from now on
2654 		put_vnode(vnode);
2655 		vnode = parentVnode;
2656 
2657 		if (status != B_OK)
2658 			goto out;
2659 
2660 		// TODO: add an explicit check for loops in about 10 levels to do
2661 		// real loop detection
2662 
2663 		// don't go deeper than 'maxLevel' to protect against circular loops
2664 		if (maxLevel-- < 0) {
2665 			status = B_LINK_LIMIT;
2666 			goto out;
2667 		}
2668 
2669 		// add the name in front of the current path
2670 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2671 		length = strlen(name);
2672 		insert -= length;
2673 		if (insert <= 0) {
2674 			status = B_RESULT_NOT_REPRESENTABLE;
2675 			goto out;
2676 		}
2677 		memcpy(path + insert, name, length);
2678 		path[--insert] = '/';
2679 	}
2680 
2681 	// the root dir will result in an empty path: fix it
2682 	if (path[insert] == '\0')
2683 		path[--insert] = '/';
2684 
2685 	TRACE(("  path is: %s\n", path + insert));
2686 
2687 	// move the path to the start of the buffer
2688 	length = bufferSize - insert;
2689 	memmove(buffer, path + insert, length);
2690 
2691 out:
2692 	put_vnode(vnode);
2693 	return status;
2694 }
2695 
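/*	Illustrative walk-through of the right-to-left construction above for a
	directory at /boot/home and bufferSize == 16:

		start:        insert == 15, buffer[15] == '\0'
		leaf "home":  insert == 10, buffer + 10 == "/home"
		leaf "boot":  insert ==  5, buffer + 5  == "/boot/home"
		root reached: memmove() shifts "/boot/home" to the buffer start
*/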
2696 
2697 /*!	Checks the length of every path component, and adds a '.'
2698 	if the path ends in a slash.
2699 	The given path buffer must be able to store at least one
2700 	additional character.
2701 */
2702 static status_t
2703 check_path(char* to)
2704 {
2705 	int32 length = 0;
2706 
2707 	// check length of every path component
2708 
2709 	while (*to) {
2710 		char* begin;
2711 		if (*to == '/')
2712 			to++, length++;
2713 
2714 		begin = to;
2715 		while (*to != '/' && *to)
2716 			to++, length++;
2717 
2718 		if (to - begin > B_FILE_NAME_LENGTH)
2719 			return B_NAME_TOO_LONG;
2720 	}
2721 
2722 	if (length == 0)
2723 		return B_ENTRY_NOT_FOUND;
2724 
2725 	// complete path if there is a slash at the end
2726 
2727 	if (*(to - 1) == '/') {
2728 		if (length > B_PATH_NAME_LENGTH - 2)
2729 			return B_NAME_TOO_LONG;
2730 
2731 		to[0] = '.';
2732 		to[1] = '\0';
2733 	}
2734 
2735 	return B_OK;
2736 }
2737 
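/*	Examples (illustrative): check_path() leaves "/a/b" untouched, turns
	"/a/b/" into "/a/b/.", fails with B_NAME_TOO_LONG if any single
	component exceeds B_FILE_NAME_LENGTH, and returns B_ENTRY_NOT_FOUND
	for an empty path.
*/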
2738 
2739 static struct file_descriptor*
2740 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2741 {
2742 	struct file_descriptor* descriptor
2743 		= get_fd(get_current_io_context(kernel), fd);
2744 	if (descriptor == NULL)
2745 		return NULL;
2746 
2747 	struct vnode* vnode = fd_vnode(descriptor);
2748 	if (vnode == NULL) {
2749 		put_fd(descriptor);
2750 		return NULL;
2751 	}
2752 
2753 	// ToDo: when we can close a file descriptor at any point, investigate
2754 	//	if this is still valid to do (accessing the vnode without ref_count
2755 	//	or locking)
2756 	*_vnode = vnode;
2757 	return descriptor;
2758 }
2759 
2760 
2761 static struct vnode*
2762 get_vnode_from_fd(int fd, bool kernel)
2763 {
2764 	struct file_descriptor* descriptor;
2765 	struct vnode* vnode;
2766 
2767 	descriptor = get_fd(get_current_io_context(kernel), fd);
2768 	if (descriptor == NULL)
2769 		return NULL;
2770 
2771 	vnode = fd_vnode(descriptor);
2772 	if (vnode != NULL)
2773 		inc_vnode_ref_count(vnode);
2774 
2775 	put_fd(descriptor);
2776 	return vnode;
2777 }
2778 
2779 
2780 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2781 	only the path will be considered. In this case, the \a path must not be
2782 	NULL.
2783 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2784 	and should be NULL for files.
2785 */
2786 static status_t
2787 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2788 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2789 {
2790 	if (fd < 0 && !path)
2791 		return B_BAD_VALUE;
2792 
2793 	if (path != NULL && *path == '\0')
2794 		return B_ENTRY_NOT_FOUND;
2795 
2796 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2797 		// no FD or absolute path
2798 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2799 	}
2800 
2801 	// FD only, or FD + relative path
2802 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2803 	if (vnode == NULL)
2804 		return B_FILE_ERROR;
2805 
2806 	if (path != NULL) {
2807 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2808 			_vnode, _parentID);
2809 	}
2810 
2811 	// there is no relative path to take into account
2812 
2813 	*_vnode = vnode;
2814 	if (_parentID)
2815 		*_parentID = -1;
2816 
2817 	return B_OK;
2818 }
2819 
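/*	Case overview (illustrative) of the fd/path combinations handled above:

		fd < 0,  path "/x/y" -> path_to_vnode(), no FD involved
		fd >= 0, path "/x/y" -> absolute path, fd is ignored
		fd >= 0, path "x/y"  -> resolved relative to the fd's vnode
		fd >= 0, path NULL   -> the fd's own vnode is returned
		fd < 0,  path NULL   -> B_BAD_VALUE
*/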
2820 
2821 static int
2822 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2823 	void* cookie, int openMode, bool kernel)
2824 {
2825 	struct file_descriptor* descriptor;
2826 	int fd;
2827 
2828 	// If the vnode is mandatorily locked, we don't allow creating a new
2829 	// file or directory file_descriptor for it
2830 	if (vnode && vnode->mandatory_locked_by != NULL
2831 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2832 		return B_BUSY;
2833 
2834 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2835 		return B_BAD_VALUE;
2836 
2837 	descriptor = alloc_fd();
2838 	if (!descriptor)
2839 		return B_NO_MEMORY;
2840 
2841 	if (vnode)
2842 		descriptor->u.vnode = vnode;
2843 	else
2844 		descriptor->u.mount = mount;
2845 	descriptor->cookie = cookie;
2846 
2847 	switch (type) {
2848 		// vnode types
2849 		case FDTYPE_FILE:
2850 			descriptor->ops = &sFileOps;
2851 			break;
2852 		case FDTYPE_DIR:
2853 			descriptor->ops = &sDirectoryOps;
2854 			break;
2855 		case FDTYPE_ATTR:
2856 			descriptor->ops = &sAttributeOps;
2857 			break;
2858 		case FDTYPE_ATTR_DIR:
2859 			descriptor->ops = &sAttributeDirectoryOps;
2860 			break;
2861 
2862 		// mount types
2863 		case FDTYPE_INDEX_DIR:
2864 			descriptor->ops = &sIndexDirectoryOps;
2865 			break;
2866 		case FDTYPE_QUERY:
2867 			descriptor->ops = &sQueryOps;
2868 			break;
2869 
2870 		default:
2871 			panic("get_new_fd() called with unknown type %d\n", type);
2872 			break;
2873 	}
2874 	descriptor->type = type;
2875 	descriptor->open_mode = openMode;
2876 
2877 	io_context* context = get_current_io_context(kernel);
2878 	fd = new_fd(context, descriptor);
2879 	if (fd < 0) {
2880 		descriptor->ops = NULL;
2881 		put_fd(descriptor);
2882 		return B_NO_MORE_FDS;
2883 	}
2884 
2885 	mutex_lock(&context->io_mutex);
2886 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2887 	mutex_unlock(&context->io_mutex);
2888 
2889 	return fd;
2890 }
2891 
2892 
2893 /*!	Normalizes \a path in place. It's otherwise semantically equivalent to
2894 	vfs_normalize_path(). See there for more documentation.
2895 */
2896 static status_t
2897 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2898 {
2899 	VNodePutter dirPutter;
2900 	struct vnode* dir = NULL;
2901 	status_t error;
2902 
2903 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2904 		// get dir vnode + leaf name
2905 		struct vnode* nextDir;
2906 		char leaf[B_FILE_NAME_LENGTH];
2907 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2908 		if (error != B_OK)
2909 			return error;
2910 
2911 		dir = nextDir;
2912 		strcpy(path, leaf);
2913 		dirPutter.SetTo(dir);
2914 
2915 		// get file vnode, if we shall resolve links
2916 		bool fileExists = false;
2917 		struct vnode* fileVnode;
2918 		VNodePutter fileVnodePutter;
2919 		if (traverseLink) {
2920 			inc_vnode_ref_count(dir);
2921 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2922 					NULL) == B_OK) {
2923 				fileVnodePutter.SetTo(fileVnode);
2924 				fileExists = true;
2925 			}
2926 		}
2927 
2928 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2929 			// we're done -- construct the path
2930 			bool hasLeaf = true;
2931 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2932 				// special cases "." and ".." -- get the dir, forget the leaf
2933 				inc_vnode_ref_count(dir);
2934 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2935 					&nextDir, NULL);
2936 				if (error != B_OK)
2937 					return error;
2938 				dir = nextDir;
2939 				dirPutter.SetTo(dir);
2940 				hasLeaf = false;
2941 			}
2942 
2943 			// get the directory path
2944 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2945 			if (error != B_OK)
2946 				return error;
2947 
2948 			// append the leaf name
2949 			if (hasLeaf) {
2950 				// insert a directory separator if this is not the file system
2951 				// root
2952 				if ((strcmp(path, "/") != 0
2953 					&& strlcat(path, "/", pathSize) >= pathSize)
2954 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2955 					return B_NAME_TOO_LONG;
2956 				}
2957 			}
2958 
2959 			return B_OK;
2960 		}
2961 
2962 		// read link
2963 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2964 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2965 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2966 			if (error != B_OK)
2967 				return error;
2968 			if (bufferSize < B_PATH_NAME_LENGTH)
2969 				path[bufferSize] = '\0';
2970 		} else
2971 			return B_BAD_VALUE;
2972 	}
2973 
2974 	return B_LINK_LIMIT;
2975 }
2976 
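/*	Usage sketch (illustrative): normalizing a path in place. Assuming all
	components exist, something like "/boot/home//config/../config" would
	be rewritten to "/boot/home/config", with symlinks resolved because
	traverseLink is true:

		char path[B_PATH_NAME_LENGTH] = "/boot/home//config/../config";
		status_t status = normalize_path(path, sizeof(path), true, true);
*/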
2977 
2978 static status_t
2979 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2980 	struct io_context* ioContext)
2981 {
2982 	// Make sure the IO context root is not bypassed.
2983 	if (parent == ioContext->root) {
2984 		*_device = parent->device;
2985 		*_node = parent->id;
2986 		return B_OK;
2987 	}
2988 
2989 	inc_vnode_ref_count(parent);
2990 		// vnode_path_to_vnode() puts the node
2991 
2992 	// ".." is guaranteed not to be clobbered by this call
2993 	struct vnode* vnode;
2994 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2995 		ioContext, &vnode, NULL);
2996 	if (status == B_OK) {
2997 		*_device = vnode->device;
2998 		*_node = vnode->id;
2999 		put_vnode(vnode);
3000 	}
3001 
3002 	return status;
3003 }
3004 
3005 
3006 #ifdef ADD_DEBUGGER_COMMANDS
3007 
3008 
3009 static void
3010 _dump_advisory_locking(advisory_locking* locking)
3011 {
3012 	if (locking == NULL)
3013 		return;
3014 
3015 	kprintf("   lock:        %" B_PRId32, locking->lock);
3016 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3017 
3018 	int32 index = 0;
3019 	LockList::Iterator iterator = locking->locks.GetIterator();
3020 	while (iterator.HasNext()) {
3021 		struct advisory_lock* lock = iterator.Next();
3022 
3023 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3024 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3025 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3026 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3027 	}
3028 }
3029 
3030 
3031 static void
3032 _dump_mount(struct fs_mount* mount)
3033 {
3034 	kprintf("MOUNT: %p\n", mount);
3035 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3036 	kprintf(" device_name:   %s\n", mount->device_name);
3037 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3038 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3039 	kprintf(" partition:     %p\n", mount->partition);
3040 	kprintf(" lock:          %p\n", &mount->lock);
3041 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3042 		mount->owns_file_device ? " owns_file_device" : "");
3043 
3044 	fs_volume* volume = mount->volume;
3045 	while (volume != NULL) {
3046 		kprintf(" volume %p:\n", volume);
3047 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3048 		kprintf("  private_volume:   %p\n", volume->private_volume);
3049 		kprintf("  ops:              %p\n", volume->ops);
3050 		kprintf("  file_system:      %p\n", volume->file_system);
3051 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3052 		volume = volume->super_volume;
3053 	}
3054 
3055 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3056 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3057 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3058 	set_debug_variable("_partition", (addr_t)mount->partition);
3059 }
3060 
3061 
3062 static bool
3063 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3064 	const char* name)
3065 {
3066 	bool insertSlash = buffer[bufferSize] != '\0';
3067 	size_t nameLength = strlen(name);
3068 
3069 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3070 		return false;
3071 
3072 	if (insertSlash)
3073 		buffer[--bufferSize] = '/';
3074 
3075 	bufferSize -= nameLength;
3076 	memcpy(buffer + bufferSize, name, nameLength);
3077 
3078 	return true;
3079 }
3080 
3081 
3082 static bool
3083 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3084 	ino_t nodeID)
3085 {
3086 	if (bufferSize == 0)
3087 		return false;
3088 
3089 	bool insertSlash = buffer[bufferSize] != '\0';
3090 	if (insertSlash)
3091 		buffer[--bufferSize] = '/';
3092 
3093 	size_t size = snprintf(buffer, bufferSize,
3094 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3095 	if (size > bufferSize) {
3096 		if (insertSlash)
3097 			bufferSize++;
3098 		return false;
3099 	}
3100 
3101 	if (size < bufferSize)
3102 		memmove(buffer + bufferSize - size, buffer, size);
3103 
3104 	bufferSize -= size;
3105 	return true;
3106 }
3107 
3108 
3109 static char*
3110 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3111 	bool& _truncated)
3112 {
3113 	// null-terminate the path
3114 	buffer[--bufferSize] = '\0';
3115 
3116 	while (true) {
3117 		while (vnode->covers != NULL)
3118 			vnode = vnode->covers;
3119 
3120 		if (vnode == sRoot) {
3121 			_truncated = bufferSize == 0;
3122 			if (!_truncated)
3123 				buffer[--bufferSize] = '/';
3124 			return buffer + bufferSize;
3125 		}
3126 
3127 		// resolve the name
3128 		ino_t dirID;
3129 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3130 			vnode->id, dirID);
3131 		if (name == NULL) {
3132 			// Failed to resolve the name -- prepend "<dev,node>/".
3133 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3134 				vnode->mount->id, vnode->id);
3135 			return buffer + bufferSize;
3136 		}
3137 
3138 		// prepend the name
3139 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3140 			_truncated = true;
3141 			return buffer + bufferSize;
3142 		}
3143 
3144 		// resolve the directory node
3145 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3146 		if (nextVnode == NULL) {
3147 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3148 				vnode->mount->id, dirID);
3149 			return buffer + bufferSize;
3150 		}
3151 
3152 		vnode = nextVnode;
3153 	}
3154 }
3155 
3156 
3157 static void
3158 _dump_vnode(struct vnode* vnode, bool printPath)
3159 {
3160 	kprintf("VNODE: %p\n", vnode);
3161 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3162 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3163 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3164 	kprintf(" private_node:  %p\n", vnode->private_node);
3165 	kprintf(" mount:         %p\n", vnode->mount);
3166 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3167 	kprintf(" covers:        %p\n", vnode->covers);
3168 	kprintf(" cache:         %p\n", vnode->cache);
3169 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3170 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3171 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3172 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3173 
3174 	_dump_advisory_locking(vnode->advisory_locking);
3175 
3176 	if (printPath) {
3177 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3178 		if (buffer != NULL) {
3179 			bool truncated;
3180 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3181 				B_PATH_NAME_LENGTH, truncated);
3182 			if (path != NULL) {
3183 				kprintf(" path:          ");
3184 				if (truncated)
3185 					kputs("<truncated>/");
3186 				kputs(path);
3187 				kputs("\n");
3188 			} else
3189 				kprintf("Failed to resolve vnode path.\n");
3190 
3191 			debug_free(buffer);
3192 		} else
3193 			kprintf("Failed to allocate memory for constructing the path.\n");
3194 	}
3195 
3196 	set_debug_variable("_node", (addr_t)vnode->private_node);
3197 	set_debug_variable("_mount", (addr_t)vnode->mount);
3198 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3199 	set_debug_variable("_covers", (addr_t)vnode->covers);
3200 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3201 }
3202 
3203 
3204 static int
3205 dump_mount(int argc, char** argv)
3206 {
3207 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3208 		kprintf("usage: %s [id|address]\n", argv[0]);
3209 		return 0;
3210 	}
3211 
3212 	ulong val = parse_expression(argv[1]);
3213 	uint32 id = val;
3214 
3215 	struct fs_mount* mount = sMountsTable->Lookup(id);
3216 	if (mount == NULL) {
3217 		if (IS_USER_ADDRESS(id)) {
3218 			kprintf("fs_mount not found\n");
3219 			return 0;
3220 		}
3221 		mount = (fs_mount*)val;
3222 	}
3223 
3224 	_dump_mount(mount);
3225 	return 0;
3226 }
3227 
3228 
3229 static int
3230 dump_mounts(int argc, char** argv)
3231 {
3232 	if (argc != 1) {
3233 		kprintf("usage: %s\n", argv[0]);
3234 		return 0;
3235 	}
3236 
3237 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3238 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3239 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3240 
3241 	struct fs_mount* mount;
3242 
3243 	MountTable::Iterator iterator(sMountsTable);
3244 	while (iterator.HasNext()) {
3245 		mount = iterator.Next();
3246 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3247 			mount->root_vnode->covers, mount->volume->private_volume,
3248 			mount->volume->file_system_name);
3249 
3250 		fs_volume* volume = mount->volume;
3251 		while (volume->super_volume != NULL) {
3252 			volume = volume->super_volume;
3253 			kprintf("                                     %p %s\n",
3254 				volume->private_volume, volume->file_system_name);
3255 		}
3256 	}
3257 
3258 	return 0;
3259 }
3260 
3261 
3262 static int
3263 dump_vnode(int argc, char** argv)
3264 {
3265 	bool printPath = false;
3266 	int argi = 1;
3267 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3268 		printPath = true;
3269 		argi++;
3270 	}
3271 
3272 	if (argi >= argc || argi + 2 < argc) {
3273 		print_debugger_command_usage(argv[0]);
3274 		return 0;
3275 	}
3276 
3277 	struct vnode* vnode = NULL;
3278 
3279 	if (argi + 1 == argc) {
3280 		vnode = (struct vnode*)parse_expression(argv[argi]);
3281 		if (IS_USER_ADDRESS(vnode)) {
3282 			kprintf("invalid vnode address\n");
3283 			return 0;
3284 		}
3285 		_dump_vnode(vnode, printPath);
3286 		return 0;
3287 	}
3288 
3289 	dev_t device = parse_expression(argv[argi]);
3290 	ino_t id = parse_expression(argv[argi + 1]);
3291 
3292 	VnodeTable::Iterator iterator(sVnodeTable);
3293 	while (iterator.HasNext()) {
3294 		vnode = iterator.Next();
3295 		if (vnode->id != id || vnode->device != device)
3296 			continue;
3297 
3298 		_dump_vnode(vnode, printPath);
3299 	}
3300 
3301 	return 0;
3302 }
3303 
3304 
3305 static int
3306 dump_vnodes(int argc, char** argv)
3307 {
3308 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3309 		kprintf("usage: %s [device]\n", argv[0]);
3310 		return 0;
3311 	}
3312 
3313 	// restrict dumped nodes to a certain device if requested
3314 	dev_t device = parse_expression(argv[1]);
3315 
3316 	struct vnode* vnode;
3317 
3318 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3319 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3320 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3321 
3322 	VnodeTable::Iterator iterator(sVnodeTable);
3323 	while (iterator.HasNext()) {
3324 		vnode = iterator.Next();
3325 		if (vnode->device != device)
3326 			continue;
3327 
3328 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3329 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3330 			vnode->private_node, vnode->advisory_locking,
3331 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3332 			vnode->IsUnpublished() ? "u" : "-");
3333 	}
3334 
3335 	return 0;
3336 }
3337 
3338 
3339 static int
3340 dump_vnode_caches(int argc, char** argv)
3341 {
3342 	struct vnode* vnode;
3343 
3344 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3345 		kprintf("usage: %s [device]\n", argv[0]);
3346 		return 0;
3347 	}
3348 
3349 	// restrict dumped nodes to a certain device if requested
3350 	dev_t device = -1;
3351 	if (argc > 1)
3352 		device = parse_expression(argv[1]);
3353 
3354 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3355 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3356 
3357 	VnodeTable::Iterator iterator(sVnodeTable);
3358 	while (iterator.HasNext()) {
3359 		vnode = iterator.Next();
3360 		if (vnode->cache == NULL)
3361 			continue;
3362 		if (device != -1 && vnode->device != device)
3363 			continue;
3364 
3365 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3366 			vnode, vnode->device, vnode->id, vnode->cache,
3367 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3368 			vnode->cache->page_count);
3369 	}
3370 
3371 	return 0;
3372 }
3373 
3374 
3375 int
3376 dump_io_context(int argc, char** argv)
3377 {
3378 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3379 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3380 		return 0;
3381 	}
3382 
3383 	struct io_context* context = NULL;
3384 
3385 	if (argc > 1) {
3386 		ulong num = parse_expression(argv[1]);
3387 		if (IS_KERNEL_ADDRESS(num))
3388 			context = (struct io_context*)num;
3389 		else {
3390 			Team* team = team_get_team_struct_locked(num);
3391 			if (team == NULL) {
3392 				kprintf("could not find team with ID %lu\n", num);
3393 				return 0;
3394 			}
3395 			context = (struct io_context*)team->io_context;
3396 		}
3397 	} else
3398 		context = get_current_io_context(true);
3399 
3400 	kprintf("I/O CONTEXT: %p\n", context);
3401 	kprintf(" root vnode:\t%p\n", context->root);
3402 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3403 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3404 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3405 
3406 	if (context->num_used_fds) {
3407 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3408 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3409 	}
3410 
3411 	for (uint32 i = 0; i < context->table_size; i++) {
3412 		struct file_descriptor* fd = context->fds[i];
3413 		if (fd == NULL)
3414 			continue;
3415 
3416 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3417 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3418 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3419 			fd->pos, fd->cookie,
3420 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3421 				? "mount" : "vnode",
3422 			fd->u.vnode);
3423 	}
3424 
3425 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3426 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3427 
3428 	set_debug_variable("_cwd", (addr_t)context->cwd);
3429 
3430 	return 0;
3431 }
3432 
3433 
3434 int
3435 dump_vnode_usage(int argc, char** argv)
3436 {
3437 	if (argc != 1) {
3438 		kprintf("usage: %s\n", argv[0]);
3439 		return 0;
3440 	}
3441 
3442 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3443 		sUnusedVnodes, kMaxUnusedVnodes);
3444 
3445 	uint32 count = sVnodeTable->CountElements();
3446 
3447 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3448 		count - sUnusedVnodes);
3449 	return 0;
3450 }
3451 
3452 #endif	// ADD_DEBUGGER_COMMANDS
3453 
3454 
3455 /*!	Clears memory specified by an iovec array.
3456 */
3457 static void
3458 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3459 {
3460 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3461 		size_t length = std::min(vecs[i].iov_len, bytes);
3462 		memset(vecs[i].iov_base, 0, length);
3463 		bytes -= length;
3464 	}
3465 }
3466 
3467 
3468 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3469 	and calls the file system hooks to read/write the request to disk.
3470 */
3471 static status_t
3472 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3473 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3474 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3475 	bool doWrite)
3476 {
3477 	if (fileVecCount == 0) {
3478 		// There are no file vecs at this offset, so we're obviously trying
3479 		// to access the file outside of its bounds
3480 		return B_BAD_VALUE;
3481 	}
3482 
3483 	size_t numBytes = *_numBytes;
3484 	uint32 fileVecIndex;
3485 	size_t vecOffset = *_vecOffset;
3486 	uint32 vecIndex = *_vecIndex;
3487 	status_t status;
3488 	size_t size;
3489 
3490 	if (!doWrite && vecOffset == 0) {
3491 		// now directly read the data from the device
3492 		// the first file_io_vec can be read directly
3493 
3494 		if (fileVecs[0].length < (off_t)numBytes)
3495 			size = fileVecs[0].length;
3496 		else
3497 			size = numBytes;
3498 
3499 		if (fileVecs[0].offset >= 0) {
3500 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3501 				&vecs[vecIndex], vecCount - vecIndex, &size);
3502 		} else {
3503 			// sparse read
3504 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3505 			status = B_OK;
3506 		}
3507 		if (status != B_OK)
3508 			return status;
3509 
3510 		// TODO: this is a work-around for buggy device drivers!
3511 		//	When our own drivers honour the length, we can:
3512 		//	a) also use this direct I/O for writes (otherwise, it would
3513 		//	   overwrite precious data)
3514 		//	b) panic if the term below is true (at least for writes)
3515 		if ((off_t)size > fileVecs[0].length) {
3516 			//dprintf("warning: device driver %p doesn't respect total length "
3517 			//	"in read_pages() call!\n", ref->device);
3518 			size = fileVecs[0].length;
3519 		}
3520 
3521 		ASSERT((off_t)size <= fileVecs[0].length);
3522 
3523 		// If the file portion was contiguous, we're already done now
3524 		if (size == numBytes)
3525 			return B_OK;
3526 
3527 		// if we reached the end of the file, we can return as well
3528 		if ((off_t)size != fileVecs[0].length) {
3529 			*_numBytes = size;
3530 			return B_OK;
3531 		}
3532 
3533 		fileVecIndex = 1;
3534 
3535 		// first, find out where we have to continue in our iovecs
3536 		for (; vecIndex < vecCount; vecIndex++) {
3537 			if (size < vecs[vecIndex].iov_len)
3538 				break;
3539 
3540 			size -= vecs[vecIndex].iov_len;
3541 		}
3542 
3543 		vecOffset = size;
3544 	} else {
3545 		fileVecIndex = 0;
3546 		size = 0;
3547 	}
3548 
3549 	// Too bad, let's process the rest of the file_io_vecs
3550 
3551 	size_t totalSize = size;
3552 	size_t bytesLeft = numBytes - size;
3553 
3554 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3555 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3556 		off_t fileOffset = fileVec.offset;
3557 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3558 
3559 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3560 			fileLeft));
3561 
3562 		// process the complete fileVec
3563 		while (fileLeft > 0) {
3564 			iovec tempVecs[MAX_TEMP_IO_VECS];
3565 			uint32 tempCount = 0;
3566 
3567 			// size tracks how much of what is left of the current fileVec
3568 			// (fileLeft) has been assigned to tempVecs
3569 			size = 0;
3570 
3571 			// assign what is left of the current fileVec to the tempVecs
3572 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3573 					&& tempCount < MAX_TEMP_IO_VECS;) {
3574 				// try to satisfy one iovec per iteration (or as much as
3575 				// possible)
3576 
3577 				// bytes left of the current iovec
3578 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3579 				if (vecLeft == 0) {
3580 					vecOffset = 0;
3581 					vecIndex++;
3582 					continue;
3583 				}
3584 
3585 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3586 					vecIndex, vecOffset, size));
3587 
3588 				// actually available bytes
3589 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3590 
3591 				tempVecs[tempCount].iov_base
3592 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3593 				tempVecs[tempCount].iov_len = tempVecSize;
3594 				tempCount++;
3595 
3596 				size += tempVecSize;
3597 				vecOffset += tempVecSize;
3598 			}
3599 
3600 			size_t bytes = size;
3601 
3602 			if (fileOffset == -1) {
3603 				if (doWrite) {
3604 					panic("sparse write attempt: vnode %p", vnode);
3605 					status = B_IO_ERROR;
3606 				} else {
3607 					// sparse read
3608 					zero_iovecs(tempVecs, tempCount, bytes);
3609 					status = B_OK;
3610 				}
3611 			} else if (doWrite) {
3612 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3613 					tempVecs, tempCount, &bytes);
3614 			} else {
3615 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3616 					tempVecs, tempCount, &bytes);
3617 			}
3618 			if (status != B_OK)
3619 				return status;
3620 
3621 			totalSize += bytes;
3622 			bytesLeft -= size;
3623 			if (fileOffset >= 0)
3624 				fileOffset += size;
3625 			fileLeft -= size;
3626 			//dprintf("-> file left = %Lu\n", fileLeft);
3627 
3628 			if (size != bytes || vecIndex >= vecCount) {
3629 				// there are no more bytes or iovecs, let's bail out
3630 				*_numBytes = totalSize;
3631 				return B_OK;
3632 			}
3633 		}
3634 	}
3635 
3636 	*_vecIndex = vecIndex;
3637 	*_vecOffset = vecOffset;
3638 	*_numBytes = totalSize;
3639 	return B_OK;
3640 }
3641 
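/*	Illustrative example of the matching performed above: an 8 KiB read
	described by two 4 KiB iovecs may map onto the file vecs
	{offset 1000, length 6144} and {offset -1, length 2048}. The first
	6144 bytes are read directly via read_pages(); the remaining 2048
	bytes fall into the sparse vec (offset == -1) and are zero-filled.
*/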
3642 
3643 static bool
3644 is_user_in_group(gid_t gid)
3645 {
3646 	if (gid == getegid())
3647 		return true;
3648 
3649 	gid_t groups[NGROUPS_MAX];
3650 	int groupCount = getgroups(NGROUPS_MAX, groups);
3651 	for (int i = 0; i < groupCount; i++) {
3652 		if (gid == groups[i])
3653 			return true;
3654 	}
3655 
3656 	return false;
3657 }
3658 
3659 
3660 static status_t
3661 free_io_context(io_context* context)
3662 {
3663 	uint32 i;
3664 
3665 	TIOC(FreeIOContext(context));
3666 
3667 	if (context->root)
3668 		put_vnode(context->root);
3669 
3670 	if (context->cwd)
3671 		put_vnode(context->cwd);
3672 
3673 	mutex_lock(&context->io_mutex);
3674 
3675 	for (i = 0; i < context->table_size; i++) {
3676 		if (struct file_descriptor* descriptor = context->fds[i]) {
3677 			close_fd(context, descriptor);
3678 			put_fd(descriptor);
3679 		}
3680 	}
3681 
3682 	mutex_destroy(&context->io_mutex);
3683 
3684 	remove_node_monitors(context);
3685 	free(context->fds);
3686 	free(context);
3687 
3688 	return B_OK;
3689 }
3690 
3691 
3692 static status_t
3693 resize_monitor_table(struct io_context* context, const int newSize)
3694 {
3695 	status_t status = B_OK;
3696 
3697 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3698 		return B_BAD_VALUE;
3699 
3700 	mutex_lock(&context->io_mutex);
3701 
3702 	if ((size_t)newSize < context->num_monitors) {
3703 		status = B_BUSY;
3704 		goto out;
3705 	}
3706 	context->max_monitors = newSize;
3707 
3708 out:
3709 	mutex_unlock(&context->io_mutex);
3710 	return status;
3711 }
3712 
3713 
3714 //	#pragma mark - public API for file systems
3715 
3716 
3717 extern "C" status_t
3718 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3719 	fs_vnode_ops* ops)
3720 {
3721 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3722 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3723 
3724 	if (privateNode == NULL)
3725 		return B_BAD_VALUE;
3726 
3727 	int32 tries = BUSY_VNODE_RETRIES;
3728 restart:
3729 	// create the node
3730 	bool nodeCreated;
3731 	struct vnode* vnode;
3732 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3733 		nodeCreated);
3734 	if (status != B_OK)
3735 		return status;
3736 
3737 	WriteLocker nodeLocker(sVnodeLock, true);
3738 		// create_new_vnode_and_lock() has locked for us
3739 
3740 	if (!nodeCreated && vnode->IsBusy()) {
3741 		nodeLocker.Unlock();
3742 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3743 			return B_BUSY;
3744 		goto restart;
3745 	}
3746 
3747 	// file system integrity check:
3748 	// test if the vnode already exists and bail out if this is the case!
3749 	if (!nodeCreated) {
3750 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3751 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3752 			vnode->private_node);
3753 		return B_ERROR;
3754 	}
3755 
3756 	vnode->private_node = privateNode;
3757 	vnode->ops = ops;
3758 	vnode->SetUnpublished(true);
3759 
3760 	TRACE(("returns: %s\n", strerror(status)));
3761 
3762 	return status;
3763 }
3764 
3765 
3766 extern "C" status_t
3767 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3768 	fs_vnode_ops* ops, int type, uint32 flags)
3769 {
3770 	FUNCTION(("publish_vnode()\n"));
3771 
3772 	int32 tries = BUSY_VNODE_RETRIES;
3773 restart:
3774 	WriteLocker locker(sVnodeLock);
3775 
3776 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3777 
3778 	bool nodeCreated = false;
3779 	if (vnode == NULL) {
3780 		if (privateNode == NULL)
3781 			return B_BAD_VALUE;
3782 
3783 		// create the node
3784 		locker.Unlock();
3785 			// create_new_vnode_and_lock() will re-lock for us on success
3786 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3787 			nodeCreated);
3788 		if (status != B_OK)
3789 			return status;
3790 
3791 		locker.SetTo(sVnodeLock, true);
3792 	}
3793 
3794 	if (nodeCreated) {
3795 		vnode->private_node = privateNode;
3796 		vnode->ops = ops;
3797 		vnode->SetUnpublished(true);
3798 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3799 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3800 		// already known, but not published
3801 	} else if (vnode->IsBusy()) {
3802 		locker.Unlock();
3803 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3804 			return B_BUSY;
3805 		goto restart;
3806 	} else
3807 		return B_BAD_VALUE;
3808 
3810 	vnode->SetType(type);
3811 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3812 
3813 	bool publishSpecialSubNode = is_special_node_type(type)
3814 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3815 
3816 	status_t status = B_OK;
3817 
3818 	// create sub vnodes, if necessary
3819 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3820 		locker.Unlock();
3821 
3822 		fs_volume* subVolume = volume;
3823 		if (volume->sub_volume != NULL) {
3824 			while (status == B_OK && subVolume->sub_volume != NULL) {
3825 				subVolume = subVolume->sub_volume;
3826 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3827 					vnode);
3828 			}
3829 		}
3830 
3831 		if (status == B_OK && publishSpecialSubNode)
3832 			status = create_special_sub_node(vnode, flags);
3833 
3834 		if (status != B_OK) {
3835 			// error -- clean up the created sub vnodes
3836 			while (subVolume->super_volume != volume) {
3837 				subVolume = subVolume->super_volume;
3838 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3839 			}
3840 		}
3841 
3842 		if (status == B_OK) {
3843 			ReadLocker vnodesReadLocker(sVnodeLock);
3844 			AutoLocker<Vnode> nodeLocker(vnode);
3845 			vnode->SetBusy(false);
3846 			vnode->SetUnpublished(false);
3847 		} else {
3848 			locker.Lock();
3849 			sVnodeTable->Remove(vnode);
3850 			remove_vnode_from_mount_list(vnode, vnode->mount);
3851 			object_cache_free(sVnodeCache, vnode, 0);
3852 		}
3853 	} else {
3854 		// we still hold the write lock -- mark the node unbusy and published
3855 		vnode->SetBusy(false);
3856 		vnode->SetUnpublished(false);
3857 	}
3858 
3859 	TRACE(("returns: %s\n", strerror(status)));
3860 
3861 	return status;
3862 }
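
/*	A file system typically creates and publishes a node in two steps:
	new_vnode() makes the node known to the VFS (still busy and
	unpublished), and publish_vnode() makes it available once it has been
	fully initialized. A minimal sketch, assuming hypothetical myfs_* names:

		myfs_inode* inode = myfs_new_inode(volume);
		status_t status = new_vnode(volume, inode->id, inode,
			&gMyfsVnodeOps);
		if (status == B_OK) {
			// ... write the on-disk structures ...
			status = publish_vnode(volume, inode->id, inode,
				&gMyfsVnodeOps, S_IFREG, 0);
		}
*/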
3863 
3864 
3865 extern "C" status_t
3866 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3867 {
3868 	struct vnode* vnode;
3869 
3870 	if (volume == NULL)
3871 		return B_BAD_VALUE;
3872 
3873 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3874 	if (status != B_OK)
3875 		return status;
3876 
3877 	// If this is a layered FS, we need to get the node cookie for the requested
3878 	// layer.
3879 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3880 		fs_vnode resolvedNode;
3881 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3882 			&resolvedNode);
3883 		if (status != B_OK) {
3884 			panic("get_vnode(): Failed to get super node for vnode %p, "
3885 				"volume: %p", vnode, volume);
3886 			put_vnode(vnode);
3887 			return status;
3888 		}
3889 
3890 		if (_privateNode != NULL)
3891 			*_privateNode = resolvedNode.private_node;
3892 	} else if (_privateNode != NULL)
3893 		*_privateNode = vnode->private_node;
3894 
3895 	return B_OK;
3896 }
3897 
3898 
3899 extern "C" status_t
3900 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3901 {
3902 	ReadLocker nodeLocker(sVnodeLock);
3903 
3904 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3905 	if (vnode == NULL)
3906 		return B_BAD_VALUE;
3907 
3908 	inc_vnode_ref_count(vnode);
3909 	return B_OK;
3910 }
3911 
3912 
3913 extern "C" status_t
3914 put_vnode(fs_volume* volume, ino_t vnodeID)
3915 {
3916 	struct vnode* vnode;
3917 
3918 	rw_lock_read_lock(&sVnodeLock);
3919 	vnode = lookup_vnode(volume->id, vnodeID);
3920 	rw_lock_read_unlock(&sVnodeLock);
3921 
3922 	if (vnode == NULL)
3923 		return B_BAD_VALUE;
3924 
3925 	dec_vnode_ref_count(vnode, false, true);
3926 	return B_OK;
3927 }
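
/*	get_vnode() and put_vnode() must be strictly paired: every successful
	get_vnode() has to be matched by exactly one put_vnode() on the same
	node. A minimal sketch, assuming a hypothetical myfs_inode type:

		myfs_inode* inode;
		if (get_vnode(volume, inodeID, (void**)&inode) == B_OK) {
			// ... use inode -- the node cannot go away meanwhile ...
			put_vnode(volume, inodeID);
		}
*/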
3928 
3929 
3930 extern "C" status_t
3931 remove_vnode(fs_volume* volume, ino_t vnodeID)
3932 {
3933 	ReadLocker locker(sVnodeLock);
3934 
3935 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3936 	if (vnode == NULL)
3937 		return B_ENTRY_NOT_FOUND;
3938 
3939 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3940 		// this vnode is in use
3941 		return B_BUSY;
3942 	}
3943 
3944 	vnode->Lock();
3945 
3946 	vnode->SetRemoved(true);
3947 	bool removeUnpublished = false;
3948 
3949 	if (vnode->IsUnpublished()) {
3950 		// prepare the vnode for deletion
3951 		removeUnpublished = true;
3952 		vnode->SetBusy(true);
3953 	}
3954 
3955 	vnode->Unlock();
3956 	locker.Unlock();
3957 
3958 	if (removeUnpublished) {
3959 		// If the vnode hasn't been published yet, we delete it here
3960 		atomic_add(&vnode->ref_count, -1);
3961 		free_vnode(vnode, true);
3962 	}
3963 
3964 	return B_OK;
3965 }
3966 
3967 
3968 extern "C" status_t
3969 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3970 {
3971 	struct vnode* vnode;
3972 
3973 	rw_lock_read_lock(&sVnodeLock);
3974 
3975 	vnode = lookup_vnode(volume->id, vnodeID);
3976 	if (vnode) {
3977 		AutoLocker<Vnode> nodeLocker(vnode);
3978 		vnode->SetRemoved(false);
3979 	}
3980 
3981 	rw_lock_read_unlock(&sVnodeLock);
3982 	return B_OK;
3983 }
3984 
3985 
3986 extern "C" status_t
3987 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3988 {
3989 	ReadLocker _(sVnodeLock);
3990 
3991 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3992 		if (_removed != NULL)
3993 			*_removed = vnode->IsRemoved();
3994 		return B_OK;
3995 	}
3996 
3997 	return B_BAD_VALUE;
3998 }
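
/*	remove_vnode() is what a file system calls from its unlink()/rmdir()
	hooks once the last entry referring to a node has been removed; the node
	itself is deleted when its last reference is put. A sketch, assuming
	hypothetical myfs_* names:

		status_t
		myfs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
		{
			ino_t id;
			status_t status = myfs_remove_entry(dir->private_node, name,
				&id);
			if (status == B_OK)
				status = remove_vnode(volume, id);
			return status;
		}
*/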
3999 
4000 
4001 extern "C" fs_volume*
4002 volume_for_vnode(fs_vnode* _vnode)
4003 {
4004 	if (_vnode == NULL)
4005 		return NULL;
4006 
4007 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4008 	return vnode->mount->volume;
4009 }
4010 
4011 
4012 extern "C" status_t
4013 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4014 	uid_t nodeUserID)
4015 {
4016 	// get node permissions
4017 	int userPermissions = (mode & S_IRWXU) >> 6;
4018 	int groupPermissions = (mode & S_IRWXG) >> 3;
4019 	int otherPermissions = mode & S_IRWXO;
4020 
4021 	// get the node permissions for this uid/gid
4022 	int permissions = 0;
4023 	uid_t uid = geteuid();
4024 
4025 	if (uid == 0) {
4026 		// user is root
4027 		// root has always read/write permission, but at least one of the
4028 		// X bits must be set for execute permission
4029 		permissions = userPermissions | groupPermissions | otherPermissions
4030 			| S_IROTH | S_IWOTH;
4031 		if (S_ISDIR(mode))
4032 			permissions |= S_IXOTH;
4033 	} else if (uid == nodeUserID) {
4034 		// user is node owner
4035 		permissions = userPermissions;
4036 	} else if (is_user_in_group(nodeGroupID)) {
4037 		// user is in owning group
4038 		permissions = groupPermissions;
4039 	} else {
4040 		// user is one of the others
4041 		permissions = otherPermissions;
4042 	}
4043 
4044 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4045 }
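
/*	For example, for a node with mode 0640, owner UID 1000, and group GID
	100: a caller with effective UID 1000 gets the owner bits
	(userPermissions == 6), so accessMode == R_OK | W_OK (6) yields
	(6 & ~6) == 0 and thus B_OK, while X_OK (1) yields (1 & ~6) != 0 and
	thus B_PERMISSION_DENIED. A caller that is merely in group 100 only gets
	the group bits (4), so requesting W_OK fails. Illustrative call:

		status_t status = check_access_permissions(R_OK | W_OK, 0640,
			100, 1000);
*/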
4046 
4047 
4048 #if 0
4049 extern "C" status_t
4050 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4051 	size_t* _numBytes)
4052 {
4053 	struct file_descriptor* descriptor;
4054 	struct vnode* vnode;
4055 
4056 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4057 	if (descriptor == NULL)
4058 		return B_FILE_ERROR;
4059 
4060 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4061 		count, 0, _numBytes);
4062 
4063 	put_fd(descriptor);
4064 	return status;
4065 }
4066 
4067 
4068 extern "C" status_t
4069 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4070 	size_t* _numBytes)
4071 {
4072 	struct file_descriptor* descriptor;
4073 	struct vnode* vnode;
4074 
4075 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4076 	if (descriptor == NULL)
4077 		return B_FILE_ERROR;
4078 
4079 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4080 		count, 0, _numBytes);
4081 
4082 	put_fd(descriptor);
4083 	return status;
4084 }
4085 #endif
4086 
4087 
4088 extern "C" status_t
4089 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4090 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4091 	size_t* _bytes)
4092 {
4093 	struct file_descriptor* descriptor;
4094 	struct vnode* vnode;
4095 
4096 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4097 	if (descriptor == NULL)
4098 		return B_FILE_ERROR;
4099 
4100 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4101 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4102 		false);
4103 
4104 	put_fd(descriptor);
4105 	return status;
4106 }
4107 
4108 
4109 extern "C" status_t
4110 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4111 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4112 	size_t* _bytes)
4113 {
4114 	struct file_descriptor* descriptor;
4115 	struct vnode* vnode;
4116 
4117 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4118 	if (descriptor == NULL)
4119 		return B_FILE_ERROR;
4120 
4121 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4122 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4123 		true);
4124 
4125 	put_fd(descriptor);
4126 	return status;
4127 }
4128 
4129 
4130 extern "C" status_t
4131 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4132 {
4133 	// lookup mount -- the caller is required to make sure that the mount
4134 	// won't go away
4135 	ReadLocker locker(sMountLock);
4136 	struct fs_mount* mount = find_mount(mountID);
4137 	if (mount == NULL)
4138 		return B_BAD_VALUE;
4139 	locker.Unlock();
4140 
4141 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4142 }
4143 
4144 
4145 extern "C" status_t
4146 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4147 {
4148 	// lookup mount -- the caller is required to make sure that the mount
4149 	// won't go away
4150 	ReadLocker locker(sMountLock);
4151 	struct fs_mount* mount = find_mount(mountID);
4152 	if (mount == NULL)
4153 		return B_BAD_VALUE;
4154 	locker.Unlock();
4155 
4156 	return mount->entry_cache.Add(dirID, name, -1, true);
4157 }
4158 
4159 
4160 extern "C" status_t
4161 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4162 {
4163 	// lookup mount -- the caller is required to make sure that the mount
4164 	// won't go away
4165 	ReadLocker locker(sMountLock);
4166 	struct fs_mount* mount = find_mount(mountID);
4167 	if (mount == NULL)
4168 		return B_BAD_VALUE;
4169 	locker.Unlock();
4170 
4171 	return mount->entry_cache.Remove(dirID, name);
4172 }
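
/*	File systems feed the entry cache from their lookup paths: a successful
	lookup is registered with entry_cache_add(), a confirmed miss with
	entry_cache_add_missing(), and a removed or renamed entry has to be
	purged with entry_cache_remove(). A sketch, assuming hypothetical
	myfs_* names:

		ino_t id;
		if (myfs_lookup_entry(dir, name, &id) == B_OK)
			entry_cache_add(volume->id, dirID, name, id);
		else
			entry_cache_add_missing(volume->id, dirID, name);
*/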
4173 
4174 
4175 //	#pragma mark - private VFS API
4176 //	Functions the VFS exports for other parts of the kernel
4177 
4178 
4179 /*! Acquires another reference to the vnode that has to be released
4180 	by calling vfs_put_vnode().
4181 */
4182 void
4183 vfs_acquire_vnode(struct vnode* vnode)
4184 {
4185 	inc_vnode_ref_count(vnode);
4186 }
4187 
4188 
4189 /*! This is currently called from file_cache_create() only.
4190 	It's probably a temporary solution as long as devfs requires that
4191 	fs_read_pages()/fs_write_pages() are called with the standard
4192 	open cookie and not with a device cookie.
4193 	If that's done differently, remove this call; it has no other
4194 	purpose.
4195 */
4196 extern "C" status_t
4197 vfs_get_cookie_from_fd(int fd, void** _cookie)
4198 {
4199 	struct file_descriptor* descriptor;
4200 
4201 	descriptor = get_fd(get_current_io_context(true), fd);
4202 	if (descriptor == NULL)
4203 		return B_FILE_ERROR;
4204 
4205 	*_cookie = descriptor->cookie;
4206 	return B_OK;
4207 }
4208 
4209 
4210 extern "C" status_t
4211 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4212 {
4213 	*vnode = get_vnode_from_fd(fd, kernel);
4214 
4215 	if (*vnode == NULL)
4216 		return B_FILE_ERROR;
4217 
4218 	return B_OK;
4219 }
4220 
4221 
4222 extern "C" status_t
4223 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4224 {
4225 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4226 		path, kernel));
4227 
4228 	KPath pathBuffer;
4229 	if (pathBuffer.InitCheck() != B_OK)
4230 		return B_NO_MEMORY;
4231 
4232 	char* buffer = pathBuffer.LockBuffer();
4233 	strlcpy(buffer, path, pathBuffer.BufferSize());
4234 
4235 	struct vnode* vnode;
4236 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4237 	if (status != B_OK)
4238 		return status;
4239 
4240 	*_vnode = vnode;
4241 	return B_OK;
4242 }
4243 
4244 
4245 extern "C" status_t
4246 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4247 {
4248 	struct vnode* vnode = NULL;
4249 
4250 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4251 	if (status != B_OK)
4252 		return status;
4253 
4254 	*_vnode = vnode;
4255 	return B_OK;
4256 }
4257 
4258 
4259 extern "C" status_t
4260 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4261 	const char* name, struct vnode** _vnode)
4262 {
4263 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4264 }
4265 
4266 
4267 extern "C" void
4268 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4269 {
4270 	*_mountID = vnode->device;
4271 	*_vnodeID = vnode->id;
4272 }
4273 
4274 
4275 /*!
4276 	Helper function abstracting the process of "converting" a given
4277 	vnode-pointer to a fs_vnode-pointer.
4278 	Currently only used in bindfs.
4279 */
4280 extern "C" fs_vnode*
4281 vfs_fsnode_for_vnode(struct vnode* vnode)
4282 {
4283 	return vnode;
4284 }
4285 
4286 
4287 /*!
4288 	Calls fs_open() on the given vnode and returns a new
4289 	file descriptor for it
4290 */
4291 int
4292 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4293 {
4294 	return open_vnode(vnode, openMode, kernel);
4295 }
4296 
4297 
4298 /*!	Looks up a vnode with the given mount and vnode ID.
4299 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4300 	to the node.
4301 	It's currently only used by file_cache_create().
4302 */
4303 extern "C" status_t
4304 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4305 {
4306 	rw_lock_read_lock(&sVnodeLock);
4307 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4308 	rw_lock_read_unlock(&sVnodeLock);
4309 
4310 	if (vnode == NULL)
4311 		return B_ERROR;
4312 
4313 	*_vnode = vnode;
4314 	return B_OK;
4315 }
4316 
4317 
4318 extern "C" status_t
4319 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4320 	bool traverseLeafLink, bool kernel, void** _node)
4321 {
4322 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4323 		volume, path, kernel));
4324 
4325 	KPath pathBuffer;
4326 	if (pathBuffer.InitCheck() != B_OK)
4327 		return B_NO_MEMORY;
4328 
4329 	fs_mount* mount;
4330 	status_t status = get_mount(volume->id, &mount);
4331 	if (status != B_OK)
4332 		return status;
4333 
4334 	char* buffer = pathBuffer.LockBuffer();
4335 	strlcpy(buffer, path, pathBuffer.BufferSize());
4336 
4337 	struct vnode* vnode = mount->root_vnode;
4338 
4339 	if (buffer[0] == '/')
4340 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4341 	else {
4342 		inc_vnode_ref_count(vnode);
4343 			// vnode_path_to_vnode() releases a reference to the starting vnode
4344 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4345 			kernel, &vnode, NULL);
4346 	}
4347 
4348 	put_mount(mount);
4349 
4350 	if (status != B_OK)
4351 		return status;
4352 
4353 	if (vnode->device != volume->id) {
4354 		// wrong mount ID - must not gain access on foreign file system nodes
4355 		put_vnode(vnode);
4356 		return B_BAD_VALUE;
4357 	}
4358 
4359 	// Use get_vnode() to resolve the cookie for the right layer.
4360 	status = get_vnode(volume, vnode->id, _node);
4361 	put_vnode(vnode);
4362 
4363 	return status;
4364 }
4365 
4366 
4367 status_t
4368 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4369 	struct stat* stat, bool kernel)
4370 {
4371 	status_t status;
4372 
4373 	if (path != NULL) {
4374 		// path given: get the stat of the node referred to by (fd, path)
4375 		KPath pathBuffer(path);
4376 		if (pathBuffer.InitCheck() != B_OK)
4377 			return B_NO_MEMORY;
4378 
4379 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4380 			traverseLeafLink, stat, kernel);
4381 	} else {
4382 		// no path given: get the FD and use the FD operation
4383 		struct file_descriptor* descriptor
4384 			= get_fd(get_current_io_context(kernel), fd);
4385 		if (descriptor == NULL)
4386 			return B_FILE_ERROR;
4387 
4388 		if (descriptor->ops->fd_read_stat)
4389 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4390 		else
4391 			status = B_UNSUPPORTED;
4392 
4393 		put_fd(descriptor);
4394 	}
4395 
4396 	return status;
4397 }
4398 
4399 
4400 /*!	Finds the full path to the file that contains the module \a moduleName,
4401 	puts it into \a pathBuffer, and returns B_OK for success.
4402 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4403 	\c B_ENTRY_NOT_FOUND if no file could be found.
4404 	\a pathBuffer is clobbered in any case and must not be relied on if this
4405 	function returns unsuccessfully.
4406 	\a basePath and \a pathBuffer must not point to the same space.
4407 */
4408 status_t
4409 vfs_get_module_path(const char* basePath, const char* moduleName,
4410 	char* pathBuffer, size_t bufferSize)
4411 {
4412 	struct vnode* dir;
4413 	struct vnode* file;
4414 	status_t status;
4415 	size_t length;
4416 	char* path;
4417 
4418 	if (bufferSize == 0
4419 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4420 		return B_BUFFER_OVERFLOW;
4421 
4422 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4423 	if (status != B_OK)
4424 		return status;
4425 
4426 	// the path buffer had been clobbered by the above call
4427 	length = strlcpy(pathBuffer, basePath, bufferSize);
4428 	if (pathBuffer[length - 1] != '/')
4429 		pathBuffer[length++] = '/';
4430 
4431 	path = pathBuffer + length;
4432 	bufferSize -= length;
4433 
4434 	while (moduleName) {
4435 		char* nextPath = strchr(moduleName, '/');
4436 		if (nextPath == NULL)
4437 			length = strlen(moduleName);
4438 		else {
4439 			length = nextPath - moduleName;
4440 			nextPath++;
4441 		}
4442 
4443 		if (length + 1 >= bufferSize) {
4444 			status = B_BUFFER_OVERFLOW;
4445 			goto err;
4446 		}
4447 
4448 		memcpy(path, moduleName, length);
4449 		path[length] = '\0';
4450 		moduleName = nextPath;
4451 
4452 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4453 		if (status != B_OK) {
4454 			// vnode_path_to_vnode() has already released the reference to dir
4455 			return status;
4456 		}
4457 
4458 		if (S_ISDIR(file->Type())) {
4459 			// go to the next directory
4460 			path[length] = '/';
4461 			path[length + 1] = '\0';
4462 			path += length + 1;
4463 			bufferSize -= length + 1;
4464 
4465 			dir = file;
4466 		} else if (S_ISREG(file->Type())) {
4467 			// it's a file so it should be what we've searched for
4468 			put_vnode(file);
4469 
4470 			return B_OK;
4471 		} else {
4472 			TRACE(("vfs_get_module_path(): something is strange here: "
4473 				"0x%08" B_PRIx32 "...\n", file->Type()));
4474 			status = B_ERROR;
4475 			dir = file;
4476 			goto err;
4477 		}
4478 	}
4479 
4480 	// if we got here, the moduleName just pointed to a directory, not to
4481 	// a real module - what should we do in this case?
4482 	status = B_ENTRY_NOT_FOUND;
4483 
4484 err:
4485 	put_vnode(dir);
4486 	return status;
4487 }
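
/*	For instance, with basePath "/boot/system/add-ons/kernel" and moduleName
	"bus_managers/pci/v1", the loop above descends component by component as
	long as it encounters directories and succeeds as soon as a component
	resolves to a regular file -- typically ".../bus_managers/pci", which is
	then expected to contain the "v1" module. Illustrative call:

		char path[B_PATH_NAME_LENGTH];
		status_t status = vfs_get_module_path(
			"/boot/system/add-ons/kernel", "bus_managers/pci/v1", path,
			sizeof(path));
*/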
4488 
4489 
4490 /*!	\brief Normalizes a given path.
4491 
4492 	The path must refer to an existing or non-existing entry in an existing
4493 	directory, that is chopping off the leaf component the remaining path must
4494 	refer to an existing directory.
4495 
4496 	The returned path will be canonical in that it will be absolute, will not
4497 	contain any "." or ".." components or duplicate occurrences of '/'s,
4498 	and none of the directory components will be symbolic links.
4499 
4500 	Any two paths referring to the same entry will result in the same
4501 	normalized path (which is pretty much the definition of "normalized").
4503 
4504 	\param path The path to be normalized.
4505 	\param buffer The buffer into which the normalized path will be written.
4506 		   May be the same one as \a path.
4507 	\param bufferSize The size of \a buffer.
4508 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4509 	\param kernel \c true, if the IO context of the kernel shall be used,
4510 		   otherwise that of the team this thread belongs to. Only relevant
4511 		   if the path is relative (to get the CWD).
4512 	\return \c B_OK if everything went fine, another error code otherwise.
4513 */
4514 status_t
4515 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4516 	bool traverseLink, bool kernel)
4517 {
4518 	if (!path || !buffer || bufferSize < 1)
4519 		return B_BAD_VALUE;
4520 
4521 	if (path != buffer) {
4522 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4523 			return B_BUFFER_OVERFLOW;
4524 	}
4525 
4526 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4527 }
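
/*	A minimal sketch (assuming the intermediate directories exist); note
	that \a buffer may be identical to \a path:

		char path[B_PATH_NAME_LENGTH];
		strlcpy(path, "/boot/./system//lib/../bin", sizeof(path));
		if (vfs_normalize_path(path, path, sizeof(path), true, true)
				== B_OK) {
			// path is now "/boot/system/bin"
		}
*/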
4528 
4529 
4530 /*!	\brief Gets the parent of the passed in node.
4531 
4532 	Gets the parent of the passed in node, and correctly resolves covered
4533 	nodes.
4534 */
4535 extern "C" status_t
4536 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4537 {
4538 	return resolve_covered_parent(parent, device, node,
4539 		get_current_io_context(true));
4540 }
4541 
4542 
4543 /*!	\brief Creates a special node in the file system.
4544 
4545 	The caller gets a reference to the newly created node (which is passed
4546 	back through \a _createdVnode) and is responsible for releasing it.
4547 
4548 	\param path The path where to create the entry for the node. Can be \c NULL,
4549 		in which case the node is created without an entry in the root FS -- it
4550 		will automatically be deleted when the last reference has been released.
4551 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4552 		the target file system will just create the node with its standard
4553 		operations. Depending on the type of the node a subnode might be created
4554 		automatically, though.
4555 	\param mode The type and permissions for the node to be created.
4556 	\param flags Flags to be passed to the creating FS.
4557 	\param kernel \c true, if called in the kernel context (relevant only if
4558 		\a path is not \c NULL and not absolute).
4559 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4560 		file system creating the node, with the private data pointer and
4561 		operations for the super node. Can be \c NULL.
4562 	\param _createdVnode Pointer to pre-allocated storage where to store the
4563 		pointer to the newly created node.
4564 	\return \c B_OK, if everything went fine, another error code otherwise.
4565 */
4566 status_t
4567 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4568 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4569 	struct vnode** _createdVnode)
4570 {
4571 	struct vnode* dirNode;
4572 	char _leaf[B_FILE_NAME_LENGTH];
4573 	char* leaf = NULL;
4574 
4575 	if (path) {
4576 		// We've got a path. Get the dir vnode and the leaf name.
4577 		KPath tmpPathBuffer;
4578 		if (tmpPathBuffer.InitCheck() != B_OK)
4579 			return B_NO_MEMORY;
4580 
4581 		char* tmpPath = tmpPathBuffer.LockBuffer();
4582 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4583 			return B_NAME_TOO_LONG;
4584 
4585 		// get the dir vnode and the leaf name
4586 		leaf = _leaf;
4587 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4588 		if (error != B_OK)
4589 			return error;
4590 	} else {
4591 		// No path. Create the node in the root FS.
4592 		dirNode = sRoot;
4593 		inc_vnode_ref_count(dirNode);
4594 	}
4595 
4596 	VNodePutter _(dirNode);
4597 
4598 	// check support for creating special nodes
4599 	if (!HAS_FS_CALL(dirNode, create_special_node))
4600 		return B_UNSUPPORTED;
4601 
4602 	// create the node
4603 	fs_vnode superVnode;
4604 	ino_t nodeID;
4605 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4606 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4607 	if (status != B_OK)
4608 		return status;
4609 
4610 	// lookup the node
4611 	rw_lock_read_lock(&sVnodeLock);
4612 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4613 	rw_lock_read_unlock(&sVnodeLock);
4614 
4615 	if (*_createdVnode == NULL) {
4616 		panic("vfs_create_special_node(): lookup of node failed");
4617 		return B_ERROR;
4618 	}
4619 
4620 	return B_OK;
4621 }
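
/*	A sketch of how a FIFO could be created through this function
	(illustrative only; \c path, \c perms, and \c kernel are assumed to
	come from the caller):

		struct vnode* createdVnode;
		status_t status = vfs_create_special_node(path, NULL,
			S_IFIFO | (perms & S_IUMSK), 0, kernel, NULL, &createdVnode);
		if (status == B_OK)
			put_vnode(createdVnode);
				// release the reference from the creation
*/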
4622 
4623 
4624 extern "C" void
4625 vfs_put_vnode(struct vnode* vnode)
4626 {
4627 	put_vnode(vnode);
4628 }
4629 
4630 
4631 extern "C" status_t
4632 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4633 {
4634 	// Get current working directory from io context
4635 	struct io_context* context = get_current_io_context(false);
4636 	status_t status = B_OK;
4637 
4638 	mutex_lock(&context->io_mutex);
4639 
4640 	if (context->cwd != NULL) {
4641 		*_mountID = context->cwd->device;
4642 		*_vnodeID = context->cwd->id;
4643 	} else
4644 		status = B_ERROR;
4645 
4646 	mutex_unlock(&context->io_mutex);
4647 	return status;
4648 }
4649 
4650 
4651 status_t
4652 vfs_unmount(dev_t mountID, uint32 flags)
4653 {
4654 	return fs_unmount(NULL, mountID, flags, true);
4655 }
4656 
4657 
4658 extern "C" status_t
4659 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4660 {
4661 	struct vnode* vnode;
4662 
4663 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4664 	if (status != B_OK)
4665 		return status;
4666 
4667 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4668 	put_vnode(vnode);
4669 	return B_OK;
4670 }
4671 
4672 
4673 extern "C" void
4674 vfs_free_unused_vnodes(int32 level)
4675 {
4676 	vnode_low_resource_handler(NULL,
4677 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4678 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4679 		level);
4680 }
4681 
4682 
4683 extern "C" bool
4684 vfs_can_page(struct vnode* vnode, void* cookie)
4685 {
4686 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4687 
4688 	if (HAS_FS_CALL(vnode, can_page))
4689 		return FS_CALL(vnode, can_page, cookie);
4690 	return false;
4691 }
4692 
4693 
4694 extern "C" status_t
4695 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4696 	const generic_io_vec* vecs, size_t count, uint32 flags,
4697 	generic_size_t* _numBytes)
4698 {
4699 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4700 		vecs, pos));
4701 
4702 #if VFS_PAGES_IO_TRACING
4703 	generic_size_t bytesRequested = *_numBytes;
4704 #endif
4705 
4706 	IORequest request;
4707 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4708 	if (status == B_OK) {
4709 		status = vfs_vnode_io(vnode, cookie, &request);
4710 		if (status == B_OK)
4711 			status = request.Wait();
4712 		*_numBytes = request.TransferredBytes();
4713 	}
4714 
4715 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4716 		status, *_numBytes));
4717 
4718 	return status;
4719 }
4720 
4721 
4722 extern "C" status_t
4723 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4724 	const generic_io_vec* vecs, size_t count, uint32 flags,
4725 	generic_size_t* _numBytes)
4726 {
4727 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4728 		vecs, pos));
4729 
4730 #if VFS_PAGES_IO_TRACING
4731 	generic_size_t bytesRequested = *_numBytes;
4732 #endif
4733 
4734 	IORequest request;
4735 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4736 	if (status == B_OK) {
4737 		status = vfs_vnode_io(vnode, cookie, &request);
4738 		if (status == B_OK)
4739 			status = request.Wait();
4740 		*_numBytes = request.TransferredBytes();
4741 	}
4742 
4743 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4744 		status, *_numBytes));
4745 
4746 	return status;
4747 }
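
/*	Both functions transfer to/from generic_io_vec arrays and update
	*_numBytes to the number of bytes actually transferred. A minimal read
	sketch, assuming \c vnode and \c cookie were obtained elsewhere and
	\c buffer is a kernel virtual address (hence flags == 0):

		generic_io_vec vec;
		vec.base = (generic_addr_t)buffer;
		vec.length = bufferSize;
		generic_size_t transferred = bufferSize;
		status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0,
			&transferred);
*/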
4748 
4749 
4750 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4751 	created if \a allocate is \c true.
4752 	In case it's successful, it will also grab a reference to the cache
4753 	it returns.
4754 */
4755 extern "C" status_t
4756 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4757 {
4758 	if (vnode->cache != NULL) {
4759 		vnode->cache->AcquireRef();
4760 		*_cache = vnode->cache;
4761 		return B_OK;
4762 	}
4763 
4764 	rw_lock_read_lock(&sVnodeLock);
4765 	vnode->Lock();
4766 
4767 	status_t status = B_OK;
4768 
4769 	// The cache could have been created in the meantime
4770 	if (vnode->cache == NULL) {
4771 		if (allocate) {
4772 			// TODO: actually the vnode needs to be busy already here, or
4773 			//	else this won't work...
4774 			bool wasBusy = vnode->IsBusy();
4775 			vnode->SetBusy(true);
4776 
4777 			vnode->Unlock();
4778 			rw_lock_read_unlock(&sVnodeLock);
4779 
4780 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4781 
4782 			rw_lock_read_lock(&sVnodeLock);
4783 			vnode->Lock();
4784 			vnode->SetBusy(wasBusy);
4785 		} else
4786 			status = B_BAD_VALUE;
4787 	}
4788 
4789 	vnode->Unlock();
4790 	rw_lock_read_unlock(&sVnodeLock);
4791 
4792 	if (status == B_OK) {
4793 		vnode->cache->AcquireRef();
4794 		*_cache = vnode->cache;
4795 	}
4796 
4797 	return status;
4798 }
4799 
4800 
4801 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4802 	their own.
4803 	In case it's successful, it will also grab a reference to the cache
4804 	that was set.
4805 */
4806 extern "C" status_t
4807 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4808 {
4809 	rw_lock_read_lock(&sVnodeLock);
4810 	vnode->Lock();
4811 
4812 	status_t status = B_OK;
4813 	if (vnode->cache != NULL) {
4814 		status = B_NOT_ALLOWED;
4815 	} else {
4816 		vnode->cache = _cache;
4817 		_cache->AcquireRef();
4818 	}
4819 
4820 	vnode->Unlock();
4821 	rw_lock_read_unlock(&sVnodeLock);
4822 	return status;
4823 }
4824 
4825 
4826 status_t
4827 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4828 	file_io_vec* vecs, size_t* _count)
4829 {
4830 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4831 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4832 
4833 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4834 }
4835 
4836 
4837 status_t
4838 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4839 {
4840 	status_t status = FS_CALL(vnode, read_stat, stat);
4841 
4842 	// fill in the st_dev and st_ino fields
4843 	if (status == B_OK) {
4844 		stat->st_dev = vnode->device;
4845 		stat->st_ino = vnode->id;
4846 		// the rdev field must stay unset for non-special files
4847 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4848 			stat->st_rdev = -1;
4849 	}
4850 
4851 	return status;
4852 }
4853 
4854 
4855 status_t
4856 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4857 {
4858 	struct vnode* vnode;
4859 	status_t status = get_vnode(device, inode, &vnode, true, false);
4860 	if (status != B_OK)
4861 		return status;
4862 
4863 	status = vfs_stat_vnode(vnode, stat);
4864 
4865 	put_vnode(vnode);
4866 	return status;
4867 }
4868 
4869 
4870 status_t
4871 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4872 {
4873 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4874 }
4875 
4876 
4877 status_t
4878 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4879 	bool kernel, char* path, size_t pathLength)
4880 {
4881 	struct vnode* vnode;
4882 	status_t status;
4883 
4884 	// filter invalid leaf names
4885 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4886 		return B_BAD_VALUE;
4887 
4888 	// get the vnode matching the dir's node_ref
4889 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4890 		// special cases "." and "..": we can directly get the vnode of the
4891 		// referenced directory
4892 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4893 		leaf = NULL;
4894 	} else
4895 		status = get_vnode(device, inode, &vnode, true, false);
4896 	if (status != B_OK)
4897 		return status;
4898 
4899 	// get the directory path
4900 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4901 	put_vnode(vnode);
4902 		// we don't need the vnode anymore
4903 	if (status != B_OK)
4904 		return status;
4905 
4906 	// append the leaf name
4907 	if (leaf) {
4908 		// insert a directory separator if this is not the file system root
4909 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4910 				>= pathLength)
4911 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4912 			return B_NAME_TOO_LONG;
4913 		}
4914 	}
4915 
4916 	return B_OK;
4917 }
4918 
4919 
4920 /*!	If the given descriptor locked its vnode, that lock will be released. */
4921 void
4922 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4923 {
4924 	struct vnode* vnode = fd_vnode(descriptor);
4925 
4926 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4927 		vnode->mandatory_locked_by = NULL;
4928 }
4929 
4930 
4931 /*!	Releases any POSIX locks on the file descriptor. */
4932 status_t
4933 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4934 {
4935 	struct vnode* vnode = descriptor->u.vnode;
4936 	if (vnode == NULL)
4937 		return B_OK;
4938 
4939 	if (HAS_FS_CALL(vnode, release_lock))
4940 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4941 
4942 	return release_advisory_lock(vnode, context, NULL, NULL);
4943 }
4944 
4945 
4946 /*!	Closes all file descriptors of the specified I/O context that
4947 	have the O_CLOEXEC flag set.
4948 */
4949 void
4950 vfs_exec_io_context(io_context* context)
4951 {
4952 	uint32 i;
4953 
4954 	for (i = 0; i < context->table_size; i++) {
4955 		mutex_lock(&context->io_mutex);
4956 
4957 		struct file_descriptor* descriptor = context->fds[i];
4958 		bool remove = false;
4959 
4960 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4961 			context->fds[i] = NULL;
4962 			context->num_used_fds--;
4963 
4964 			remove = true;
4965 		}
4966 
4967 		mutex_unlock(&context->io_mutex);
4968 
4969 		if (remove) {
4970 			close_fd(context, descriptor);
4971 			put_fd(descriptor);
4972 		}
4973 	}
4974 }
4975 
4976 
4977 /*! Sets up a new io_context structure, and inherits the properties
4978 	of the parent io_context if it is given.
4979 */
4980 io_context*
4981 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4982 {
4983 	io_context* context = (io_context*)malloc(sizeof(io_context));
4984 	if (context == NULL)
4985 		return NULL;
4986 
4987 	TIOC(NewIOContext(context, parentContext));
4988 
4989 	memset(context, 0, sizeof(io_context));
4990 	context->ref_count = 1;
4991 
4992 	MutexLocker parentLocker;
4993 
4994 	size_t tableSize;
4995 	if (parentContext != NULL) {
4996 		parentLocker.SetTo(parentContext->io_mutex, false);
4997 		tableSize = parentContext->table_size;
4998 	} else
4999 		tableSize = DEFAULT_FD_TABLE_SIZE;
5000 
5001 	// allocate space for FDs and their close-on-exec flag
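	// The FD table, the select infos, and the close-on-exec bitmap share a
	// single allocation, laid out as:
	//   [tableSize x file_descriptor*][tableSize x select_info*]
	//   [(tableSize + 7) / 8 bytes of close-on-exec bits]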
5002 	context->fds = (file_descriptor**)malloc(
5003 		sizeof(struct file_descriptor*) * tableSize
5004 		+ sizeof(struct select_info*) * tableSize
5005 		+ (tableSize + 7) / 8);
5006 	if (context->fds == NULL) {
5007 		free(context);
5008 		return NULL;
5009 	}
5010 
5011 	context->select_infos = (select_info**)(context->fds + tableSize);
5012 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5013 
5014 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5015 		+ sizeof(struct select_info*) * tableSize
5016 		+ (tableSize + 7) / 8);
5017 
5018 	mutex_init(&context->io_mutex, "I/O context");
5019 
5020 	// Copy all parent file descriptors
5021 
5022 	if (parentContext != NULL) {
5023 		size_t i;
5024 
5025 		mutex_lock(&sIOContextRootLock);
5026 		context->root = parentContext->root;
5027 		if (context->root)
5028 			inc_vnode_ref_count(context->root);
5029 		mutex_unlock(&sIOContextRootLock);
5030 
5031 		context->cwd = parentContext->cwd;
5032 		if (context->cwd)
5033 			inc_vnode_ref_count(context->cwd);
5034 
5035 		if (parentContext->inherit_fds) {
5036 			for (i = 0; i < tableSize; i++) {
5037 				struct file_descriptor* descriptor = parentContext->fds[i];
5038 
5039 				if (descriptor != NULL
5040 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5041 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5042 					if (closeOnExec && purgeCloseOnExec)
5043 						continue;
5044 
5045 					TFD(InheritFD(context, i, descriptor, parentContext));
5046 
5047 					context->fds[i] = descriptor;
5048 					context->num_used_fds++;
5049 					atomic_add(&descriptor->ref_count, 1);
5050 					atomic_add(&descriptor->open_count, 1);
5051 
5052 					if (closeOnExec)
5053 						fd_set_close_on_exec(context, i, true);
5054 				}
5055 			}
5056 		}
5057 
5058 		parentLocker.Unlock();
5059 	} else {
5060 		context->root = sRoot;
5061 		context->cwd = sRoot;
5062 
5063 		if (context->root)
5064 			inc_vnode_ref_count(context->root);
5065 
5066 		if (context->cwd)
5067 			inc_vnode_ref_count(context->cwd);
5068 	}
5069 
5070 	context->table_size = tableSize;
5071 	context->inherit_fds = parentContext != NULL;
5072 
5073 	list_init(&context->node_monitors);
5074 	context->max_monitors = DEFAULT_NODE_MONITORS;
5075 
5076 	return context;
5077 }
5078 
5079 
5080 void
5081 vfs_get_io_context(io_context* context)
5082 {
5083 	atomic_add(&context->ref_count, 1);
5084 }
5085 
5086 
5087 void
5088 vfs_put_io_context(io_context* context)
5089 {
5090 	if (atomic_add(&context->ref_count, -1) == 1)
5091 		free_io_context(context);
5092 }
5093 
5094 
5095 status_t
5096 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5097 {
5098 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5099 		return B_BAD_VALUE;
5100 
5101 	TIOC(ResizeIOContext(context, newSize));
5102 
5103 	MutexLocker _(context->io_mutex);
5104 
5105 	uint32 oldSize = context->table_size;
5106 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5107 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5108 
5109 	// If the tables shrink, make sure none of the fds being dropped are in use.
5110 	if (newSize < oldSize) {
5111 		for (uint32 i = oldSize; i-- > newSize;) {
5112 			if (context->fds[i])
5113 				return B_BUSY;
5114 		}
5115 	}
5116 
5117 	// store pointers to the old tables
5118 	file_descriptor** oldFDs = context->fds;
5119 	select_info** oldSelectInfos = context->select_infos;
5120 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5121 
5122 	// allocate new tables
5123 	file_descriptor** newFDs = (file_descriptor**)malloc(
5124 		sizeof(struct file_descriptor*) * newSize
5125 		+ sizeof(struct select_info*) * newSize
5126 		+ newCloseOnExecBitmapSize);
5127 	if (newFDs == NULL)
5128 		return B_NO_MEMORY;
5129 
5130 	context->fds = newFDs;
5131 	context->select_infos = (select_info**)(context->fds + newSize);
5132 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5133 	context->table_size = newSize;
5134 
5135 	// copy entries from old tables
5136 	uint32 toCopy = min_c(oldSize, newSize);
5137 
5138 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5139 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5140 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5141 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5142 
5143 	// clear additional entries, if the tables grow
5144 	if (newSize > oldSize) {
5145 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5146 		memset(context->select_infos + oldSize, 0,
5147 			sizeof(void*) * (newSize - oldSize));
5148 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5149 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5150 	}
5151 
5152 	free(oldFDs);
5153 
5154 	return B_OK;
5155 }
5156 
5157 
5158 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5159 
5160 	Given an arbitrary vnode (identified by mount and node ID), the function
5161 	checks whether the vnode is covered by another vnode. If it is, the
5162 	function returns the mount and node ID of the covering vnode. Otherwise
5163 	it simply returns the supplied mount and node ID.
5164 
5165 	In case of error (e.g. the supplied node could not be found) the variables
5166 	for storing the resolved mount and node ID remain untouched and an error
5167 	code is returned.
5168 
5169 	\param mountID The mount ID of the vnode in question.
5170 	\param nodeID The node ID of the vnode in question.
5171 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5172 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5173 	\return
5174 	- \c B_OK, if everything went fine,
5175 	- another error code, if something went wrong.
5176 */
5177 status_t
5178 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5179 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5180 {
5181 	// get the node
5182 	struct vnode* node;
5183 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5184 	if (error != B_OK)
5185 		return error;
5186 
5187 	// resolve the node
5188 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5189 		put_vnode(node);
5190 		node = coveringNode;
5191 	}
5192 
5193 	// set the return values
5194 	*resolvedMountID = node->device;
5195 	*resolvedNodeID = node->id;
5196 
5197 	put_vnode(node);
5198 
5199 	return B_OK;
5200 }
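
/*	A minimal sketch: for the node ref of a covered mount point this yields
	the node ref of the root of the volume mounted there, otherwise the
	input is passed through unchanged:

		dev_t resolvedDevice;
		ino_t resolvedNode;
		if (vfs_resolve_vnode_to_covering_vnode(device, node,
				&resolvedDevice, &resolvedNode) == B_OK) {
			// (resolvedDevice, resolvedNode) is the node actually visible
			// at that location
		}
*/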
5201 
5202 
5203 status_t
5204 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5205 	ino_t* _mountPointNodeID)
5206 {
5207 	ReadLocker nodeLocker(sVnodeLock);
5208 	ReadLocker mountLocker(sMountLock);
5209 
5210 	struct fs_mount* mount = find_mount(mountID);
5211 	if (mount == NULL)
5212 		return B_BAD_VALUE;
5213 
5214 	Vnode* mountPoint = mount->covers_vnode;
5215 
5216 	*_mountPointMountID = mountPoint->device;
5217 	*_mountPointNodeID = mountPoint->id;
5218 
5219 	return B_OK;
5220 }
5221 
5222 
5223 status_t
5224 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5225 	ino_t coveredNodeID)
5226 {
5227 	// get the vnodes
5228 	Vnode* vnode;
5229 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5230 	if (error != B_OK)
5231 		return B_BAD_VALUE;
5232 	VNodePutter vnodePutter(vnode);
5233 
5234 	Vnode* coveredVnode;
5235 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5236 		false);
5237 	if (error != B_OK)
5238 		return B_BAD_VALUE;
5239 	VNodePutter coveredVnodePutter(coveredVnode);
5240 
5241 	// establish the covered/covering links
5242 	WriteLocker locker(sVnodeLock);
5243 
5244 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5245 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5246 		return B_BUSY;
5247 	}
5248 
5249 	vnode->covers = coveredVnode;
5250 	vnode->SetCovering(true);
5251 
5252 	coveredVnode->covered_by = vnode;
5253 	coveredVnode->SetCovered(true);
5254 
5255 	// the vnodes do now reference each other
5256 	inc_vnode_ref_count(vnode);
5257 	inc_vnode_ref_count(coveredVnode);
5258 
5259 	return B_OK;
5260 }
5261 
5262 
5263 int
5264 vfs_getrlimit(int resource, struct rlimit* rlp)
5265 {
5266 	if (!rlp)
5267 		return B_BAD_ADDRESS;
5268 
5269 	switch (resource) {
5270 		case RLIMIT_NOFILE:
5271 		{
5272 			struct io_context* context = get_current_io_context(false);
5273 			MutexLocker _(context->io_mutex);
5274 
5275 			rlp->rlim_cur = context->table_size;
5276 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5277 			return 0;
5278 		}
5279 
5280 		case RLIMIT_NOVMON:
5281 		{
5282 			struct io_context* context = get_current_io_context(false);
5283 			MutexLocker _(context->io_mutex);
5284 
5285 			rlp->rlim_cur = context->max_monitors;
5286 			rlp->rlim_max = MAX_NODE_MONITORS;
5287 			return 0;
5288 		}
5289 
5290 		default:
5291 			return B_BAD_VALUE;
5292 	}
5293 }
5294 
5295 
5296 int
5297 vfs_setrlimit(int resource, const struct rlimit* rlp)
5298 {
5299 	if (!rlp)
5300 		return B_BAD_ADDRESS;
5301 
5302 	switch (resource) {
5303 		case RLIMIT_NOFILE:
5304 			/* TODO: check getuid() */
5305 			if (rlp->rlim_max != RLIM_SAVED_MAX
5306 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5307 				return B_NOT_ALLOWED;
5308 
5309 			return vfs_resize_fd_table(get_current_io_context(false),
5310 				rlp->rlim_cur);
5311 
5312 		case RLIMIT_NOVMON:
5313 			/* TODO: check getuid() */
5314 			if (rlp->rlim_max != RLIM_SAVED_MAX
5315 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5316 				return B_NOT_ALLOWED;
5317 
5318 			return resize_monitor_table(get_current_io_context(false),
5319 				rlp->rlim_cur);
5320 
5321 		default:
5322 			return B_BAD_VALUE;
5323 	}
5324 }
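
/*	These two functions back getrlimit()/setrlimit() for Haiku's FD table
	and node monitor limits. A userland sketch of growing the FD table
	(illustrative only):

		struct rlimit rl;
		getrlimit(RLIMIT_NOFILE, &rl);
			// rl.rlim_max is MAX_FD_TABLE_SIZE afterwards
		rl.rlim_cur = 1024;
		setrlimit(RLIMIT_NOFILE, &rl);
			// resizes this team's FD table to 1024 entries
*/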
5325 
5326 
5327 status_t
5328 vfs_init(kernel_args* args)
5329 {
5330 	vnode::StaticInit();
5331 
5332 	sVnodeTable = new(std::nothrow) VnodeTable();
5333 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5334 		panic("vfs_init: error creating vnode hash table\n");
5335 
5336 	struct vnode dummy_vnode;
5337 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5338 
5340 	sMountsTable = new(std::nothrow) MountTable();
5341 	if (sMountsTable == NULL
5342 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5343 		panic("vfs_init: error creating mounts hash table\n");
5344 
5345 	sPathNameCache = create_object_cache("vfs path names",
5346 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5347 	if (sPathNameCache == NULL)
5348 		panic("vfs_init: error creating path name object_cache\n");
5349 
5350 	sVnodeCache = create_object_cache("vfs vnodes",
5351 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5352 	if (sVnodeCache == NULL)
5353 		panic("vfs_init: error creating vnode object_cache\n");
5354 
5355 	sFileDescriptorCache = create_object_cache("vfs fds",
5356 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5357 	if (sFileDescriptorCache == NULL)
5358 		panic("vfs_init: error creating file descriptor object_cache\n");
5359 
5360 	node_monitor_init();
5361 
5362 	sRoot = NULL;
5363 
5364 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5365 
5366 	if (block_cache_init() != B_OK)
5367 		return B_ERROR;
5368 
5369 #ifdef ADD_DEBUGGER_COMMANDS
5370 	// add some debugger commands
5371 	add_debugger_command_etc("vnode", &dump_vnode,
5372 		"Print info about the specified vnode",
5373 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5374 		"Prints information about the vnode specified by address <vnode> or\n"
5375 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5376 		"constructed and printed. It might not be possible to construct a\n"
5377 		"complete path, though.\n",
5378 		0);
5379 	add_debugger_command("vnodes", &dump_vnodes,
5380 		"list all vnodes (from the specified device)");
5381 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5382 		"list all vnode caches");
5383 	add_debugger_command("mount", &dump_mount,
5384 		"info about the specified fs_mount");
5385 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5386 	add_debugger_command("io_context", &dump_io_context,
5387 		"info about the I/O context");
5388 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5389 		"info about vnode usage");
5390 #endif
5391 
5392 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5393 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5394 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5395 		0);
5396 
5397 	fifo_init();
5398 	file_map_init();
5399 
5400 	return file_cache_init();
5401 }
5402 
5403 
5404 //	#pragma mark - fd_ops implementations
5405 
5406 
5407 /*!
5408 	Calls fs_open() on the given vnode and returns a new
5409 	file descriptor for it
5410 */
5411 static int
5412 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5413 {
5414 	void* cookie;
5415 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5416 	if (status != B_OK)
5417 		return status;
5418 
5419 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5420 	if (fd < 0) {
5421 		FS_CALL(vnode, close, cookie);
5422 		FS_CALL(vnode, free_cookie, cookie);
5423 	}
5424 	return fd;
5425 }
5426 
5427 
5428 /*!
5429 	Creates an entry \a name in the given \a directory (via the FS's create()
5430 	hook), opens the resulting node, and returns a new file descriptor for it.
5431 */
5432 static int
5433 create_vnode(struct vnode* directory, const char* name, int openMode,
5434 	int perms, bool kernel)
5435 {
5436 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5437 	status_t status = B_ERROR;
5438 	struct vnode* vnode;
5439 	void* cookie;
5440 	ino_t newID;
5441 
5442 	// This is somewhat tricky: If the entry already exists, the FS responsible
5443 	// for the directory might not necessarily also be the one responsible for
5444 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5445 	// we can actually never call the create() hook without O_EXCL. Instead we
5446 	// try to look the entry up first. If it already exists, we just open the
5447 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5448 	// introduces a race condition, since someone else might have created the
5449 	// entry in the meantime. We hope the respective FS returns the correct
5450 	// error code and retry (up to 3 times) in that case.
5451 
5452 	for (int i = 0; i < 3 && status != B_OK; i++) {
5453 		// look the node up
5454 		status = lookup_dir_entry(directory, name, &vnode);
5455 		if (status == B_OK) {
5456 			VNodePutter putter(vnode);
5457 
5458 			if ((openMode & O_EXCL) != 0)
5459 				return B_FILE_EXISTS;
5460 
5461 			// If the node is a symlink, we have to follow it, unless
5462 			// O_NOTRAVERSE is set.
5463 			if (S_ISLNK(vnode->Type()) && traverse) {
5464 				putter.Put();
5465 				char clonedName[B_FILE_NAME_LENGTH + 1];
5466 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5467 						>= B_FILE_NAME_LENGTH) {
5468 					return B_NAME_TOO_LONG;
5469 				}
5470 
5471 				inc_vnode_ref_count(directory);
5472 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5473 					kernel, &vnode, NULL);
5474 				if (status != B_OK)
5475 					return status;
5476 
5477 				putter.SetTo(vnode);
5478 			}
5479 
5480 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5481 				return B_LINK_LIMIT;
5482 
5483 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5484 			// on success keep the vnode reference for the FD
5485 			if (fd >= 0)
5486 				putter.Detach();
5487 
5488 			return fd;
5489 		}
5490 
5491 		// it doesn't exist yet -- try to create it
5492 
5493 		if (!HAS_FS_CALL(directory, create))
5494 			return B_READ_ONLY_DEVICE;
5495 
5496 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5497 			&cookie, &newID);
5498 		if (status != B_OK
5499 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5500 			return status;
5501 		}
5502 	}
5503 
5504 	if (status != B_OK)
5505 		return status;
5506 
5507 	// the node has been created successfully
5508 
5509 	rw_lock_read_lock(&sVnodeLock);
5510 	vnode = lookup_vnode(directory->device, newID);
5511 	rw_lock_read_unlock(&sVnodeLock);
5512 
5513 	if (vnode == NULL) {
5514 		panic("vfs: fs_create() returned success but there is no vnode, "
5515 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5516 		return B_BAD_VALUE;
5517 	}
5518 
5519 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5520 	if (fd >= 0)
5521 		return fd;
5522 
5523 	status = fd;
5524 
5525 	// something went wrong, clean up
5526 
5527 	FS_CALL(vnode, close, cookie);
5528 	FS_CALL(vnode, free_cookie, cookie);
5529 	put_vnode(vnode);
5530 
5531 	FS_CALL(directory, unlink, name);
5532 
5533 	return status;
5534 }
5535 
5536 
5537 /*! Calls fs open_dir() on the given vnode and returns a new
5538 	file descriptor for it
5539 */
5540 static int
5541 open_dir_vnode(struct vnode* vnode, bool kernel)
5542 {
5543 	if (!HAS_FS_CALL(vnode, open_dir))
5544 		return B_UNSUPPORTED;
5545 
5546 	void* cookie;
5547 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5548 	if (status != B_OK)
5549 		return status;
5550 
5551 	// directory is opened, create a fd
5552 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5553 	if (status >= 0)
5554 		return status;
5555 
5556 	FS_CALL(vnode, close_dir, cookie);
5557 	FS_CALL(vnode, free_dir_cookie, cookie);
5558 
5559 	return status;
5560 }
5561 
5562 
5563 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5564 	file descriptor for it.
5565 	Used by attr_dir_open(), and attr_dir_open_fd().
5566 */
5567 static int
5568 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5569 {
5570 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5571 		return B_UNSUPPORTED;
5572 
5573 	void* cookie;
5574 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5575 	if (status != B_OK)
5576 		return status;
5577 
5578 	// directory is opened, create a fd
5579 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5580 		kernel);
5581 	if (status >= 0)
5582 		return status;
5583 
5584 	FS_CALL(vnode, close_attr_dir, cookie);
5585 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5586 
5587 	return status;
5588 }
5589 
5590 
5591 static int
5592 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5593 	int openMode, int perms, bool kernel)
5594 {
5595 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5596 		"kernel %d\n", name, openMode, perms, kernel));
5597 
5598 	// get directory to put the new file in
5599 	struct vnode* directory;
5600 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5601 	if (status != B_OK)
5602 		return status;
5603 
5604 	status = create_vnode(directory, name, openMode, perms, kernel);
5605 	put_vnode(directory);
5606 
5607 	return status;
5608 }
5609 
5610 
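/*!	Creates and opens the file at the location given by the FD + path pair.
	Returns a new FD on success, or an error code otherwise.
*/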
5611 static int
5612 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5613 {
5614 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5615 		openMode, perms, kernel));
5616 
5617 	// get directory to put the new file in
5618 	char name[B_FILE_NAME_LENGTH];
5619 	struct vnode* directory;
5620 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5621 		kernel);
5622 	if (status < 0)
5623 		return status;
5624 
5625 	status = create_vnode(directory, name, openMode, perms, kernel);
5626 
5627 	put_vnode(directory);
5628 	return status;
5629 }
5630 
5631 
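/*!	Opens the file given by the (mountID, directoryID, name) entry ref.
	Symlinks are only traversed if neither O_NOTRAVERSE nor O_NOFOLLOW
	is set.
	Returns a new FD on success, or an error code otherwise.
*/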
5632 static int
5633 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5634 	int openMode, bool kernel)
5635 {
5636 	if (name == NULL || *name == '\0')
5637 		return B_BAD_VALUE;
5638 
5639 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5640 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5641 
5642 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5643 
5644 	// get the vnode matching the entry_ref
5645 	struct vnode* vnode;
5646 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5647 		kernel, &vnode);
5648 	if (status != B_OK)
5649 		return status;
5650 
5651 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5652 		put_vnode(vnode);
5653 		return B_LINK_LIMIT;
5654 	}
5655 
5656 	int newFD = open_vnode(vnode, openMode, kernel);
5657 	if (newFD >= 0) {
5658 		// The vnode reference has been transferred to the FD
5659 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5660 			directoryID, vnode->id, name);
5661 	} else
5662 		put_vnode(vnode);
5663 
5664 	return newFD;
5665 }
5666 
5667 
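/*!	Opens the file at the location given by the FD + path pair,
	analogously to file_open_entry_ref().
	Returns a new FD on success, or an error code otherwise.
*/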
5668 static int
5669 file_open(int fd, char* path, int openMode, bool kernel)
5670 {
5671 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5672 
5673 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5674 		fd, path, openMode, kernel));
5675 
5676 	// get the vnode matching the vnode + path combination
5677 	struct vnode* vnode;
5678 	ino_t parentID;
5679 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5680 		&parentID, kernel);
5681 	if (status != B_OK)
5682 		return status;
5683 
5684 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5685 		put_vnode(vnode);
5686 		return B_LINK_LIMIT;
5687 	}
5688 
5689 	// open the vnode
5690 	int newFD = open_vnode(vnode, openMode, kernel);
5691 	if (newFD >= 0) {
5692 		// The vnode reference has been transferred to the FD
5693 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5694 			vnode->device, parentID, vnode->id, NULL);
5695 	} else
5696 		put_vnode(vnode);
5697 
5698 	return newFD;
5699 }
5700 
5701 
5702 static status_t
5703 file_close(struct file_descriptor* descriptor)
5704 {
5705 	struct vnode* vnode = descriptor->u.vnode;
5706 	status_t status = B_OK;
5707 
5708 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5709 
5710 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5711 		vnode->id);
5712 	if (HAS_FS_CALL(vnode, close)) {
5713 		status = FS_CALL(vnode, close, descriptor->cookie);
5714 	}
5715 
5716 	if (status == B_OK) {
5717 		// remove all outstanding locks for this team
5718 		if (HAS_FS_CALL(vnode, release_lock))
5719 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5720 		else
5721 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5722 	}
5723 	return status;
5724 }
5725 
5726 
5727 static void
5728 file_free_fd(struct file_descriptor* descriptor)
5729 {
5730 	struct vnode* vnode = descriptor->u.vnode;
5731 
5732 	if (vnode != NULL) {
5733 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5734 		put_vnode(vnode);
5735 	}
5736 }
5737 
5738 
5739 static status_t
5740 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5741 	size_t* length)
5742 {
5743 	struct vnode* vnode = descriptor->u.vnode;
5744 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5745 		pos, length, *length));
5746 
5747 	if (S_ISDIR(vnode->Type()))
5748 		return B_IS_A_DIRECTORY;
5749 
5750 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5751 }
5752 
5753 
5754 static status_t
5755 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5756 	size_t* length)
5757 {
5758 	struct vnode* vnode = descriptor->u.vnode;
5759 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5760 		length));
5761 
5762 	if (S_ISDIR(vnode->Type()))
5763 		return B_IS_A_DIRECTORY;
5764 	if (!HAS_FS_CALL(vnode, write))
5765 		return B_READ_ONLY_DEVICE;
5766 
5767 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5768 }
5769 
5770 
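/*!	Computes and sets the new file position for the given seek mode.
	Besides SEEK_SET, SEEK_CUR, and SEEK_END, SEEK_DATA and SEEK_HOLE are
	supported, preferably via the FS's ioctl() hook, with a stat() based
	fallback.
	Returns the new position on success, or an error code otherwise.
*/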
5771 static off_t
5772 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5773 {
5774 	struct vnode* vnode = descriptor->u.vnode;
5775 	off_t offset;
5776 	bool isDevice = false;
5777 
5778 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5779 		seekType));
5780 
5781 	// some kinds of files are not seekable
5782 	switch (vnode->Type() & S_IFMT) {
5783 		case S_IFIFO:
5784 		case S_IFSOCK:
5785 			return ESPIPE;
5786 
5787 		// drivers publish block devices as character devices, so check both
5788 		case S_IFBLK:
5789 		case S_IFCHR:
5790 			isDevice = true;
5791 			break;
5792 		// The Open Group Base Specs don't treat any file types besides pipes,
5793 		// FIFOs, and sockets specially, so we allow seeking all other types.
5794 		case S_IFREG:
5795 		case S_IFDIR:
5796 		case S_IFLNK:
5797 			break;
5798 	}
5799 
5800 	switch (seekType) {
5801 		case SEEK_SET:
5802 			offset = 0;
5803 			break;
5804 		case SEEK_CUR:
5805 			offset = descriptor->pos;
5806 			break;
5807 		case SEEK_END:
5808 		{
5809 			// stat() the node
5810 			if (!HAS_FS_CALL(vnode, read_stat))
5811 				return B_UNSUPPORTED;
5812 
5813 			struct stat stat;
5814 			status_t status = FS_CALL(vnode, read_stat, &stat);
5815 			if (status != B_OK)
5816 				return status;
5817 
5818 			offset = stat.st_size;
5819 
5820 			if (offset == 0 && isDevice) {
5821 				// stat() on device drivers doesn't report a size
5822 				device_geometry geometry;
5823 
5824 				if (HAS_FS_CALL(vnode, ioctl)) {
5825 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5826 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
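					// the raw device capacity is the product of the
					// geometry dimensions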
5827 					if (status == B_OK)
5828 						offset = (off_t)geometry.bytes_per_sector
5829 							* geometry.sectors_per_track
5830 							* geometry.cylinder_count
5831 							* geometry.head_count;
5832 				}
5833 			}
5834 
5835 			break;
5836 		}
5837 		case SEEK_DATA:
5838 		case SEEK_HOLE:
5839 		{
5840 			status_t status = B_BAD_VALUE;
5841 			if (HAS_FS_CALL(vnode, ioctl)) {
5842 				offset = pos;
5843 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5844 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5845 					&offset, sizeof(offset));
5846 				if (status == B_OK) {
5847 					if (offset > pos)
5848 						offset -= pos;
5849 					break;
5850 				}
5851 			}
5852 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5853 				return status;
5854 
5855 			// fall back to a basic implementation using stat() on the node
5856 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5857 				return B_BAD_VALUE;
5858 
5859 			struct stat stat;
5860 			status = FS_CALL(vnode, read_stat, &stat);
5861 			if (status != B_OK)
5862 				return status;
5863 
5864 			off_t end = stat.st_size;
5865 			if (pos >= end)
5866 				return ENXIO;
5867 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5868 			break;
5869 		}
5870 		default:
5871 			return B_BAD_VALUE;
5872 	}
5873 
5874 	// assumes off_t is 64 bits wide
5875 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5876 		return B_BUFFER_OVERFLOW;
5877 
5878 	pos += offset;
5879 	if (pos < 0)
5880 		return B_BAD_VALUE;
5881 
5882 	return descriptor->pos = pos;
5883 }
5884 
5885 
5886 static status_t
5887 file_select(struct file_descriptor* descriptor, uint8 event,
5888 	struct selectsync* sync)
5889 {
5890 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5891 
5892 	struct vnode* vnode = descriptor->u.vnode;
5893 
5894 	// If the FS has no select() hook, notify select() now.
5895 	if (!HAS_FS_CALL(vnode, select)) {
5896 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5897 			return notify_select_event(sync, event);
5898 		else
5899 			return B_OK;
5900 	}
5901 
5902 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5903 }
5904 
5905 
5906 static status_t
5907 file_deselect(struct file_descriptor* descriptor, uint8 event,
5908 	struct selectsync* sync)
5909 {
5910 	struct vnode* vnode = descriptor->u.vnode;
5911 
5912 	if (!HAS_FS_CALL(vnode, deselect))
5913 		return B_OK;
5914 
5915 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5916 }
5917 
5918 
5919 static status_t
5920 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5921 	bool kernel)
5922 {
5923 	struct vnode* vnode;
5924 	status_t status;
5925 
5926 	if (name == NULL || *name == '\0')
5927 		return B_BAD_VALUE;
5928 
5929 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5930 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5931 
5932 	status = get_vnode(mountID, parentID, &vnode, true, false);
5933 	if (status != B_OK)
5934 		return status;
5935 
5936 	if (HAS_FS_CALL(vnode, create_dir))
5937 		status = FS_CALL(vnode, create_dir, name, perms);
5938 	else
5939 		status = B_READ_ONLY_DEVICE;
5940 
5941 	put_vnode(vnode);
5942 	return status;
5943 }
5944 
5945 
5946 static status_t
5947 dir_create(int fd, char* path, int perms, bool kernel)
5948 {
5949 	char filename[B_FILE_NAME_LENGTH];
5950 	struct vnode* vnode;
5951 	status_t status;
5952 
5953 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5954 		kernel));
5955 
5956 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5957 	if (status < 0)
5958 		return status;
5959 
5960 	if (HAS_FS_CALL(vnode, create_dir)) {
5961 		status = FS_CALL(vnode, create_dir, filename, perms);
5962 	} else
5963 		status = B_READ_ONLY_DEVICE;
5964 
5965 	put_vnode(vnode);
5966 	return status;
5967 }
5968 
5969 
5970 static int
5971 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5972 {
5973 	FUNCTION(("dir_open_entry_ref()\n"));
5974 
5975 	if (name && name[0] == '\0')
5976 		return B_BAD_VALUE;
5977 
5978 	// get the vnode matching the entry_ref/node_ref
5979 	struct vnode* vnode;
5980 	status_t status;
5981 	if (name) {
5982 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5983 			&vnode);
5984 	} else
5985 		status = get_vnode(mountID, parentID, &vnode, true, false);
5986 	if (status != B_OK)
5987 		return status;
5988 
5989 	int newFD = open_dir_vnode(vnode, kernel);
5990 	if (newFD >= 0) {
5991 		// The vnode reference has been transferred to the FD
5992 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5993 			vnode->id, name);
5994 	} else
5995 		put_vnode(vnode);
5996 
5997 	return newFD;
5998 }
5999 
6000 
6001 static int
6002 dir_open(int fd, char* path, bool kernel)
6003 {
6004 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6005 		kernel));
6006 
6007 	// get the vnode matching the vnode + path combination
6008 	struct vnode* vnode = NULL;
6009 	ino_t parentID;
6010 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6011 		kernel);
6012 	if (status != B_OK)
6013 		return status;
6014 
6015 	// open the dir
6016 	int newFD = open_dir_vnode(vnode, kernel);
6017 	if (newFD >= 0) {
6018 		// The vnode reference has been transferred to the FD
6019 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6020 			parentID, vnode->id, NULL);
6021 	} else
6022 		put_vnode(vnode);
6023 
6024 	return newFD;
6025 }
6026 
6027 
6028 static status_t
6029 dir_close(struct file_descriptor* descriptor)
6030 {
6031 	struct vnode* vnode = descriptor->u.vnode;
6032 
6033 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6034 
6035 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6036 		vnode->id);
6037 	if (HAS_FS_CALL(vnode, close_dir))
6038 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6039 
6040 	return B_OK;
6041 }
6042 
6043 
6044 static void
6045 dir_free_fd(struct file_descriptor* descriptor)
6046 {
6047 	struct vnode* vnode = descriptor->u.vnode;
6048 
6049 	if (vnode != NULL) {
6050 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6051 		put_vnode(vnode);
6052 	}
6053 }
6054 
6055 
6056 static status_t
6057 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6058 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6059 {
6060 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6061 		bufferSize, _count);
6062 }
6063 
6064 
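/*!	Fixes up a dirent as returned by the FS: fills in the parent device
	and node IDs, resolves ".." for directories that cover another vnode,
	and replaces the IDs of covered vnodes with those of the topmost
	covering vnode.
*/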
6065 static status_t
6066 fix_dirent(struct vnode* parent, struct dirent* entry,
6067 	struct io_context* ioContext)
6068 {
6069 	// set d_pdev and d_pino
6070 	entry->d_pdev = parent->device;
6071 	entry->d_pino = parent->id;
6072 
6073 	// If this is the ".." entry and the directory is covering another vnode,
6074 	// we need to replace d_dev and d_ino with the actual values.
6075 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6076 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6077 			ioContext);
6078 	}
6079 
6080 	// resolve covered vnodes
6081 	ReadLocker _(&sVnodeLock);
6082 
6083 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6084 	if (vnode != NULL && vnode->covered_by != NULL) {
6085 		do {
6086 			vnode = vnode->covered_by;
6087 		} while (vnode->covered_by != NULL);
6088 
6089 		entry->d_dev = vnode->device;
6090 		entry->d_ino = vnode->id;
6091 	}
6092 
6093 	return B_OK;
6094 }
6095 
6096 
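/*!	Reads the next entries from the given directory vnode into the supplied
	buffer and adjusts each of them via fix_dirent().
*/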
6097 static status_t
6098 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6099 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6100 {
6101 	if (!HAS_FS_CALL(vnode, read_dir))
6102 		return B_UNSUPPORTED;
6103 
6104 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6105 		_count);
6106 	if (error != B_OK)
6107 		return error;
6108 
6109 	// we need to adjust each of the dirents we have just read
6110 	uint32 count = *_count;
6111 	for (uint32 i = 0; i < count; i++) {
6112 		error = fix_dirent(vnode, buffer, ioContext);
6113 		if (error != B_OK)
6114 			return error;
6115 
6116 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6117 	}
6118 
6119 	return error;
6120 }
6121 
6122 
6123 static status_t
6124 dir_rewind(struct file_descriptor* descriptor)
6125 {
6126 	struct vnode* vnode = descriptor->u.vnode;
6127 
6128 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6129 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6130 	}
6131 
6132 	return B_UNSUPPORTED;
6133 }
6134 
6135 
6136 static status_t
6137 dir_remove(int fd, char* path, bool kernel)
6138 {
6139 	char name[B_FILE_NAME_LENGTH];
6140 	struct vnode* directory;
6141 	status_t status;
6142 
6143 	if (path != NULL) {
6144 		// we need to make sure our path name doesn't end with "/", ".",
6145 		// or ".."
6146 		char* lastSlash;
6147 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6148 			char* leaf = lastSlash + 1;
6149 			if (!strcmp(leaf, ".."))
6150 				return B_NOT_ALLOWED;
6151 
6152 			// skip over consecutive slashes
6153 			while (lastSlash > path && lastSlash[-1] == '/')
6154 				lastSlash--;
6155 
6156 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6157 				break;
6160 			// "name/" -> "name", or "name/." -> "name"
6161 			lastSlash[0] = '\0';
6162 		}
6163 
6164 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6165 			return B_NOT_ALLOWED;
6166 	}
6167 
6168 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6169 	if (status != B_OK)
6170 		return status;
6171 
6172 	if (HAS_FS_CALL(directory, remove_dir))
6173 		status = FS_CALL(directory, remove_dir, name);
6174 	else
6175 		status = B_READ_ONLY_DEVICE;
6176 
6177 	put_vnode(directory);
6178 	return status;
6179 }
6180 
6181 
6182 static status_t
6183 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6184 	size_t length)
6185 {
6186 	struct vnode* vnode = descriptor->u.vnode;
6187 
6188 	if (HAS_FS_CALL(vnode, ioctl))
6189 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6190 
6191 	return B_DEV_INVALID_IOCTL;
6192 }
6193 
6194 
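/*!	Back end for the fcntl() syscall. Supports the descriptor flag ops
	(F_SETFD, F_GETFD), the open mode ops (F_SETFL, F_GETFL), duplication
	(F_DUPFD, F_DUPFD_CLOEXEC), and advisory locking (F_GETLK, F_SETLK,
	F_SETLKW).
*/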
6195 static status_t
6196 common_fcntl(int fd, int op, size_t argument, bool kernel)
6197 {
6198 	struct flock flock;
6199 
6200 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6201 		fd, op, argument, kernel ? "kernel" : "user"));
6202 
6203 	struct io_context* context = get_current_io_context(kernel);
6204 
6205 	struct file_descriptor* descriptor = get_fd(context, fd);
6206 	if (descriptor == NULL)
6207 		return B_FILE_ERROR;
6208 
6209 	struct vnode* vnode = fd_vnode(descriptor);
6210 
6211 	status_t status = B_OK;
6212 
6213 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6214 		if (descriptor->type != FDTYPE_FILE)
6215 			status = B_BAD_VALUE;
6216 		else if (kernel)
6217 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6218 		else if (user_memcpy(&flock, (struct flock*)argument,
6219 				sizeof(struct flock)) != B_OK)
6220 			status = B_BAD_ADDRESS;
6221 		if (status != B_OK) {
6222 			put_fd(descriptor);
6223 			return status;
6224 		}
6225 	}
6226 
6227 	switch (op) {
6228 		case F_SETFD:
6229 		{
6230 			// Set file descriptor flags
6231 
6232 			// FD_CLOEXEC is the only flag available at this time
6233 			mutex_lock(&context->io_mutex);
6234 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6235 			mutex_unlock(&context->io_mutex);
6236 
6237 			status = B_OK;
6238 			break;
6239 		}
6240 
6241 		case F_GETFD:
6242 		{
6243 			// Get file descriptor flags
6244 			mutex_lock(&context->io_mutex);
6245 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6246 			mutex_unlock(&context->io_mutex);
6247 			break;
6248 		}
6249 
6250 		case F_SETFL:
6251 			// Set file descriptor open mode
6252 
6253 			// we only accept changes to O_APPEND and O_NONBLOCK
6254 			argument &= O_APPEND | O_NONBLOCK;
6255 			if (descriptor->ops->fd_set_flags != NULL) {
6256 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6257 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6258 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6259 					(int)argument);
6260 			} else
6261 				status = B_UNSUPPORTED;
6262 
6263 			if (status == B_OK) {
6264 				// update this descriptor's open_mode field
6265 				descriptor->open_mode = (descriptor->open_mode
6266 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6267 			}
6268 
6269 			break;
6270 
6271 		case F_GETFL:
6272 			// Get file descriptor open mode
6273 			status = descriptor->open_mode;
6274 			break;
6275 
6276 		case F_DUPFD:
6277 		case F_DUPFD_CLOEXEC:
6278 		{
6279 			status = new_fd_etc(context, descriptor, (int)argument);
6280 			if (status >= 0) {
6281 				mutex_lock(&context->io_mutex);
6282 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6283 				mutex_unlock(&context->io_mutex);
6284 
6285 				atomic_add(&descriptor->ref_count, 1);
6286 			}
6287 			break;
6288 		}
6289 
6290 		case F_GETLK:
6291 			if (vnode != NULL) {
6292 				struct flock normalizedLock;
6293 
6294 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6295 				status = normalize_flock(descriptor, &normalizedLock);
6296 				if (status != B_OK)
6297 					break;
6298 
6299 				if (HAS_FS_CALL(vnode, test_lock)) {
6300 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6301 						&normalizedLock);
6302 				} else
6303 					status = test_advisory_lock(vnode, &normalizedLock);
6304 				if (status == B_OK) {
6305 					if (normalizedLock.l_type == F_UNLCK) {
6306 						// no conflicting lock found, copy back the same struct
6307 						// we were given except change type to F_UNLCK
6308 						flock.l_type = F_UNLCK;
6309 						if (kernel) {
6310 							memcpy((struct flock*)argument, &flock,
6311 								sizeof(struct flock));
6312 						} else {
6313 							status = user_memcpy((struct flock*)argument,
6314 								&flock, sizeof(struct flock));
6315 						}
6316 					} else {
6317 						// a conflicting lock was found, copy back its range and
6318 						// type
6319 						if (normalizedLock.l_len == OFF_MAX)
6320 							normalizedLock.l_len = 0;
6321 
6322 						if (kernel) {
6323 							memcpy((struct flock*)argument,
6324 								&normalizedLock, sizeof(struct flock));
6325 						} else {
6326 							status = user_memcpy((struct flock*)argument,
6327 								&normalizedLock, sizeof(struct flock));
6328 						}
6329 					}
6330 				}
6331 			} else
6332 				status = B_BAD_VALUE;
6333 			break;
6334 
6335 		case F_SETLK:
6336 		case F_SETLKW:
6337 			status = normalize_flock(descriptor, &flock);
6338 			if (status != B_OK)
6339 				break;
6340 
6341 			if (vnode == NULL) {
6342 				status = B_BAD_VALUE;
6343 			} else if (flock.l_type == F_UNLCK) {
6344 				if (HAS_FS_CALL(vnode, release_lock)) {
6345 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6346 						&flock);
6347 				} else {
6348 					status = release_advisory_lock(vnode, context, NULL,
6349 						&flock);
6350 				}
6351 			} else {
6352 				// the open mode must match the lock type
6353 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6354 						&& flock.l_type == F_WRLCK)
6355 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6356 						&& flock.l_type == F_RDLCK))
6357 					status = B_FILE_ERROR;
6358 				else {
6359 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6360 						status = FS_CALL(vnode, acquire_lock,
6361 							descriptor->cookie, &flock, op == F_SETLKW);
6362 					} else {
6363 						status = acquire_advisory_lock(vnode, context, NULL,
6364 							&flock, op == F_SETLKW);
6365 					}
6366 				}
6367 			}
6368 			break;
6369 
6370 		// ToDo: add support for more ops?
6371 
6372 		default:
6373 			status = B_BAD_VALUE;
6374 	}
6375 
6376 	put_fd(descriptor);
6377 	return status;
6378 }
6379 
6380 
6381 static status_t
6382 common_sync(int fd, bool kernel)
6383 {
6384 	struct file_descriptor* descriptor;
6385 	struct vnode* vnode;
6386 	status_t status;
6387 
6388 	FUNCTION(("common_sync: fd %d, kernel %d\n", fd, kernel));
6389 
6390 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6391 	if (descriptor == NULL)
6392 		return B_FILE_ERROR;
6393 
6394 	if (HAS_FS_CALL(vnode, fsync))
6395 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6396 	else
6397 		status = B_UNSUPPORTED;
6398 
6399 	put_fd(descriptor);
6400 	return status;
6401 }
6402 
6403 
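/*!	Marks the node the FD refers to as mandatorily locked by atomically
	setting the vnode's mandatory_locked_by field to the descriptor.
	Fails with B_BUSY if another descriptor already holds the lock.
*/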
6404 static status_t
6405 common_lock_node(int fd, bool kernel)
6406 {
6407 	struct file_descriptor* descriptor;
6408 	struct vnode* vnode;
6409 
6410 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6411 	if (descriptor == NULL)
6412 		return B_FILE_ERROR;
6413 
6414 	status_t status = B_OK;
6415 
6416 	// We need to set the lock atomically -- someone
6417 	// else might set one at the same time
6418 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6419 			(file_descriptor*)NULL) != NULL)
6420 		status = B_BUSY;
6421 
6422 	put_fd(descriptor);
6423 	return status;
6424 }
6425 
6426 
6427 static status_t
6428 common_unlock_node(int fd, bool kernel)
6429 {
6430 	struct file_descriptor* descriptor;
6431 	struct vnode* vnode;
6432 
6433 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6434 	if (descriptor == NULL)
6435 		return B_FILE_ERROR;
6436 
6437 	status_t status = B_OK;
6438 
6439 	// We need to clear the lock atomically -- someone
6440 	// else might set one at the same time
6441 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6442 			(file_descriptor*)NULL, descriptor) != descriptor)
6443 		status = B_BAD_VALUE;
6444 
6445 	put_fd(descriptor);
6446 	return status;
6447 }
6448 
6449 
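/*!	Preallocates the given range for the file the FD refers to.
	Only regular files opened for writing are eligible; the range must not
	overflow off_t.
*/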
6450 static status_t
6451 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6452 {
6453 	struct file_descriptor* descriptor;
6454 	struct vnode* vnode;
6455 
6456 	if (offset < 0 || length == 0)
6457 		return B_BAD_VALUE;
6458 	if (offset > OFF_MAX - length)
6459 		return B_FILE_TOO_LARGE;
6460 
6461 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6462 	if (descriptor == NULL || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6463 		return B_FILE_ERROR;
6464 
6465 	switch (vnode->Type() & S_IFMT) {
6466 		case S_IFIFO:
6467 		case S_IFSOCK:
6468 			return ESPIPE;
6469 
6470 		case S_IFBLK:
6471 		case S_IFCHR:
6472 		case S_IFDIR:
6473 		case S_IFLNK:
6474 			return B_DEVICE_NOT_FOUND;
6475 
6476 		case S_IFREG:
6477 			break;
6478 	}
6479 
6480 	status_t status = B_OK;
6481 	if (HAS_FS_CALL(vnode, preallocate)) {
6482 		status = FS_CALL(vnode, preallocate, offset, length);
6483 	} else {
6484 		status = HAS_FS_CALL(vnode, write)
6485 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6486 	}
6487 
6488 	return status;
6489 }
6490 
6491 
6492 static status_t
6493 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6494 	bool kernel)
6495 {
6496 	struct vnode* vnode;
6497 	status_t status;
6498 
6499 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6500 	if (status != B_OK)
6501 		return status;
6502 
6503 	if (HAS_FS_CALL(vnode, read_symlink)) {
6504 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6505 	} else
6506 		status = B_BAD_VALUE;
6507 
6508 	put_vnode(vnode);
6509 	return status;
6510 }
6511 
6512 
6513 static status_t
6514 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6515 	bool kernel)
6516 {
6517 	// path validity checks have to be in the calling function!
6518 	char name[B_FILE_NAME_LENGTH];
6519 	struct vnode* vnode;
6520 	status_t status;
6521 
6522 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6523 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6524 
6525 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6526 	if (status != B_OK)
6527 		return status;
6528 
6529 	if (HAS_FS_CALL(vnode, create_symlink))
6530 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6531 	else {
6532 		status = HAS_FS_CALL(vnode, write)
6533 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6534 	}
6535 
6536 	put_vnode(vnode);
6537 
6538 	return status;
6539 }
6540 
6541 
6542 static status_t
6543 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6544 	bool traverseLeafLink, bool kernel)
6545 {
6546 	// path validity checks have to be in the calling function!
6547 
6548 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6549 		toPath, kernel));
6550 
6551 	char name[B_FILE_NAME_LENGTH];
6552 	struct vnode* directory;
6553 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6554 		kernel);
6555 	if (status != B_OK)
6556 		return status;
6557 
6558 	struct vnode* vnode;
6559 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6560 		kernel);
6561 	if (status != B_OK)
6562 		goto err;
6563 
6564 	if (directory->mount != vnode->mount) {
6565 		status = B_CROSS_DEVICE_LINK;
6566 		goto err1;
6567 	}
6568 
6569 	if (HAS_FS_CALL(directory, link))
6570 		status = FS_CALL(directory, link, name, vnode);
6571 	else
6572 		status = B_READ_ONLY_DEVICE;
6573 
6574 err1:
6575 	put_vnode(vnode);
6576 err:
6577 	put_vnode(directory);
6578 
6579 	return status;
6580 }
6581 
6582 
6583 static status_t
6584 common_unlink(int fd, char* path, bool kernel)
6585 {
6586 	char filename[B_FILE_NAME_LENGTH];
6587 	struct vnode* vnode;
6588 	status_t status;
6589 
6590 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6591 		kernel));
6592 
6593 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6594 	if (status < 0)
6595 		return status;
6596 
6597 	if (HAS_FS_CALL(vnode, unlink))
6598 		status = FS_CALL(vnode, unlink, filename);
6599 	else
6600 		status = B_READ_ONLY_DEVICE;
6601 
6602 	put_vnode(vnode);
6603 
6604 	return status;
6605 }
6606 
6607 
6608 static status_t
6609 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6610 {
6611 	struct vnode* vnode;
6612 	status_t status;
6613 
6614 	// TODO: honor effectiveUserGroup argument
6615 
6616 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6617 	if (status != B_OK)
6618 		return status;
6619 
6620 	if (HAS_FS_CALL(vnode, access))
6621 		status = FS_CALL(vnode, access, mode);
6622 	else
6623 		status = B_OK;
6624 
6625 	put_vnode(vnode);
6626 
6627 	return status;
6628 }
6629 
6630 
6631 static status_t
6632 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6633 {
6634 	struct vnode* fromVnode;
6635 	struct vnode* toVnode;
6636 	char fromName[B_FILE_NAME_LENGTH];
6637 	char toName[B_FILE_NAME_LENGTH];
6638 	status_t status;
6639 
6640 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6641 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6642 
6643 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6644 	if (status != B_OK)
6645 		return status;
6646 
6647 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6648 	if (status != B_OK)
6649 		goto err1;
6650 
6651 	if (fromVnode->device != toVnode->device) {
6652 		status = B_CROSS_DEVICE_LINK;
6653 		goto err2;
6654 	}
6655 
6656 	if (fromName[0] == '\0' || toName[0] == '\0'
6657 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6658 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6659 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6660 		status = B_BAD_VALUE;
6661 		goto err2;
6662 	}
6663 
6664 	if (HAS_FS_CALL(fromVnode, rename))
6665 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6666 	else
6667 		status = B_READ_ONLY_DEVICE;
6668 
6669 err2:
6670 	put_vnode(toVnode);
6671 err1:
6672 	put_vnode(fromVnode);
6673 
6674 	return status;
6675 }
6676 
6677 
6678 static status_t
6679 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6680 {
6681 	struct vnode* vnode = descriptor->u.vnode;
6682 
6683 	FUNCTION(("common_read_stat: stat %p\n", stat));
6684 
6685 	// TODO: remove this once all file systems properly set them!
6686 	stat->st_crtim.tv_nsec = 0;
6687 	stat->st_ctim.tv_nsec = 0;
6688 	stat->st_mtim.tv_nsec = 0;
6689 	stat->st_atim.tv_nsec = 0;
6690 
6691 	return vfs_stat_vnode(vnode, stat);
6692 }
6693 
6694 
6695 static status_t
6696 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6697 	int statMask)
6698 {
6699 	struct vnode* vnode = descriptor->u.vnode;
6700 
6701 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6702 		vnode, stat, statMask));
6703 
6704 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6705 		&& (statMask & B_STAT_SIZE) != 0) {
6706 		return B_BAD_VALUE;
6707 	}
6708 
6709 	if (!HAS_FS_CALL(vnode, write_stat))
6710 		return B_READ_ONLY_DEVICE;
6711 
6712 	return FS_CALL(vnode, write_stat, stat, statMask);
6713 }
6714 
6715 
6716 static status_t
6717 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6718 	struct stat* stat, bool kernel)
6719 {
6720 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6721 		stat));
6722 
6723 	struct vnode* vnode;
6724 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6725 		NULL, kernel);
6726 	if (status != B_OK)
6727 		return status;
6728 
6729 	status = vfs_stat_vnode(vnode, stat);
6730 
6731 	put_vnode(vnode);
6732 	return status;
6733 }
6734 
6735 
6736 static status_t
6737 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6738 	const struct stat* stat, int statMask, bool kernel)
6739 {
6740 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6741 		"kernel %d\n", fd, path, stat, statMask, kernel));
6742 
6743 	struct vnode* vnode;
6744 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6745 		NULL, kernel);
6746 	if (status != B_OK)
6747 		return status;
6748 
6749 	if (HAS_FS_CALL(vnode, write_stat))
6750 		status = FS_CALL(vnode, write_stat, stat, statMask);
6751 	else
6752 		status = B_READ_ONLY_DEVICE;
6753 
6754 	put_vnode(vnode);
6755 
6756 	return status;
6757 }
6758 
6759 
6760 static int
6761 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6762 {
6763 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6764 		kernel));
6765 
6766 	struct vnode* vnode;
6767 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6768 		NULL, kernel);
6769 	if (status != B_OK)
6770 		return status;
6771 
6772 	status = open_attr_dir_vnode(vnode, kernel);
6773 	if (status < 0)
6774 		put_vnode(vnode);
6775 
6776 	return status;
6777 }
6778 
6779 
6780 static status_t
6781 attr_dir_close(struct file_descriptor* descriptor)
6782 {
6783 	struct vnode* vnode = descriptor->u.vnode;
6784 
6785 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6786 
6787 	if (HAS_FS_CALL(vnode, close_attr_dir))
6788 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6789 
6790 	return B_OK;
6791 }
6792 
6793 
6794 static void
6795 attr_dir_free_fd(struct file_descriptor* descriptor)
6796 {
6797 	struct vnode* vnode = descriptor->u.vnode;
6798 
6799 	if (vnode != NULL) {
6800 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6801 		put_vnode(vnode);
6802 	}
6803 }
6804 
6805 
6806 static status_t
6807 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6808 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6809 {
6810 	struct vnode* vnode = descriptor->u.vnode;
6811 
6812 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6813 
6814 	if (HAS_FS_CALL(vnode, read_attr_dir))
6815 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6816 			bufferSize, _count);
6817 
6818 	return B_UNSUPPORTED;
6819 }
6820 
6821 
6822 static status_t
6823 attr_dir_rewind(struct file_descriptor* descriptor)
6824 {
6825 	struct vnode* vnode = descriptor->u.vnode;
6826 
6827 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6828 
6829 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6830 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6831 
6832 	return B_UNSUPPORTED;
6833 }
6834 
6835 
6836 static int
6837 attr_create(int fd, char* path, const char* name, uint32 type,
6838 	int openMode, bool kernel)
6839 {
6840 	if (name == NULL || *name == '\0')
6841 		return B_BAD_VALUE;
6842 
6843 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6844 	struct vnode* vnode;
6845 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6846 		kernel);
6847 	if (status != B_OK)
6848 		return status;
6849 
6850 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6851 		status = B_LINK_LIMIT;
6852 		goto err;
6853 	}
6854 
6855 	if (!HAS_FS_CALL(vnode, create_attr)) {
6856 		status = B_READ_ONLY_DEVICE;
6857 		goto err;
6858 	}
6859 
6860 	void* cookie;
6861 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6862 	if (status != B_OK)
6863 		goto err;
6864 
6865 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6866 	if (fd >= 0)
6867 		return fd;
6868 
6869 	status = fd;
6870 
6871 	FS_CALL(vnode, close_attr, cookie);
6872 	FS_CALL(vnode, free_attr_cookie, cookie);
6873 
6874 	FS_CALL(vnode, remove_attr, name);
6875 
6876 err:
6877 	put_vnode(vnode);
6878 
6879 	return status;
6880 }
6881 
6882 
6883 static int
6884 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6885 {
6886 	if (name == NULL || *name == '\0')
6887 		return B_BAD_VALUE;
6888 
6889 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6890 	struct vnode* vnode;
6891 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6892 		kernel);
6893 	if (status != B_OK)
6894 		return status;
6895 
6896 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6897 		status = B_LINK_LIMIT;
6898 		goto err;
6899 	}
6900 
6901 	if (!HAS_FS_CALL(vnode, open_attr)) {
6902 		status = B_UNSUPPORTED;
6903 		goto err;
6904 	}
6905 
6906 	void* cookie;
6907 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6908 	if (status != B_OK)
6909 		goto err;
6910 
6911 	// now we only need a file descriptor for this attribute and we're done
6912 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6913 	if (fd >= 0)
6914 		return fd;
6915 
6916 	status = fd;
6917 
6918 	FS_CALL(vnode, close_attr, cookie);
6919 	FS_CALL(vnode, free_attr_cookie, cookie);
6920 
6921 err:
6922 	put_vnode(vnode);
6923 
6924 	return status;
6925 }
6926 
6927 
6928 static status_t
6929 attr_close(struct file_descriptor* descriptor)
6930 {
6931 	struct vnode* vnode = descriptor->u.vnode;
6932 
6933 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6934 
6935 	if (HAS_FS_CALL(vnode, close_attr))
6936 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6937 
6938 	return B_OK;
6939 }
6940 
6941 
6942 static void
6943 attr_free_fd(struct file_descriptor* descriptor)
6944 {
6945 	struct vnode* vnode = descriptor->u.vnode;
6946 
6947 	if (vnode != NULL) {
6948 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6949 		put_vnode(vnode);
6950 	}
6951 }
6952 
6953 
6954 static status_t
6955 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6956 	size_t* length)
6957 {
6958 	struct vnode* vnode = descriptor->u.vnode;
6959 
6960 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6961 		pos, length, *length));
6962 
6963 	if (!HAS_FS_CALL(vnode, read_attr))
6964 		return B_UNSUPPORTED;
6965 
6966 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6967 }
6968 
6969 
6970 static status_t
6971 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6972 	size_t* length)
6973 {
6974 	struct vnode* vnode = descriptor->u.vnode;
6975 
6976 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6977 		length));
6978 
6979 	if (!HAS_FS_CALL(vnode, write_attr))
6980 		return B_UNSUPPORTED;
6981 
6982 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6983 }
6984 
6985 
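/*!	Computes and sets the new position within an attribute for the given
	seek mode, analogously to file_seek(), though without SEEK_DATA and
	SEEK_HOLE support.
*/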
6986 static off_t
6987 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6988 {
6989 	off_t offset;
6990 
6991 	switch (seekType) {
6992 		case SEEK_SET:
6993 			offset = 0;
6994 			break;
6995 		case SEEK_CUR:
6996 			offset = descriptor->pos;
6997 			break;
6998 		case SEEK_END:
6999 		{
7000 			struct vnode* vnode = descriptor->u.vnode;
7001 			if (!HAS_FS_CALL(vnode, read_attr_stat))
7002 				return B_UNSUPPORTED;
7003 
7004 			struct stat stat;
7005 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
7006 				&stat);
7007 			if (status != B_OK)
7008 				return status;
7009 
7010 			offset = stat.st_size;
7011 			break;
7012 		}
7013 		default:
7014 			return B_BAD_VALUE;
7015 	}
7016 
7017 	// assumes off_t is 64 bits wide
7018 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7019 		return B_BUFFER_OVERFLOW;
7020 
7021 	pos += offset;
7022 	if (pos < 0)
7023 		return B_BAD_VALUE;
7024 
7025 	return descriptor->pos = pos;
7026 }
7027 
7028 
7029 static status_t
7030 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7031 {
7032 	struct vnode* vnode = descriptor->u.vnode;
7033 
7034 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7035 
7036 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7037 		return B_UNSUPPORTED;
7038 
7039 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7040 }
7041 
7042 
7043 static status_t
7044 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7045 	int statMask)
7046 {
7047 	struct vnode* vnode = descriptor->u.vnode;
7048 
7049 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7050 
7051 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7052 		return B_READ_ONLY_DEVICE;
7053 
7054 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7055 }
7056 
7057 
7058 static status_t
7059 attr_remove(int fd, const char* name, bool kernel)
7060 {
7061 	struct file_descriptor* descriptor;
7062 	struct vnode* vnode;
7063 	status_t status;
7064 
7065 	if (name == NULL || *name == '\0')
7066 		return B_BAD_VALUE;
7067 
7068 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7069 		kernel));
7070 
7071 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7072 	if (descriptor == NULL)
7073 		return B_FILE_ERROR;
7074 
7075 	if (HAS_FS_CALL(vnode, remove_attr))
7076 		status = FS_CALL(vnode, remove_attr, name);
7077 	else
7078 		status = B_READ_ONLY_DEVICE;
7079 
7080 	put_fd(descriptor);
7081 
7082 	return status;
7083 }
7084 
7085 
7086 static status_t
7087 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7088 	bool kernel)
7089 {
7090 	struct file_descriptor* fromDescriptor;
7091 	struct file_descriptor* toDescriptor;
7092 	struct vnode* fromVnode;
7093 	struct vnode* toVnode;
7094 	status_t status;
7095 
7096 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7097 		|| *toName == '\0')
7098 		return B_BAD_VALUE;
7099 
7100 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7101 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7102 
7103 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7104 	if (fromDescriptor == NULL)
7105 		return B_FILE_ERROR;
7106 
7107 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7108 	if (toDescriptor == NULL) {
7109 		status = B_FILE_ERROR;
7110 		goto err;
7111 	}
7112 
7113 	// are the files on the same volume?
7114 	if (fromVnode->device != toVnode->device) {
7115 		status = B_CROSS_DEVICE_LINK;
7116 		goto err1;
7117 	}
7118 
7119 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7120 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7121 	} else
7122 		status = B_READ_ONLY_DEVICE;
7123 
7124 err1:
7125 	put_fd(toDescriptor);
7126 err:
7127 	put_fd(fromDescriptor);
7128 
7129 	return status;
7130 }
7131 
7132 
7133 static int
7134 index_dir_open(dev_t mountID, bool kernel)
7135 {
7136 	struct fs_mount* mount;
7137 	void* cookie;
7138 
7139 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7140 		kernel));
7141 
7142 	status_t status = get_mount(mountID, &mount);
7143 	if (status != B_OK)
7144 		return status;
7145 
7146 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7147 		status = B_UNSUPPORTED;
7148 		goto error;
7149 	}
7150 
7151 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7152 	if (status != B_OK)
7153 		goto error;
7154 
7155 	// get fd for the index directory
7156 	int fd;
7157 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7158 	if (fd >= 0)
7159 		return fd;
7160 
7161 	// something went wrong
7162 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7163 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7164 
7165 	status = fd;
7166 
7167 error:
7168 	put_mount(mount);
7169 	return status;
7170 }
7171 
7172 
7173 static status_t
7174 index_dir_close(struct file_descriptor* descriptor)
7175 {
7176 	struct fs_mount* mount = descriptor->u.mount;
7177 
7178 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7179 
7180 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7181 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7182 
7183 	return B_OK;
7184 }
7185 
7186 
7187 static void
7188 index_dir_free_fd(struct file_descriptor* descriptor)
7189 {
7190 	struct fs_mount* mount = descriptor->u.mount;
7191 
7192 	if (mount != NULL) {
7193 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7194 		put_mount(mount);
7195 	}
7196 }
7197 
7198 
7199 static status_t
7200 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7201 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7202 {
7203 	struct fs_mount* mount = descriptor->u.mount;
7204 
7205 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7206 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7207 			bufferSize, _count);
7208 	}
7209 
7210 	return B_UNSUPPORTED;
7211 }
7212 
7213 
7214 static status_t
7215 index_dir_rewind(struct file_descriptor* descriptor)
7216 {
7217 	struct fs_mount* mount = descriptor->u.mount;
7218 
7219 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7220 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7221 
7222 	return B_UNSUPPORTED;
7223 }
7224 
7225 
7226 static status_t
7227 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7228 	bool kernel)
7229 {
7230 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7231 		mountID, name, kernel));
7232 
7233 	struct fs_mount* mount;
7234 	status_t status = get_mount(mountID, &mount);
7235 	if (status != B_OK)
7236 		return status;
7237 
7238 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7239 		status = B_READ_ONLY_DEVICE;
7240 		goto out;
7241 	}
7242 
7243 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7244 
7245 out:
7246 	put_mount(mount);
7247 	return status;
7248 }
7249 
7250 
7251 #if 0
7252 static status_t
7253 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7254 {
7255 	struct vnode* vnode = descriptor->u.vnode;
7256 
7257 	// ToDo: currently unused!
7258 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7259 	if (!HAS_FS_CALL(vnode, read_index_stat))
7260 		return B_UNSUPPORTED;
7261 
7262 	return B_UNSUPPORTED;
7263 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7264 }
7265 
7266 
7267 static void
7268 index_free_fd(struct file_descriptor* descriptor)
7269 {
7270 	struct vnode* vnode = descriptor->u.vnode;
7271 
7272 	if (vnode != NULL) {
7273 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7274 		put_vnode(vnode);
7275 	}
7276 }
7277 #endif
7278 
7279 
7280 static status_t
7281 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7282 	bool kernel)
7283 {
7284 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7285 		mountID, name, kernel));
7286 
7287 	struct fs_mount* mount;
7288 	status_t status = get_mount(mountID, &mount);
7289 	if (status != B_OK)
7290 		return status;
7291 
7292 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7293 		status = B_UNSUPPORTED;
7294 		goto out;
7295 	}
7296 
7297 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7298 
7299 out:
7300 	put_mount(mount);
7301 	return status;
7302 }
7303 
7304 
7305 static status_t
7306 index_remove(dev_t mountID, const char* name, bool kernel)
7307 {
7308 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7309 		mountID, name, kernel));
7310 
7311 	struct fs_mount* mount;
7312 	status_t status = get_mount(mountID, &mount);
7313 	if (status != B_OK)
7314 		return status;
7315 
7316 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7317 		status = B_READ_ONLY_DEVICE;
7318 		goto out;
7319 	}
7320 
7321 	status = FS_MOUNT_CALL(mount, remove_index, name);
7322 
7323 out:
7324 	put_mount(mount);
7325 	return status;
7326 }
7327 
7328 
7329 /*!	TODO: the query FS API is still pretty much the same as in R5.
7330 		It would be nice if the FS would find some more kernel support
7331 		for them.
7332 		For example, query parsing should be moved into the kernel.
7333 */
7334 static int
7335 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7336 	int32 token, bool kernel)
7337 {
7338 	struct fs_mount* mount;
7339 	void* cookie;
7340 
7341 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7342 		device, query, kernel));
7343 
7344 	status_t status = get_mount(device, &mount);
7345 	if (status != B_OK)
7346 		return status;
7347 
7348 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7349 		status = B_UNSUPPORTED;
7350 		goto error;
7351 	}
7352 
7353 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7354 		&cookie);
7355 	if (status != B_OK)
7356 		goto error;
7357 
7358 	// get fd for the query
7359 	int fd;
7360 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7361 	if (fd >= 0)
7362 		return fd;
7363 
7364 	status = fd;
7365 
7366 	// something went wrong
7367 	FS_MOUNT_CALL(mount, close_query, cookie);
7368 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7369 
7370 error:
7371 	put_mount(mount);
7372 	return status;
7373 }
7374 
7375 
7376 static status_t
7377 query_close(struct file_descriptor* descriptor)
7378 {
7379 	struct fs_mount* mount = descriptor->u.mount;
7380 
7381 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7382 
7383 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7384 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7385 
7386 	return B_OK;
7387 }
7388 
7389 
7390 static void
7391 query_free_fd(struct file_descriptor* descriptor)
7392 {
7393 	struct fs_mount* mount = descriptor->u.mount;
7394 
7395 	if (mount != NULL) {
7396 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7397 		put_mount(mount);
7398 	}
7399 }
7400 
7401 
7402 static status_t
7403 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7404 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7405 {
7406 	struct fs_mount* mount = descriptor->u.mount;
7407 
7408 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7409 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7410 			bufferSize, _count);
7411 	}
7412 
7413 	return B_UNSUPPORTED;
7414 }
7415 
7416 
7417 static status_t
7418 query_rewind(struct file_descriptor* descriptor)
7419 {
7420 	struct fs_mount* mount = descriptor->u.mount;
7421 
7422 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7423 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7424 
7425 	return B_UNSUPPORTED;
7426 }
7427 
7428 
7429 //	#pragma mark - General File System functions
7430 
7431 
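/*!	Mounts the file system "fsName" on the directory given by "path",
	optionally backed by "device". If no FS name is given, it is retrieved
	from the disk device manager. A layered FS name results in a chain of
	fs_volume objects, one per layer.
	Returns the dev_t of the new mount on success, or an error code
	otherwise.
*/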
7432 static dev_t
7433 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7434 	const char* args, bool kernel)
7435 {
7436 	struct ::fs_mount* mount;
7437 	status_t status = B_OK;
7438 	fs_volume* volume = NULL;
7439 	int32 layer = 0;
7440 	Vnode* coveredNode = NULL;
7441 
7442 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7443 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7444 
7445 	// The path is always safe; we just have to make sure that fsName is
7446 	// at least superficially valid -- we can't make any assumptions about
7447 	// args, though. A NULL fsName is OK if a device was given and the FS
7448 	// is not virtual; we'll get the name from the DDM later.
7449 	if (fsName == NULL) {
7450 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7451 			return B_BAD_VALUE;
7452 	} else if (fsName[0] == '\0')
7453 		return B_BAD_VALUE;
7454 
7455 	RecursiveLocker mountOpLocker(sMountOpLock);
7456 
7457 	// Helper to delete a newly created file device on failure.
7458 	// Not exactly beautiful, but helps to keep the code below cleaner.
7459 	struct FileDeviceDeleter {
7460 		FileDeviceDeleter() : id(-1) {}
7461 		~FileDeviceDeleter()
7462 		{
7463 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7464 		}
7465 
7466 		partition_id id;
7467 	} fileDeviceDeleter;
7468 
7469 	// If the file system is not a "virtual" one, the device argument should
7470 	// point to a real file/device (if given at all).
7471 	// get the partition
7472 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7473 	KPartition* partition = NULL;
7474 	KPath normalizedDevice;
7475 	bool newlyCreatedFileDevice = false;
7476 
7477 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7478 		// normalize the device path
7479 		status = normalizedDevice.SetTo(device, true);
7480 		if (status != B_OK)
7481 			return status;
7482 
7483 		// get a corresponding partition from the DDM
7484 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7485 		if (partition == NULL) {
7486 			// Partition not found: this either means the user supplied
7487 			// an invalid path, or the path refers to an image file. We try
7488 			// to let the DDM create a file device for the path.
7489 			partition_id deviceID = ddm->CreateFileDevice(
7490 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7491 			if (deviceID >= 0) {
7492 				partition = ddm->RegisterPartition(deviceID);
7493 				if (newlyCreatedFileDevice)
7494 					fileDeviceDeleter.id = deviceID;
7495 			}
7496 		}
7497 
7498 		if (!partition) {
7499 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7500 				normalizedDevice.Path()));
7501 			return B_ENTRY_NOT_FOUND;
7502 		}
7503 
7504 		device = normalizedDevice.Path();
7505 			// correct path to file device
7506 	}
7507 	PartitionRegistrar partitionRegistrar(partition, true);
7508 
7509 	// Write lock the partition's device. For the time being, we keep the lock
7510 	// until we're done mounting -- not nice, but it ensures that no one is
7511 	// interfering.
7512 	// TODO: Just mark the partition busy while mounting!
7513 	KDiskDevice* diskDevice = NULL;
7514 	if (partition) {
7515 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7516 		if (!diskDevice) {
7517 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7518 			return B_ERROR;
7519 		}
7520 	}
7521 
7522 	DeviceWriteLocker writeLocker(diskDevice, true);
7523 		// this takes over the write lock acquired before
7524 
7525 	if (partition != NULL) {
7526 		// make sure that the partition is not busy
7527 		if (partition->IsBusy()) {
7528 			TRACE(("fs_mount(): Partition is busy.\n"));
7529 			return B_BUSY;
7530 		}
7531 
7532 		// if no FS name had been supplied, we get it from the partition
7533 		if (fsName == NULL) {
7534 			KDiskSystem* diskSystem = partition->DiskSystem();
7535 			if (!diskSystem) {
7536 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7537 					"recognize it.\n"));
7538 				return B_BAD_VALUE;
7539 			}
7540 
7541 			if (!diskSystem->IsFileSystem()) {
7542 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7543 					"partitioning system.\n"));
7544 				return B_BAD_VALUE;
7545 			}
7546 
7547 			// The disk system name will not change, and the KDiskSystem
7548 			// object will not go away while the disk device is locked (and
7549 			// the partition has a reference to it), so this is safe.
7550 			fsName = diskSystem->Name();
7551 		}
7552 	}
7553 
7554 	mount = new(std::nothrow) (struct ::fs_mount);
7555 	if (mount == NULL)
7556 		return B_NO_MEMORY;
7557 
7558 	mount->device_name = strdup(device);
7559 		// "device" can be NULL
7560 
7561 	status = mount->entry_cache.Init();
7562 	if (status != B_OK)
7563 		goto err1;
7564 
7565 	// initialize structure
7566 	mount->id = sNextMountID++;
7567 	mount->partition = NULL;
7568 	mount->root_vnode = NULL;
7569 	mount->covers_vnode = NULL;
7570 	mount->unmounting = false;
7571 	mount->owns_file_device = false;
7572 	mount->volume = NULL;
7573 
7574 	// build up the volume(s)
7575 	while (true) {
7576 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7577 		if (layerFSName == NULL) {
7578 			if (layer == 0) {
7579 				status = B_NO_MEMORY;
7580 				goto err1;
7581 			}
7582 
7583 			break;
7584 		}
7585 		MemoryDeleter layerFSNameDeleter(layerFSName);
7586 
7587 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7588 		if (volume == NULL) {
7589 			status = B_NO_MEMORY;
7590 			goto err1;
7591 		}
7592 
7593 		volume->id = mount->id;
7594 		volume->partition = partition != NULL ? partition->ID() : -1;
7595 		volume->layer = layer++;
7596 		volume->private_volume = NULL;
7597 		volume->ops = NULL;
7598 		volume->sub_volume = NULL;
7599 		volume->super_volume = NULL;
7600 		volume->file_system = NULL;
7601 		volume->file_system_name = NULL;
7602 
7603 		volume->file_system_name = get_file_system_name(layerFSName);
7604 		if (volume->file_system_name == NULL) {
7605 			status = B_NO_MEMORY;
7606 			free(volume);
7607 			goto err1;
7608 		}
7609 
7610 		volume->file_system = get_file_system(layerFSName);
7611 		if (volume->file_system == NULL) {
7612 			status = B_DEVICE_NOT_FOUND;
7613 			free(volume->file_system_name);
7614 			free(volume);
7615 			goto err1;
7616 		}
7617 
7618 		if (mount->volume == NULL)
7619 			mount->volume = volume;
7620 		else {
7621 			volume->super_volume = mount->volume;
7622 			mount->volume->sub_volume = volume;
7623 			mount->volume = volume;
7624 		}
7625 	}
7626 
7627 	// insert mount struct into list before we call FS's mount() function
7628 	// so that vnodes can be created for this mount
7629 	rw_lock_write_lock(&sMountLock);
7630 	sMountsTable->Insert(mount);
7631 	rw_lock_write_unlock(&sMountLock);
7632 
7633 	ino_t rootID;
7634 
7635 	if (!sRoot) {
7636 		// we haven't mounted anything yet
7637 		if (strcmp(path, "/") != 0) {
7638 			status = B_ERROR;
7639 			goto err2;
7640 		}
7641 
7642 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7643 			args, &rootID);
7644 		if (status != B_OK || mount->volume->ops == NULL)
7645 			goto err2;
7646 	} else {
7647 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7648 		if (status != B_OK)
7649 			goto err2;
7650 
7651 		mount->covers_vnode = coveredNode;
7652 
		// make sure coveredNode is a directory
7654 		if (!S_ISDIR(coveredNode->Type())) {
7655 			status = B_NOT_A_DIRECTORY;
7656 			goto err3;
7657 		}
7658 
7659 		if (coveredNode->IsCovered()) {
7660 			// this is already a covered vnode
7661 			status = B_BUSY;
7662 			goto err3;
7663 		}
7664 
		// mount the volume(s)
7666 		fs_volume* volume = mount->volume;
7667 		while (volume) {
7668 			status = volume->file_system->mount(volume, device, flags, args,
7669 				&rootID);
7670 			if (status != B_OK || volume->ops == NULL) {
7671 				if (status == B_OK && volume->ops == NULL)
7672 					panic("fs_mount: mount() succeeded but ops is NULL!");
7673 				if (volume->sub_volume)
7674 					goto err4;
7675 				goto err3;
7676 			}
7677 
7678 			volume = volume->super_volume;
7679 		}
7680 
7681 		volume = mount->volume;
7682 		while (volume) {
7683 			if (volume->ops->all_layers_mounted != NULL)
7684 				volume->ops->all_layers_mounted(volume);
7685 			volume = volume->super_volume;
7686 		}
7687 	}
7688 
7689 	// the root node is supposed to be owned by the file system - it must
7690 	// exist at this point
7691 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7692 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7693 		panic("fs_mount: file system does not own its root node!\n");
7694 		status = B_ERROR;
7695 		goto err4;
7696 	}
7697 
7698 	// set up the links between the root vnode and the vnode it covers
7699 	rw_lock_write_lock(&sVnodeLock);
7700 	if (coveredNode != NULL) {
7701 		if (coveredNode->IsCovered()) {
7702 			// the vnode is covered now
7703 			status = B_BUSY;
7704 			rw_lock_write_unlock(&sVnodeLock);
7705 			goto err4;
7706 		}
7707 
7708 		mount->root_vnode->covers = coveredNode;
7709 		mount->root_vnode->SetCovering(true);
7710 
7711 		coveredNode->covered_by = mount->root_vnode;
7712 		coveredNode->SetCovered(true);
7713 	}
7714 	rw_lock_write_unlock(&sVnodeLock);
7715 
7716 	if (!sRoot) {
7717 		sRoot = mount->root_vnode;
7718 		mutex_lock(&sIOContextRootLock);
7719 		get_current_io_context(true)->root = sRoot;
7720 		mutex_unlock(&sIOContextRootLock);
7721 		inc_vnode_ref_count(sRoot);
7722 	}
7723 
7724 	// supply the partition (if any) with the mount cookie and mark it mounted
7725 	if (partition) {
7726 		partition->SetMountCookie(mount->volume->private_volume);
7727 		partition->SetVolumeID(mount->id);
7728 
7729 		// keep a partition reference as long as the partition is mounted
7730 		partitionRegistrar.Detach();
7731 		mount->partition = partition;
7732 		mount->owns_file_device = newlyCreatedFileDevice;
7733 		fileDeviceDeleter.id = -1;
7734 	}
7735 
7736 	notify_mount(mount->id,
7737 		coveredNode != NULL ? coveredNode->device : -1,
7738 		coveredNode ? coveredNode->id : -1);
7739 
7740 	return mount->id;
7741 
7742 err4:
7743 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7744 err3:
7745 	if (coveredNode != NULL)
7746 		put_vnode(coveredNode);
7747 err2:
7748 	rw_lock_write_lock(&sMountLock);
7749 	sMountsTable->Remove(mount);
7750 	rw_lock_write_unlock(&sMountLock);
7751 err1:
7752 	delete mount;
7753 
7754 	return status;
7755 }
7756 
7757 
7758 static status_t
7759 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7760 {
7761 	struct fs_mount* mount;
7762 	status_t err;
7763 
7764 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7765 		mountID, kernel));
7766 
7767 	struct vnode* pathVnode = NULL;
7768 	if (path != NULL) {
7769 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7770 		if (err != B_OK)
7771 			return B_ENTRY_NOT_FOUND;
7772 	}
7773 
7774 	RecursiveLocker mountOpLocker(sMountOpLock);
7775 	ReadLocker mountLocker(sMountLock);
7776 
7777 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7778 	if (mount == NULL) {
7779 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7780 			pathVnode);
7781 	}
7782 
7783 	mountLocker.Unlock();
7784 
7785 	if (path != NULL) {
7786 		put_vnode(pathVnode);
7787 
7788 		if (mount->root_vnode != pathVnode) {
			// not a mount point
7790 			return B_BAD_VALUE;
7791 		}
7792 	}
7793 
	// if the volume is associated with a partition, lock the partition's
	// device for as long as we are unmounting
7796 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7797 	KPartition* partition = mount->partition;
7798 	KDiskDevice* diskDevice = NULL;
7799 	if (partition != NULL) {
7800 		if (partition->Device() == NULL) {
7801 			dprintf("fs_unmount(): There is no device!\n");
7802 			return B_ERROR;
7803 		}
7804 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7805 		if (!diskDevice) {
7806 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7807 			return B_ERROR;
7808 		}
7809 	}
7810 	DeviceWriteLocker writeLocker(diskDevice, true);
7811 
	// make sure that the partition is not busy
7813 	if (partition != NULL) {
7814 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7815 			TRACE(("fs_unmount(): Partition is busy.\n"));
7816 			return B_BUSY;
7817 		}
7818 	}
7819 
	// grab the vnode lock (write access) to keep anyone from creating
	// a vnode while we're figuring out if we can continue
7822 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7823 
7824 	bool disconnectedDescriptors = false;
7825 
7826 	while (true) {
7827 		bool busy = false;
7828 
		// cycle through the list of vnodes associated with this mount and
		// make sure that none of them is busy or still referenced
7831 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7832 		while (struct vnode* vnode = iterator.Next()) {
7833 			if (vnode->IsBusy()) {
7834 				busy = true;
7835 				break;
7836 			}
7837 
7838 			// check the vnode's ref count -- subtract additional references for
7839 			// covering
7840 			int32 refCount = vnode->ref_count;
7841 			if (vnode->covers != NULL)
7842 				refCount--;
7843 			if (vnode->covered_by != NULL)
7844 				refCount--;
7845 
7846 			if (refCount != 0) {
7847 				// there are still vnodes in use on this mount, so we cannot
7848 				// unmount yet
7849 				busy = true;
7850 				break;
7851 			}
7852 		}
7853 
7854 		if (!busy)
7855 			break;
7856 
7857 		if ((flags & B_FORCE_UNMOUNT) == 0)
7858 			return B_BUSY;
7859 
7860 		if (disconnectedDescriptors) {
7861 			// wait a bit until the last access is finished, and then try again
7862 			vnodesWriteLocker.Unlock();
7863 			snooze(100000);
7864 			// TODO: if there is some kind of bug that prevents the ref counts
7865 			// from getting back to zero, this will fall into an endless loop...
7866 			vnodesWriteLocker.Lock();
7867 			continue;
7868 		}
7869 
7870 		// the file system is still busy - but we're forced to unmount it,
7871 		// so let's disconnect all open file descriptors
7872 
7873 		mount->unmounting = true;
7874 			// prevent new vnodes from being created
7875 
7876 		vnodesWriteLocker.Unlock();
7877 
7878 		disconnect_mount_or_vnode_fds(mount, NULL);
7879 		disconnectedDescriptors = true;
7880 
7881 		vnodesWriteLocker.Lock();
7882 	}
7883 
	// We can safely continue. Mark all of the vnodes busy and put this mount
	// structure into unmounting state. Also undo the vnode covers/covered_by
	// links.
7887 	mount->unmounting = true;
7888 
7889 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7890 	while (struct vnode* vnode = iterator.Next()) {
7891 		// Remove all covers/covered_by links from other mounts' nodes to this
7892 		// vnode and adjust the node ref count accordingly. We will release the
7893 		// references to the external vnodes below.
7894 		if (Vnode* coveredNode = vnode->covers) {
7895 			if (Vnode* coveringNode = vnode->covered_by) {
7896 				// We have both covered and covering vnodes, so just remove us
7897 				// from the chain.
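				// (Chain before: coveredNode <-> vnode <-> coveringNode;
				// after: coveredNode <-> coveringNode.)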
7898 				coveredNode->covered_by = coveringNode;
7899 				coveringNode->covers = coveredNode;
7900 				vnode->ref_count -= 2;
7901 
7902 				vnode->covered_by = NULL;
7903 				vnode->covers = NULL;
7904 				vnode->SetCovering(false);
7905 				vnode->SetCovered(false);
7906 			} else {
7907 				// We only have a covered vnode. Remove its link to us.
7908 				coveredNode->covered_by = NULL;
7909 				coveredNode->SetCovered(false);
7910 				vnode->ref_count--;
7911 
				// If the other node is an external vnode, we keep the link
				// around so we can put the reference later on. Otherwise
7914 				// we get rid of it right now.
7915 				if (coveredNode->mount == mount) {
7916 					vnode->covers = NULL;
7917 					coveredNode->ref_count--;
7918 				}
7919 			}
7920 		} else if (Vnode* coveringNode = vnode->covered_by) {
7921 			// We only have a covering vnode. Remove its link to us.
7922 			coveringNode->covers = NULL;
7923 			coveringNode->SetCovering(false);
7924 			vnode->ref_count--;
7925 
			// If the other node is an external vnode, we keep the link
			// around so we can put the reference later on. Otherwise
7928 			// we get rid of it right now.
7929 			if (coveringNode->mount == mount) {
7930 				vnode->covered_by = NULL;
7931 				coveringNode->ref_count--;
7932 			}
7933 		}
7934 
7935 		vnode->SetBusy(true);
7936 		vnode_to_be_freed(vnode);
7937 	}
7938 
7939 	vnodesWriteLocker.Unlock();
7940 
	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do that here.
7944 	while (struct vnode* vnode = mount->vnodes.Head()) {
7945 		// Put the references to external covered/covering vnodes we kept above.
7946 		if (Vnode* coveredNode = vnode->covers)
7947 			put_vnode(coveredNode);
7948 		if (Vnode* coveringNode = vnode->covered_by)
7949 			put_vnode(coveringNode);
7950 
7951 		free_vnode(vnode, false);
7952 	}
7953 
7954 	// remove the mount structure from the hash table
7955 	rw_lock_write_lock(&sMountLock);
7956 	sMountsTable->Remove(mount);
7957 	rw_lock_write_unlock(&sMountLock);
7958 
7959 	mountOpLocker.Unlock();
7960 
7961 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7962 	notify_unmount(mount->id);
7963 
7964 	// dereference the partition and mark it unmounted
7965 	if (partition) {
7966 		partition->SetVolumeID(-1);
7967 		partition->SetMountCookie(NULL);
7968 
7969 		if (mount->owns_file_device)
7970 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7971 		partition->Unregister();
7972 	}
7973 
7974 	delete mount;
7975 	return B_OK;
7976 }
7977 
7978 
7979 static status_t
7980 fs_sync(dev_t device)
7981 {
7982 	struct fs_mount* mount;
7983 	status_t status = get_mount(device, &mount);
7984 	if (status != B_OK)
7985 		return status;
7986 
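	// We iterate the mount's vnode list using a marker vnode: the vnode lock
	// is dropped while each cache is flushed, so the list may change
	// underneath us. The marker is flagged busy so other code skips it; its
	// "removed" flag tracks whether it is currently linked into the list.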
7987 	struct vnode marker;
7988 	memset(&marker, 0, sizeof(marker));
7989 	marker.SetBusy(true);
7990 	marker.SetRemoved(true);
7991 
7992 	// First, synchronize all file caches
7993 
7994 	while (true) {
7995 		WriteLocker locker(sVnodeLock);
			// Note: That's the easy way. Which is probably OK for sync(),
			// since it's a relatively rare call and doesn't need to allow for
			// a lot of concurrency. Using a read lock would be possible, but
			// also more involved, since we would have to lock the individual
			// nodes and take care of the locking order, which we might not
			// want to do while holding fs_mount::lock.
8002 
8003 		// synchronize access to vnode list
8004 		mutex_lock(&mount->lock);
8005 
8006 		struct vnode* vnode;
8007 		if (!marker.IsRemoved()) {
8008 			vnode = mount->vnodes.GetNext(&marker);
8009 			mount->vnodes.Remove(&marker);
8010 			marker.SetRemoved(true);
8011 		} else
8012 			vnode = mount->vnodes.First();
8013 
8014 		while (vnode != NULL && (vnode->cache == NULL
8015 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8016 			// TODO: we could track writes (and writable mapped vnodes)
8017 			//	and have a simple flag that we could test for here
8018 			vnode = mount->vnodes.GetNext(vnode);
8019 		}
8020 
8021 		if (vnode != NULL) {
8022 			// insert marker vnode again
8023 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8024 			marker.SetRemoved(false);
8025 		}
8026 
8027 		mutex_unlock(&mount->lock);
8028 
8029 		if (vnode == NULL)
8030 			break;
8031 
8032 		vnode = lookup_vnode(mount->id, vnode->id);
8033 		if (vnode == NULL || vnode->IsBusy())
8034 			continue;
8035 
8036 		if (vnode->ref_count == 0) {
8037 			// this vnode has been unused before
8038 			vnode_used(vnode);
8039 		}
8040 		inc_vnode_ref_count(vnode);
8041 
8042 		locker.Unlock();
8043 
8044 		if (vnode->cache != NULL && !vnode->IsRemoved())
8045 			vnode->cache->WriteModified();
8046 
8047 		put_vnode(vnode);
8048 	}
8049 
8050 	// Let the file systems do their synchronizing work
8051 	if (HAS_FS_MOUNT_CALL(mount, sync))
8052 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8053 
	// Finally, flush the underlying device's write cache (if possible).
8055 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8056 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8057 
8058 	put_mount(mount);
8059 	return status;
8060 }
8061 
8062 
8063 static status_t
8064 fs_read_info(dev_t device, struct fs_info* info)
8065 {
8066 	struct fs_mount* mount;
8067 	status_t status = get_mount(device, &mount);
8068 	if (status != B_OK)
8069 		return status;
8070 
8071 	memset(info, 0, sizeof(struct fs_info));
8072 
8073 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8074 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8075 
8076 	// fill in info the file system doesn't (have to) know about
8077 	if (status == B_OK) {
8078 		info->dev = mount->id;
8079 		info->root = mount->root_vnode->id;
8080 
8081 		fs_volume* volume = mount->volume;
8082 		while (volume->super_volume != NULL)
8083 			volume = volume->super_volume;
8084 
8085 		strlcpy(info->fsh_name, volume->file_system_name,
8086 			sizeof(info->fsh_name));
8087 		if (mount->device_name != NULL) {
8088 			strlcpy(info->device_name, mount->device_name,
8089 				sizeof(info->device_name));
8090 		}
8091 	}
8092 
	// even if the call is not supported by the file system, the parts we
	// filled in ourselves above are still valid
8095 
8096 	put_mount(mount);
8097 	return status;
8098 }
8099 
8100 
8101 static status_t
8102 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8103 {
8104 	struct fs_mount* mount;
8105 	status_t status = get_mount(device, &mount);
8106 	if (status != B_OK)
8107 		return status;
8108 
8109 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8110 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8111 	else
8112 		status = B_READ_ONLY_DEVICE;
8113 
8114 	put_mount(mount);
8115 	return status;
8116 }
8117 
8118 
8119 static dev_t
8120 fs_next_device(int32* _cookie)
8121 {
8122 	struct fs_mount* mount = NULL;
8123 	dev_t device = *_cookie;
8124 
8125 	rw_lock_read_lock(&sMountLock);
8126 
	// Since device IDs are assigned sequentially, this algorithm works well
	// enough. It makes sure that the device list returned is sorted, and
	// that no device is skipped when an already visited device has been
	// unmounted.
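	// For example (hypothetical IDs): with mounts {1, 2, 5} and *_cookie
	// == 3, the loop below skips the unmounted IDs 3 and 4, returns device
	// ID 5, and leaves *_cookie at 6.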
8131 
8132 	while (device < sNextMountID) {
8133 		mount = find_mount(device++);
8134 		if (mount != NULL && mount->volume->private_volume != NULL)
8135 			break;
8136 	}
8137 
8138 	*_cookie = device;
8139 
8140 	if (mount != NULL)
8141 		device = mount->id;
8142 	else
8143 		device = B_BAD_VALUE;
8144 
8145 	rw_lock_read_unlock(&sMountLock);
8146 
8147 	return device;
8148 }
8149 
8150 
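/*!	Reads the contents of the attribute \a attribute of the node referred to
	by \a fd. This is a convenience wrapper: it opens the attribute
	read-only, reads up to \a readBytes bytes at offset \a pos into
	\a buffer, and closes the attribute again. Returns the number of bytes
	read or an error code. (The \a type parameter is not used for reading.)
*/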
8151 ssize_t
fs_read_attr(int fd, const char* attribute, uint32 type, off_t pos,
	void* buffer, size_t readBytes)
8154 {
8155 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8156 	if (attrFD < 0)
8157 		return attrFD;
8158 
8159 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8160 
8161 	_kern_close(attrFD);
8162 
8163 	return bytesRead;
8164 }
8165 
8166 
8167 static status_t
8168 get_cwd(char* buffer, size_t size, bool kernel)
8169 {
8170 	// Get current working directory from io context
8171 	struct io_context* context = get_current_io_context(kernel);
8172 	status_t status;
8173 
	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8175 
8176 	mutex_lock(&context->io_mutex);
8177 
8178 	struct vnode* vnode = context->cwd;
8179 	if (vnode)
8180 		inc_vnode_ref_count(vnode);
8181 
8182 	mutex_unlock(&context->io_mutex);
8183 
8184 	if (vnode) {
8185 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8186 		put_vnode(vnode);
8187 	} else
8188 		status = B_ERROR;
8189 
8190 	return status;
8191 }
8192 
8193 
8194 static status_t
8195 set_cwd(int fd, char* path, bool kernel)
8196 {
8197 	struct io_context* context;
8198 	struct vnode* vnode = NULL;
8199 	struct vnode* oldDirectory;
8200 	status_t status;
8201 
8202 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8203 
8204 	// Get vnode for passed path, and bail if it failed
8205 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8206 	if (status < 0)
8207 		return status;
8208 
8209 	if (!S_ISDIR(vnode->Type())) {
8210 		// nope, can't cwd to here
8211 		status = B_NOT_A_DIRECTORY;
8212 		goto err;
8213 	}
8214 
8215 	// We need to have the permission to enter the directory, too
8216 	if (HAS_FS_CALL(vnode, access)) {
8217 		status = FS_CALL(vnode, access, X_OK);
8218 		if (status != B_OK)
8219 			goto err;
8220 	}
8221 
8222 	// Get current io context and lock
8223 	context = get_current_io_context(kernel);
8224 	mutex_lock(&context->io_mutex);
8225 
8226 	// save the old current working directory first
8227 	oldDirectory = context->cwd;
8228 	context->cwd = vnode;
8229 
8230 	mutex_unlock(&context->io_mutex);
8231 
8232 	if (oldDirectory)
8233 		put_vnode(oldDirectory);
8234 
8235 	return B_NO_ERROR;
8236 
8237 err:
8238 	put_vnode(vnode);
8239 	return status;
8240 }
8241 
8242 
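/*!	Copies a null-terminated string from userland into a kernel buffer of
	\a length bytes. Returns \c B_OK on success, \c B_NAME_TOO_LONG if the
	string (including its terminating null) does not fit into the buffer, or
	the negative error code returned by user_strlcpy() (e.g.
	\c B_BAD_ADDRESS).
*/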
8243 static status_t
8244 user_copy_name(char* to, const char* from, size_t length)
8245 {
8246 	ssize_t len = user_strlcpy(to, from, length);
8247 	if (len < 0)
8248 		return len;
8249 	if (len >= (ssize_t)length)
8250 		return B_NAME_TOO_LONG;
8251 	return B_OK;
8252 }
8253 
8254 
8255 //	#pragma mark - kernel mirrored syscalls
8256 
8257 
8258 dev_t
8259 _kern_mount(const char* path, const char* device, const char* fsName,
8260 	uint32 flags, const char* args, size_t argsLength)
8261 {
8262 	KPath pathBuffer(path);
8263 	if (pathBuffer.InitCheck() != B_OK)
8264 		return B_NO_MEMORY;
8265 
8266 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8267 }
8268 
8269 
8270 status_t
8271 _kern_unmount(const char* path, uint32 flags)
8272 {
8273 	KPath pathBuffer(path);
8274 	if (pathBuffer.InitCheck() != B_OK)
8275 		return B_NO_MEMORY;
8276 
8277 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8278 }
8279 
8280 
8281 status_t
8282 _kern_read_fs_info(dev_t device, struct fs_info* info)
8283 {
8284 	if (info == NULL)
8285 		return B_BAD_VALUE;
8286 
8287 	return fs_read_info(device, info);
8288 }
8289 
8290 
8291 status_t
8292 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8293 {
8294 	if (info == NULL)
8295 		return B_BAD_VALUE;
8296 
8297 	return fs_write_info(device, info, mask);
8298 }
8299 
8300 
8301 status_t
8302 _kern_sync(void)
8303 {
8304 	// Note: _kern_sync() is also called from _user_sync()
8305 	int32 cookie = 0;
8306 	dev_t device;
8307 	while ((device = next_dev(&cookie)) >= 0) {
8308 		status_t status = fs_sync(device);
8309 		if (status != B_OK && status != B_BAD_VALUE) {
8310 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8311 				strerror(status));
8312 		}
8313 	}
8314 
8315 	return B_OK;
8316 }
8317 
8318 
8319 dev_t
8320 _kern_next_device(int32* _cookie)
8321 {
8322 	return fs_next_device(_cookie);
8323 }
8324 
8325 
8326 status_t
8327 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8328 	size_t infoSize)
8329 {
8330 	if (infoSize != sizeof(fd_info))
8331 		return B_BAD_VALUE;
8332 
8333 	// get the team
8334 	Team* team = Team::Get(teamID);
8335 	if (team == NULL)
8336 		return B_BAD_TEAM_ID;
8337 	BReference<Team> teamReference(team, true);
8338 
8339 	// now that we have a team reference, its I/O context won't go away
8340 	io_context* context = team->io_context;
8341 	MutexLocker contextLocker(context->io_mutex);
8342 
8343 	uint32 slot = *_cookie;
8344 
8345 	struct file_descriptor* descriptor;
8346 	while (slot < context->table_size
8347 		&& (descriptor = context->fds[slot]) == NULL) {
8348 		slot++;
8349 	}
8350 
8351 	if (slot >= context->table_size)
8352 		return B_ENTRY_NOT_FOUND;
8353 
8354 	info->number = slot;
8355 	info->open_mode = descriptor->open_mode;
8356 
8357 	struct vnode* vnode = fd_vnode(descriptor);
8358 	if (vnode != NULL) {
8359 		info->device = vnode->device;
8360 		info->node = vnode->id;
8361 	} else if (descriptor->u.mount != NULL) {
8362 		info->device = descriptor->u.mount->id;
8363 		info->node = -1;
8364 	}
8365 
8366 	*_cookie = slot + 1;
8367 	return B_OK;
8368 }
8369 
8370 
8371 int
8372 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8373 	int perms)
8374 {
8375 	if ((openMode & O_CREAT) != 0) {
8376 		return file_create_entry_ref(device, inode, name, openMode, perms,
8377 			true);
8378 	}
8379 
8380 	return file_open_entry_ref(device, inode, name, openMode, true);
8381 }
8382 
8383 
8384 /*!	\brief Opens a node specified by a FD + path pair.
8385 
8386 	At least one of \a fd and \a path must be specified.
8387 	If only \a fd is given, the function opens the node identified by this
8388 	FD. If only a path is given, this path is opened. If both are given and
8389 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8390 	of the directory (!) identified by \a fd.
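
	A minimal usage sketch (illustrative only; the path and permissions are
	hypothetical, error handling is omitted):
	\code
	int fd = _kern_open(-1, "/boot/home/test", O_RDWR | O_CREAT, 0644);
	if (fd >= 0)
		_kern_close(fd);
	\endcode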
8391 
8392 	\param fd The FD. May be < 0.
8393 	\param path The absolute or relative path. May be \c NULL.
	\param openMode The open mode.
	\param perms The access permissions the file shall have, if it is created
		   (\c O_CREAT in \a openMode).
8395 	\return A FD referring to the newly opened node, or an error code,
8396 			if an error occurs.
8397 */
8398 int
8399 _kern_open(int fd, const char* path, int openMode, int perms)
8400 {
8401 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8402 	if (pathBuffer.InitCheck() != B_OK)
8403 		return B_NO_MEMORY;
8404 
8405 	if ((openMode & O_CREAT) != 0)
8406 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8407 
8408 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8409 }
8410 
8411 
8412 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8413 
	The supplied name may be \c NULL, in which case the directory identified
	by \a device and \a inode will be opened. Otherwise \a device and
	\a inode identify the parent directory of the directory to be opened
	and \a name its entry name.

	\param device If \a name is specified, the ID of the device the parent
		   directory of the directory to be opened resides on, otherwise
		   the device of the directory itself.
	\param inode If \a name is specified, the node ID of the parent
		   directory of the directory to be opened, otherwise the node ID of
		   the directory itself.
	\param name The entry name of the directory to be opened. If \c NULL,
		   the \a device + \a inode pair identify the node to be opened.
8427 	\return The FD of the newly opened directory or an error code, if
8428 			something went wrong.
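
	For illustration (all IDs are hypothetical):
	\code
	int fd1 = _kern_open_dir_entry_ref(dev, dirID, "subdir");
		// opens the entry "subdir" in the directory (dev, dirID)
	int fd2 = _kern_open_dir_entry_ref(dev, subdirID, NULL);
		// opens the directory (dev, subdirID) directly
	\endcode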
8429 */
8430 int
8431 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8432 {
8433 	return dir_open_entry_ref(device, inode, name, true);
8434 }
8435 
8436 
8437 /*!	\brief Opens a directory specified by a FD + path pair.
8438 
8439 	At least one of \a fd and \a path must be specified.
8440 	If only \a fd is given, the function opens the directory identified by this
8441 	FD. If only a path is given, this path is opened. If both are given and
8442 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8443 	of the directory (!) identified by \a fd.
8444 
8445 	\param fd The FD. May be < 0.
8446 	\param path The absolute or relative path. May be \c NULL.
8447 	\return A FD referring to the newly opened directory, or an error code,
8448 			if an error occurs.
8449 */
8450 int
8451 _kern_open_dir(int fd, const char* path)
8452 {
8453 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8454 	if (pathBuffer.InitCheck() != B_OK)
8455 		return B_NO_MEMORY;
8456 
8457 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8458 }
8459 
8460 
8461 status_t
8462 _kern_fcntl(int fd, int op, size_t argument)
8463 {
8464 	return common_fcntl(fd, op, argument, true);
8465 }
8466 
8467 
8468 status_t
8469 _kern_fsync(int fd)
8470 {
8471 	return common_sync(fd, true);
8472 }
8473 
8474 
8475 status_t
8476 _kern_lock_node(int fd)
8477 {
8478 	return common_lock_node(fd, true);
8479 }
8480 
8481 
8482 status_t
8483 _kern_unlock_node(int fd)
8484 {
8485 	return common_unlock_node(fd, true);
8486 }
8487 
8488 
8489 status_t
8490 _kern_preallocate(int fd, off_t offset, off_t length)
8491 {
8492 	return common_preallocate(fd, offset, length, true);
8493 }
8494 
8495 
8496 status_t
8497 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8498 	int perms)
8499 {
8500 	return dir_create_entry_ref(device, inode, name, perms, true);
8501 }
8502 
8503 
8504 /*!	\brief Creates a directory specified by a FD + path pair.
8505 
8506 	\a path must always be specified (it contains the name of the new directory
8507 	at least). If only a path is given, this path identifies the location at
8508 	which the directory shall be created. If both \a fd and \a path are given
8509 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8510 	of the directory (!) identified by \a fd.
8511 
8512 	\param fd The FD. May be < 0.
8513 	\param path The absolute or relative path. Must not be \c NULL.
8514 	\param perms The access permissions the new directory shall have.
8515 	\return \c B_OK, if the directory has been created successfully, another
8516 			error code otherwise.
8517 */
8518 status_t
8519 _kern_create_dir(int fd, const char* path, int perms)
8520 {
8521 	KPath pathBuffer(path, KPath::DEFAULT);
8522 	if (pathBuffer.InitCheck() != B_OK)
8523 		return B_NO_MEMORY;
8524 
8525 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8526 }
8527 
8528 
8529 status_t
8530 _kern_remove_dir(int fd, const char* path)
8531 {
8532 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8533 	if (pathBuffer.InitCheck() != B_OK)
8534 		return B_NO_MEMORY;
8535 
8536 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8537 }
8538 
8539 
8540 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8541 
8542 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node identified by
	this FD. If only a path is given, this path identifies the symlink to be
	read. If both are given and the path is absolute, \a fd is ignored; a
	relative path is reckoned off of the directory (!) identified by \a fd.
8548 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8549 	will still be updated to reflect the required buffer size.
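
	A sketch of the retry pattern this enables (illustrative only; the path
	is hypothetical):
	\code
	char buffer[256];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// "size" now holds the buffer size needed for the full contents
	}
	\endcode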
8550 
8551 	\param fd The FD. May be < 0.
8552 	\param path The absolute or relative path. May be \c NULL.
8553 	\param buffer The buffer into which the contents of the symlink shall be
8554 		   written.
8555 	\param _bufferSize A pointer to the size of the supplied buffer.
	\return The length of the link on success or an appropriate error code.
8557 */
8558 status_t
8559 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8560 {
8561 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8562 	if (pathBuffer.InitCheck() != B_OK)
8563 		return B_NO_MEMORY;
8564 
8565 	return common_read_link(fd, pathBuffer.LockBuffer(),
8566 		buffer, _bufferSize, true);
8567 }
8568 
8569 
8570 /*!	\brief Creates a symlink specified by a FD + path pair.
8571 
8572 	\a path must always be specified (it contains the name of the new symlink
8573 	at least). If only a path is given, this path identifies the location at
8574 	which the symlink shall be created. If both \a fd and \a path are given and
8575 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8576 	of the directory (!) identified by \a fd.
8577 
	\param fd The FD. May be < 0.
	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the symlink shall point to.
	\param mode The access permissions the new symlink shall have.
8581 	\return \c B_OK, if the symlink has been created successfully, another
8582 			error code otherwise.
8583 */
8584 status_t
8585 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8586 {
8587 	KPath pathBuffer(path);
8588 	if (pathBuffer.InitCheck() != B_OK)
8589 		return B_NO_MEMORY;
8590 
8591 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8592 		toPath, mode, true);
8593 }
8594 
8595 
8596 status_t
8597 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8598 	bool traverseLeafLink)
8599 {
8600 	KPath pathBuffer(path);
8601 	KPath toPathBuffer(toPath);
8602 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8603 		return B_NO_MEMORY;
8604 
8605 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8606 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8607 }
8608 
8609 
8610 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8611 
8612 	\a path must always be specified (it contains at least the name of the entry
8613 	to be deleted). If only a path is given, this path identifies the entry
8614 	directly. If both \a fd and \a path are given and the path is absolute,
8615 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8616 	identified by \a fd.
8617 
8618 	\param fd The FD. May be < 0.
8619 	\param path The absolute or relative path. Must not be \c NULL.
8620 	\return \c B_OK, if the entry has been removed successfully, another
8621 			error code otherwise.
8622 */
8623 status_t
8624 _kern_unlink(int fd, const char* path)
8625 {
8626 	KPath pathBuffer(path);
8627 	if (pathBuffer.InitCheck() != B_OK)
8628 		return B_NO_MEMORY;
8629 
8630 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8631 }
8632 
8633 
/*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8635 		   by another FD + path pair.
8636 
8637 	\a oldPath and \a newPath must always be specified (they contain at least
8638 	the name of the entry). If only a path is given, this path identifies the
8639 	entry directly. If both a FD and a path are given and the path is absolute,
8640 	the FD is ignored; a relative path is reckoned off of the directory (!)
8641 	identified by the respective FD.
8642 
8643 	\param oldFD The FD of the old location. May be < 0.
8644 	\param oldPath The absolute or relative path of the old location. Must not
8645 		   be \c NULL.
8646 	\param newFD The FD of the new location. May be < 0.
8647 	\param newPath The absolute or relative path of the new location. Must not
8648 		   be \c NULL.
8649 	\return \c B_OK, if the entry has been moved successfully, another
8650 			error code otherwise.
8651 */
8652 status_t
8653 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8654 {
8655 	KPath oldPathBuffer(oldPath);
8656 	KPath newPathBuffer(newPath);
8657 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8658 		return B_NO_MEMORY;
8659 
8660 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8661 		newFD, newPathBuffer.LockBuffer(), true);
8662 }
8663 
8664 
8665 status_t
8666 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8667 {
8668 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8669 	if (pathBuffer.InitCheck() != B_OK)
8670 		return B_NO_MEMORY;
8671 
8672 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8673 		true);
8674 }
8675 
8676 
8677 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8678 
8679 	If only \a fd is given, the stat operation associated with the type
8680 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8681 	given, this path identifies the entry for whose node to retrieve the
8682 	stat data. If both \a fd and \a path are given and the path is absolute,
8683 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8684 	identified by \a fd and specifies the entry whose stat data shall be
8685 	retrieved.
8686 
8687 	\param fd The FD. May be < 0.
	\param path The absolute or relative path. May be \c NULL.
8689 	\param traverseLeafLink If \a path is given, \c true specifies that the
8690 		   function shall not stick to symlinks, but traverse them.
8691 	\param stat The buffer the stat data shall be written into.
8692 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
8694 			error code otherwise.
8695 */
8696 status_t
8697 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8698 	struct stat* stat, size_t statSize)
8699 {
8700 	struct stat completeStat;
8701 	struct stat* originalStat = NULL;
8702 	status_t status;
8703 
8704 	if (statSize > sizeof(struct stat))
8705 		return B_BAD_VALUE;
8706 
	// This supports different stat extensions: if the caller's buffer is
	// smaller than struct stat, read into a complete stat and copy back
	// only the requested portion below.
8708 	if (statSize < sizeof(struct stat)) {
8709 		originalStat = stat;
8710 		stat = &completeStat;
8711 	}
8712 
8713 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8714 
8715 	if (status == B_OK && originalStat != NULL)
8716 		memcpy(originalStat, stat, statSize);
8717 
8718 	return status;
8719 }
8720 
8721 
8722 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8723 
8724 	If only \a fd is given, the stat operation associated with the type
8725 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8726 	given, this path identifies the entry for whose node to write the
8727 	stat data. If both \a fd and \a path are given and the path is absolute,
8728 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8729 	identified by \a fd and specifies the entry whose stat data shall be
8730 	written.
8731 
8732 	\param fd The FD. May be < 0.
8733 	\param path The absolute or relative path. May be \c NULL.
8734 	\param traverseLeafLink If \a path is given, \c true specifies that the
8735 		   function shall not stick to symlinks, but traverse them.
8736 	\param stat The buffer containing the stat data to be written.
8737 	\param statSize The size of the supplied stat buffer.
8738 	\param statMask A mask specifying which parts of the stat data shall be
8739 		   written.
	\return \c B_OK, if the stat data have been written successfully,
8741 			another error code otherwise.
8742 */
8743 status_t
8744 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8745 	const struct stat* stat, size_t statSize, int statMask)
8746 {
8747 	struct stat completeStat;
8748 
8749 	if (statSize > sizeof(struct stat))
8750 		return B_BAD_VALUE;
8751 
	// This supports different stat extensions: if the caller's buffer is
	// smaller than struct stat, zero-fill the missing fields and continue
	// with a complete stat.
8753 	if (statSize < sizeof(struct stat)) {
8754 		memset((uint8*)&completeStat + statSize, 0,
8755 			sizeof(struct stat) - statSize);
8756 		memcpy(&completeStat, stat, statSize);
8757 		stat = &completeStat;
8758 	}
8759 
8760 	status_t status;
8761 
8762 	if (path != NULL) {
8763 		// path given: write the stat of the node referred to by (fd, path)
8764 		KPath pathBuffer(path);
8765 		if (pathBuffer.InitCheck() != B_OK)
8766 			return B_NO_MEMORY;
8767 
8768 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8769 			traverseLeafLink, stat, statMask, true);
8770 	} else {
8771 		// no path given: get the FD and use the FD operation
8772 		struct file_descriptor* descriptor
8773 			= get_fd(get_current_io_context(true), fd);
8774 		if (descriptor == NULL)
8775 			return B_FILE_ERROR;
8776 
8777 		if (descriptor->ops->fd_write_stat)
8778 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8779 		else
8780 			status = B_UNSUPPORTED;
8781 
8782 		put_fd(descriptor);
8783 	}
8784 
8785 	return status;
8786 }
8787 
8788 
8789 int
8790 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8791 {
8792 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8793 	if (pathBuffer.InitCheck() != B_OK)
8794 		return B_NO_MEMORY;
8795 
8796 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8797 }
8798 
8799 
8800 int
8801 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8802 	int openMode)
8803 {
8804 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8805 	if (pathBuffer.InitCheck() != B_OK)
8806 		return B_NO_MEMORY;
8807 
8808 	if ((openMode & O_CREAT) != 0) {
8809 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8810 			true);
8811 	}
8812 
8813 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8814 }
8815 
8816 
8817 status_t
8818 _kern_remove_attr(int fd, const char* name)
8819 {
8820 	return attr_remove(fd, name, true);
8821 }
8822 
8823 
8824 status_t
8825 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8826 	const char* toName)
8827 {
8828 	return attr_rename(fromFile, fromName, toFile, toName, true);
8829 }
8830 
8831 
8832 int
8833 _kern_open_index_dir(dev_t device)
8834 {
8835 	return index_dir_open(device, true);
8836 }
8837 
8838 
8839 status_t
8840 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8841 {
8842 	return index_create(device, name, type, flags, true);
8843 }
8844 
8845 
8846 status_t
8847 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8848 {
8849 	return index_name_read_stat(device, name, stat, true);
8850 }
8851 
8852 
8853 status_t
8854 _kern_remove_index(dev_t device, const char* name)
8855 {
8856 	return index_remove(device, name, true);
8857 }
8858 
8859 
8860 status_t
8861 _kern_getcwd(char* buffer, size_t size)
8862 {
8863 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8864 
8865 	// Call vfs to get current working directory
8866 	return get_cwd(buffer, size, true);
8867 }
8868 
8869 
8870 status_t
8871 _kern_setcwd(int fd, const char* path)
8872 {
8873 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8874 	if (pathBuffer.InitCheck() != B_OK)
8875 		return B_NO_MEMORY;
8876 
8877 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8878 }
8879 
8880 
8881 //	#pragma mark - userland syscalls
8882 
8883 
8884 dev_t
8885 _user_mount(const char* userPath, const char* userDevice,
8886 	const char* userFileSystem, uint32 flags, const char* userArgs,
8887 	size_t argsLength)
8888 {
8889 	char fileSystem[B_FILE_NAME_LENGTH];
8890 	KPath path, device;
8891 	char* args = NULL;
8892 	status_t status;
8893 
8894 	if (!IS_USER_ADDRESS(userPath))
8895 		return B_BAD_ADDRESS;
8896 
8897 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8898 		return B_NO_MEMORY;
8899 
8900 	status = user_copy_name(path.LockBuffer(), userPath,
8901 		B_PATH_NAME_LENGTH);
8902 	if (status != B_OK)
8903 		return status;
8904 	path.UnlockBuffer();
8905 
8906 	if (userFileSystem != NULL) {
8907 		if (!IS_USER_ADDRESS(userFileSystem))
8908 			return B_BAD_ADDRESS;
8909 
8910 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8911 		if (status != B_OK)
8912 			return status;
8913 	}
8914 
8915 	if (userDevice != NULL) {
8916 		if (!IS_USER_ADDRESS(userDevice))
8917 			return B_BAD_ADDRESS;
8918 
8919 		status = user_copy_name(device.LockBuffer(), userDevice,
8920 			B_PATH_NAME_LENGTH);
8921 		if (status != B_OK)
8922 			return status;
8923 		device.UnlockBuffer();
8924 	}
8925 
8926 	if (userArgs != NULL && argsLength > 0) {
8927 		if (!IS_USER_ADDRESS(userArgs))
8928 			return B_BAD_ADDRESS;
8929 
8930 		// this is a safety restriction
8931 		if (argsLength >= 65536)
8932 			return B_NAME_TOO_LONG;
8933 
8934 		args = (char*)malloc(argsLength + 1);
8935 		if (args == NULL)
8936 			return B_NO_MEMORY;
8937 
8938 		status = user_copy_name(args, userArgs, argsLength + 1);
8939 		if (status != B_OK) {
8940 			free(args);
8941 			return status;
8942 		}
8943 	}
8944 
8945 	status = fs_mount(path.LockBuffer(),
8946 		userDevice != NULL ? device.Path() : NULL,
8947 		userFileSystem ? fileSystem : NULL, flags, args, false);
8948 
8949 	free(args);
8950 	return status;
8951 }
8952 
8953 
8954 status_t
8955 _user_unmount(const char* userPath, uint32 flags)
8956 {
8957 	if (!IS_USER_ADDRESS(userPath))
8958 		return B_BAD_ADDRESS;
8959 
8960 	KPath pathBuffer;
8961 	if (pathBuffer.InitCheck() != B_OK)
8962 		return B_NO_MEMORY;
8963 
8964 	char* path = pathBuffer.LockBuffer();
8965 
8966 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8967 	if (status != B_OK)
8968 		return status;
8969 
8970 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8971 }
8972 
8973 
8974 status_t
8975 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8976 {
8977 	struct fs_info info;
8978 	status_t status;
8979 
8980 	if (userInfo == NULL)
8981 		return B_BAD_VALUE;
8982 
8983 	if (!IS_USER_ADDRESS(userInfo))
8984 		return B_BAD_ADDRESS;
8985 
8986 	status = fs_read_info(device, &info);
8987 	if (status != B_OK)
8988 		return status;
8989 
8990 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8991 		return B_BAD_ADDRESS;
8992 
8993 	return B_OK;
8994 }
8995 
8996 
8997 status_t
8998 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8999 {
9000 	struct fs_info info;
9001 
9002 	if (userInfo == NULL)
9003 		return B_BAD_VALUE;
9004 
9005 	if (!IS_USER_ADDRESS(userInfo)
9006 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
9007 		return B_BAD_ADDRESS;
9008 
9009 	return fs_write_info(device, &info, mask);
9010 }
9011 
9012 
9013 dev_t
9014 _user_next_device(int32* _userCookie)
9015 {
9016 	int32 cookie;
9017 	dev_t device;
9018 
9019 	if (!IS_USER_ADDRESS(_userCookie)
9020 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9021 		return B_BAD_ADDRESS;
9022 
9023 	device = fs_next_device(&cookie);
9024 
9025 	if (device >= B_OK) {
9026 		// update user cookie
9027 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9028 			return B_BAD_ADDRESS;
9029 	}
9030 
9031 	return device;
9032 }
9033 
9034 
9035 status_t
9036 _user_sync(void)
9037 {
9038 	return _kern_sync();
9039 }
9040 
9041 
9042 status_t
9043 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9044 	size_t infoSize)
9045 {
9046 	struct fd_info info;
9047 	uint32 cookie;
9048 
9049 	// only root can do this
9050 	if (geteuid() != 0)
9051 		return B_NOT_ALLOWED;
9052 
9053 	if (infoSize != sizeof(fd_info))
9054 		return B_BAD_VALUE;
9055 
9056 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9057 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9058 		return B_BAD_ADDRESS;
9059 
9060 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9061 	if (status != B_OK)
9062 		return status;
9063 
9064 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9065 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9066 		return B_BAD_ADDRESS;
9067 
9068 	return status;
9069 }
9070 
9071 
9072 status_t
9073 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9074 	char* userPath, size_t pathLength)
9075 {
9076 	if (!IS_USER_ADDRESS(userPath))
9077 		return B_BAD_ADDRESS;
9078 
9079 	KPath path;
9080 	if (path.InitCheck() != B_OK)
9081 		return B_NO_MEMORY;
9082 
9083 	// copy the leaf name onto the stack
9084 	char stackLeaf[B_FILE_NAME_LENGTH];
9085 	if (leaf != NULL) {
9086 		if (!IS_USER_ADDRESS(leaf))
9087 			return B_BAD_ADDRESS;
9088 
		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9090 		if (status != B_OK)
9091 			return status;
9092 
9093 		leaf = stackLeaf;
9094 	}
9095 
9096 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9097 		false, path.LockBuffer(), path.BufferSize());
9098 	if (status != B_OK)
9099 		return status;
9100 
9101 	path.UnlockBuffer();
9102 
9103 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9104 	if (length < 0)
9105 		return length;
9106 	if (length >= (int)pathLength)
9107 		return B_BUFFER_OVERFLOW;
9108 
9109 	return B_OK;
9110 }
9111 
9112 
9113 status_t
9114 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9115 {
9116 	if (userPath == NULL || buffer == NULL)
9117 		return B_BAD_VALUE;
9118 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9119 		return B_BAD_ADDRESS;
9120 
9121 	// copy path from userland
9122 	KPath pathBuffer;
9123 	if (pathBuffer.InitCheck() != B_OK)
9124 		return B_NO_MEMORY;
9125 	char* path = pathBuffer.LockBuffer();
9126 
9127 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9128 	if (status != B_OK)
9129 		return status;
9130 
9131 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9132 		false);
9133 	if (error != B_OK)
9134 		return error;
9135 
9136 	// copy back to userland
9137 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9138 	if (len < 0)
9139 		return len;
9140 	if (len >= B_PATH_NAME_LENGTH)
9141 		return B_BUFFER_OVERFLOW;
9142 
9143 	return B_OK;
9144 }
9145 
9146 
9147 int
9148 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9149 	int openMode, int perms)
9150 {
9151 	char name[B_FILE_NAME_LENGTH];
9152 
9153 	if (userName == NULL || device < 0 || inode < 0)
9154 		return B_BAD_VALUE;
9155 	if (!IS_USER_ADDRESS(userName))
9156 		return B_BAD_ADDRESS;
9157 	status_t status = user_copy_name(name, userName, sizeof(name));
9158 	if (status != B_OK)
9159 		return status;
9160 
9161 	if ((openMode & O_CREAT) != 0) {
9162 		return file_create_entry_ref(device, inode, name, openMode, perms,
9163 			false);
9164 	}
9165 
9166 	return file_open_entry_ref(device, inode, name, openMode, false);
9167 }
9168 
9169 
9170 int
9171 _user_open(int fd, const char* userPath, int openMode, int perms)
9172 {
9173 	KPath path;
9174 	if (path.InitCheck() != B_OK)
9175 		return B_NO_MEMORY;
9176 
9177 	char* buffer = path.LockBuffer();
9178 
9179 	if (!IS_USER_ADDRESS(userPath))
9180 		return B_BAD_ADDRESS;
9181 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9182 	if (status != B_OK)
9183 		return status;
9184 
9185 	if ((openMode & O_CREAT) != 0)
9186 		return file_create(fd, buffer, openMode, perms, false);
9187 
9188 	return file_open(fd, buffer, openMode, false);
9189 }
9190 
9191 
9192 int
9193 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9194 {
9195 	if (userName != NULL) {
9196 		char name[B_FILE_NAME_LENGTH];
9197 
9198 		if (!IS_USER_ADDRESS(userName))
9199 			return B_BAD_ADDRESS;
9200 		status_t status = user_copy_name(name, userName, sizeof(name));
9201 		if (status != B_OK)
9202 			return status;
9203 
9204 		return dir_open_entry_ref(device, inode, name, false);
9205 	}
9206 	return dir_open_entry_ref(device, inode, NULL, false);
9207 }
9208 
9209 
9210 int
9211 _user_open_dir(int fd, const char* userPath)
9212 {
9213 	if (userPath == NULL)
9214 		return dir_open(fd, NULL, false);
9215 
9216 	KPath path;
9217 	if (path.InitCheck() != B_OK)
9218 		return B_NO_MEMORY;
9219 
9220 	char* buffer = path.LockBuffer();
9221 
9222 	if (!IS_USER_ADDRESS(userPath))
9223 		return B_BAD_ADDRESS;
9224 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9225 	if (status != B_OK)
9226 		return status;
9227 
9228 	return dir_open(fd, buffer, false);
9229 }
9230 
9231 
9232 /*!	\brief Opens a directory's parent directory and returns the entry name
9233 		   of the former.
9234 
	Aside from the fact that it also returns the directory's entry name, this
	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It is
	exactly equivalent if \a userName is \c NULL.
9238 
9239 	If a name buffer is supplied and the name does not fit the buffer, the
9240 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9241 
9242 	\param fd A FD referring to a directory.
9243 	\param userName Buffer the directory's entry name shall be written into.
9244 		   May be \c NULL.
9245 	\param nameLength Size of the name buffer.
9246 	\return The file descriptor of the opened parent directory, if everything
9247 			went fine, an error code otherwise.
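
	A usage sketch, as it would look from the userland caller's perspective
	(\c dirFD is a hypothetical, already opened directory FD):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
		// on success, "name" holds the entry name of dirFD in its parent
	\endcode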
9248 */
9249 int
9250 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9251 {
9252 	bool kernel = false;
9253 
9254 	if (userName && !IS_USER_ADDRESS(userName))
9255 		return B_BAD_ADDRESS;
9256 
9257 	// open the parent dir
9258 	int parentFD = dir_open(fd, (char*)"..", kernel);
9259 	if (parentFD < 0)
9260 		return parentFD;
9261 	FDCloser fdCloser(parentFD, kernel);
9262 
9263 	if (userName) {
9264 		// get the vnodes
9265 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9266 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9267 		VNodePutter parentVNodePutter(parentVNode);
9268 		VNodePutter dirVNodePutter(dirVNode);
9269 		if (!parentVNode || !dirVNode)
9270 			return B_FILE_ERROR;
9271 
9272 		// get the vnode name
9273 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9274 		struct dirent* buffer = (struct dirent*)_buffer;
9275 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9276 			sizeof(_buffer), get_current_io_context(false));
9277 		if (status != B_OK)
9278 			return status;
9279 
9280 		// copy the name to the userland buffer
9281 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9282 		if (len < 0)
9283 			return len;
9284 		if (len >= (int)nameLength)
9285 			return B_BUFFER_OVERFLOW;
9286 	}
9287 
9288 	return fdCloser.Detach();
9289 }
9290 
9291 
9292 status_t
9293 _user_fcntl(int fd, int op, size_t argument)
9294 {
9295 	status_t status = common_fcntl(fd, op, argument, false);
9296 	if (op == F_SETLKW)
9297 		syscall_restart_handle_post(status);
9298 
9299 	return status;
9300 }
9301 
9302 
9303 status_t
9304 _user_fsync(int fd)
9305 {
9306 	return common_sync(fd, false);
9307 }
9308 
9309 
9310 status_t
9311 _user_flock(int fd, int operation)
9312 {
	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9314 
9315 	// Check if the operation is valid
9316 	switch (operation & ~LOCK_NB) {
9317 		case LOCK_UN:
9318 		case LOCK_SH:
9319 		case LOCK_EX:
9320 			break;
9321 
9322 		default:
9323 			return B_BAD_VALUE;
9324 	}
9325 
9326 	struct file_descriptor* descriptor;
9327 	struct vnode* vnode;
9328 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9329 	if (descriptor == NULL)
9330 		return B_FILE_ERROR;
9331 
9332 	if (descriptor->type != FDTYPE_FILE) {
9333 		put_fd(descriptor);
9334 		return B_BAD_VALUE;
9335 	}
9336 
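	// Translate the BSD-style flock() operation into an advisory lock
	// spanning the whole file (start 0, length OFF_MAX).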
9337 	struct flock flock;
9338 	flock.l_start = 0;
9339 	flock.l_len = OFF_MAX;
9340 	flock.l_whence = 0;
9341 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9342 
9343 	status_t status;
9344 	if ((operation & LOCK_UN) != 0) {
9345 		if (HAS_FS_CALL(vnode, release_lock))
9346 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9347 		else
9348 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9349 	} else {
9350 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9351 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9352 				(operation & LOCK_NB) == 0);
9353 		} else {
9354 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9355 				(operation & LOCK_NB) == 0);
9356 		}
9357 	}
9358 
9359 	syscall_restart_handle_post(status);
9360 
9361 	put_fd(descriptor);
9362 	return status;
9363 }
9364 
9365 
9366 status_t
9367 _user_lock_node(int fd)
9368 {
9369 	return common_lock_node(fd, false);
9370 }
9371 
9372 
9373 status_t
9374 _user_unlock_node(int fd)
9375 {
9376 	return common_unlock_node(fd, false);
9377 }
9378 
9379 
9380 status_t
9381 _user_preallocate(int fd, off_t offset, off_t length)
9382 {
9383 	return common_preallocate(fd, offset, length, false);
9384 }
9385 
9386 
9387 status_t
9388 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9389 	int perms)
9390 {
9391 	char name[B_FILE_NAME_LENGTH];
9392 	status_t status;
9393 
9394 	if (!IS_USER_ADDRESS(userName))
9395 		return B_BAD_ADDRESS;
9396 
9397 	status = user_copy_name(name, userName, sizeof(name));
9398 	if (status != B_OK)
9399 		return status;
9400 
9401 	return dir_create_entry_ref(device, inode, name, perms, false);
9402 }
9403 
9404 
9405 status_t
9406 _user_create_dir(int fd, const char* userPath, int perms)
9407 {
9408 	KPath pathBuffer;
9409 	if (pathBuffer.InitCheck() != B_OK)
9410 		return B_NO_MEMORY;
9411 
9412 	char* path = pathBuffer.LockBuffer();
9413 
9414 	if (!IS_USER_ADDRESS(userPath))
9415 		return B_BAD_ADDRESS;
9416 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9417 	if (status != B_OK)
9418 		return status;
9419 
9420 	return dir_create(fd, path, perms, false);
9421 }
9422 
9423 
9424 status_t
9425 _user_remove_dir(int fd, const char* userPath)
9426 {
9427 	KPath pathBuffer;
9428 	if (pathBuffer.InitCheck() != B_OK)
9429 		return B_NO_MEMORY;
9430 
9431 	char* path = pathBuffer.LockBuffer();
9432 
9433 	if (userPath != NULL) {
9434 		if (!IS_USER_ADDRESS(userPath))
9435 			return B_BAD_ADDRESS;
9436 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9437 		if (status != B_OK)
9438 			return status;
9439 	}
9440 
9441 	return dir_remove(fd, userPath ? path : NULL, false);
9442 }
9443 
9444 
9445 status_t
9446 _user_read_link(int fd, const char* userPath, char* userBuffer,
9447 	size_t* userBufferSize)
9448 {
9449 	KPath pathBuffer, linkBuffer;
9450 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9451 		return B_NO_MEMORY;
9452 
9453 	size_t bufferSize;
9454 
9455 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9456 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9457 		return B_BAD_ADDRESS;
9458 
9459 	char* path = pathBuffer.LockBuffer();
9460 	char* buffer = linkBuffer.LockBuffer();
9461 
9462 	if (userPath) {
9463 		if (!IS_USER_ADDRESS(userPath))
9464 			return B_BAD_ADDRESS;
9465 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9466 		if (status != B_OK)
9467 			return status;
9468 
9469 		if (bufferSize > B_PATH_NAME_LENGTH)
9470 			bufferSize = B_PATH_NAME_LENGTH;
9471 	}
9472 
9473 	size_t newBufferSize = bufferSize;
9474 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9475 		&newBufferSize, false);
9476 
9477 	// we also update the bufferSize in case of errors
9478 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9479 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9480 		return B_BAD_ADDRESS;
9481 
9482 	if (status != B_OK)
9483 		return status;
9484 
9485 	bufferSize = min_c(newBufferSize, bufferSize);
9486 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9487 		return B_BAD_ADDRESS;
9488 
9489 	return B_OK;
9490 }
9491 
9492 
9493 status_t
9494 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9495 	int mode)
9496 {
9497 	KPath pathBuffer;
9498 	KPath toPathBuffer;
9499 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9500 		return B_NO_MEMORY;
9501 
9502 	char* path = pathBuffer.LockBuffer();
9503 	char* toPath = toPathBuffer.LockBuffer();
9504 
9505 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9506 		return B_BAD_ADDRESS;
9507 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9508 	if (status != B_OK)
9509 		return status;
9510 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9511 	if (status != B_OK)
9512 		return status;
9513 
9514 	return common_create_symlink(fd, path, toPath, mode, false);
9515 }
9516 
9517 
9518 status_t
9519 _user_create_link(int pathFD, const char* userPath, int toFD,
9520 	const char* userToPath, bool traverseLeafLink)
9521 {
9522 	KPath pathBuffer;
9523 	KPath toPathBuffer;
9524 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9525 		return B_NO_MEMORY;
9526 
9527 	char* path = pathBuffer.LockBuffer();
9528 	char* toPath = toPathBuffer.LockBuffer();
9529 
9530 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9531 		return B_BAD_ADDRESS;
9532 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9533 	if (status != B_OK)
9534 		return status;
9535 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9536 	if (status != B_OK)
9537 		return status;
9538 
9539 	status = check_path(toPath);
9540 	if (status != B_OK)
9541 		return status;
9542 
9543 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9544 		false);
9545 }
9546 
9547 
9548 status_t
9549 _user_unlink(int fd, const char* userPath)
9550 {
9551 	KPath pathBuffer;
9552 	if (pathBuffer.InitCheck() != B_OK)
9553 		return B_NO_MEMORY;
9554 
9555 	char* path = pathBuffer.LockBuffer();
9556 
9557 	if (!IS_USER_ADDRESS(userPath))
9558 		return B_BAD_ADDRESS;
9559 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9560 	if (status != B_OK)
9561 		return status;
9562 
9563 	return common_unlink(fd, path, false);
9564 }
9565 
9566 
9567 status_t
9568 _user_rename(int oldFD, const char* userOldPath, int newFD,
9569 	const char* userNewPath)
9570 {
9571 	KPath oldPathBuffer;
9572 	KPath newPathBuffer;
9573 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9574 		return B_NO_MEMORY;
9575 
9576 	char* oldPath = oldPathBuffer.LockBuffer();
9577 	char* newPath = newPathBuffer.LockBuffer();
9578 
9579 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9580 		return B_BAD_ADDRESS;
9581 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9582 	if (status != B_OK)
9583 		return status;
9584 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9585 	if (status != B_OK)
9586 		return status;
9587 
9588 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9589 }
9590 
9591 
9592 status_t
9593 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9594 {
9595 	KPath pathBuffer;
9596 	if (pathBuffer.InitCheck() != B_OK)
9597 		return B_NO_MEMORY;
9598 
9599 	char* path = pathBuffer.LockBuffer();
9600 
9601 	if (!IS_USER_ADDRESS(userPath))
9602 		return B_BAD_ADDRESS;
9603 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9604 	if (status != B_OK)
9605 		return status;
9606 
	// split the path into directory vnode and filename
9608 	char filename[B_FILE_NAME_LENGTH];
9609 	struct vnode* dir;
9610 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9611 	if (status != B_OK)
9612 		return status;
9613 
9614 	VNodePutter _(dir);
9615 
9616 	// the underlying FS needs to support creating FIFOs
9617 	if (!HAS_FS_CALL(dir, create_special_node))
9618 		return B_UNSUPPORTED;
9619 
	// create the entry -- the FIFO sub node is set up automatically
9621 	fs_vnode superVnode;
9622 	ino_t nodeID;
9623 	status = FS_CALL(dir, create_special_node, filename, NULL,
9624 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9625 
9626 	// create_special_node() acquired a reference for us that we don't need.
9627 	if (status == B_OK)
9628 		put_vnode(dir->mount->volume, nodeID);
9629 
9630 	return status;
9631 }
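
// This syscall backs the POSIX mkfifo(); as enforced above, only the
// permission bits (S_IUMSK) of the mode are honored, and file systems
// without create_special_node() refuse with B_UNSUPPORTED. A minimal
// userland sketch (the path is just an example):
#if 0
#include <sys/stat.h>

int
make_fifo_example(void)
{
	return mkfifo("/boot/home/test_fifo", 0644);
}
#endif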
9632 
9633 
9634 status_t
9635 _user_create_pipe(int* userFDs)
9636 {
9637 	// rootfs should support creating FIFOs, but let's be sure
9638 	if (!HAS_FS_CALL(sRoot, create_special_node))
9639 		return B_UNSUPPORTED;
9640 
	// create the node -- the FIFO sub node is set up automatically
9642 	fs_vnode superVnode;
9643 	ino_t nodeID;
9644 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9645 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9646 	if (status != B_OK)
9647 		return status;
9648 
9649 	// We've got one reference to the node and need another one.
9650 	struct vnode* vnode;
9651 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9652 	if (status != B_OK) {
		// this should not happen
		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9656 		return status;
9657 	}
9658 
	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
9661 	int fds[2];
9662 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9663 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9664 
9665 	FDCloser closer0(fds[0], false);
9666 	FDCloser closer1(fds[1], false);
9667 
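	// propagate the first failure, if any -- open_vnode() returns a
	// negative error code instead of an FD on failure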
9668 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9669 
9670 	// copy FDs to userland
9671 	if (status == B_OK) {
9672 		if (!IS_USER_ADDRESS(userFDs)
9673 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9674 			status = B_BAD_ADDRESS;
9675 		}
9676 	}
9677 
	// keep the FDs if everything went fine
9679 	if (status == B_OK) {
9680 		closer0.Detach();
9681 		closer1.Detach();
9682 	}
9683 
9684 	return status;
9685 }
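
// The POSIX pipe() wrapper in libroot ends up here; fds[0] is the read end
// and fds[1] the write end of an unnamed rootfs FIFO. A minimal userland
// sketch:
#if 0
#include <string.h>
#include <unistd.h>

int
pipe_example(void)
{
	int fds[2];
	if (pipe(fds) != 0)
		return -1;

	const char message[] = "hello";
	write(fds[1], message, sizeof(message));

	char reply[sizeof(message)];
	read(fds[0], reply, sizeof(reply));

	close(fds[0]);
	close(fds[1]);
	return memcmp(message, reply, sizeof(message)) == 0 ? 0 : -1;
}
#endif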
9686 
9687 
9688 status_t
9689 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9690 {
9691 	KPath pathBuffer;
9692 	if (pathBuffer.InitCheck() != B_OK)
9693 		return B_NO_MEMORY;
9694 
9695 	char* path = pathBuffer.LockBuffer();
9696 
9697 	if (!IS_USER_ADDRESS(userPath))
9698 		return B_BAD_ADDRESS;
9699 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9700 	if (status != B_OK)
9701 		return status;
9702 
9703 	return common_access(fd, path, mode, effectiveUserGroup, false);
9704 }
9705 
9706 
9707 status_t
9708 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9709 	struct stat* userStat, size_t statSize)
9710 {
9711 	struct stat stat = {0};
9712 	status_t status;
9713 
9714 	if (statSize > sizeof(struct stat))
9715 		return B_BAD_VALUE;
9716 
9717 	if (!IS_USER_ADDRESS(userStat))
9718 		return B_BAD_ADDRESS;
9719 
9720 	if (userPath != NULL) {
9721 		// path given: get the stat of the node referred to by (fd, path)
9722 		if (!IS_USER_ADDRESS(userPath))
9723 			return B_BAD_ADDRESS;
9724 
9725 		KPath pathBuffer;
9726 		if (pathBuffer.InitCheck() != B_OK)
9727 			return B_NO_MEMORY;
9728 
9729 		char* path = pathBuffer.LockBuffer();
9730 
9731 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9732 		if (status != B_OK)
9733 			return status;
9734 
9735 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9736 	} else {
9737 		// no path given: get the FD and use the FD operation
9738 		struct file_descriptor* descriptor
9739 			= get_fd(get_current_io_context(false), fd);
9740 		if (descriptor == NULL)
9741 			return B_FILE_ERROR;
9742 
9743 		if (descriptor->ops->fd_read_stat)
9744 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9745 		else
9746 			status = B_UNSUPPORTED;
9747 
9748 		put_fd(descriptor);
9749 	}
9750 
9751 	if (status != B_OK)
9752 		return status;
9753 
9754 	return user_memcpy(userStat, &stat, statSize);
9755 }
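
// statSize is what keeps binaries compiled against an older, smaller
// struct stat working -- only the bytes the caller knows about are copied
// back. A hypothetical libroot-style wrapper (the real one lives in
// libroot; _kern_read_stat() is the syscall name assumed here):
#if 0
#include <errno.h>
#include <sys/stat.h>

static int
stat_example(const char* path, struct stat* st)
{
	// fd == -1: resolve the path on its own, relative to the cwd
	status_t status = _kern_read_stat(-1, path, true, st,
		sizeof(struct stat));
	if (status != B_OK) {
		errno = status;
		return -1;
	}
	return 0;
}
#endif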
9756 
9757 
9758 status_t
9759 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9760 	const struct stat* userStat, size_t statSize, int statMask)
9761 {
9762 	if (statSize > sizeof(struct stat))
9763 		return B_BAD_VALUE;
9764 
9765 	struct stat stat;
9766 
9767 	if (!IS_USER_ADDRESS(userStat)
9768 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9769 		return B_BAD_ADDRESS;
9770 
9771 	// clear additional stat fields
9772 	if (statSize < sizeof(struct stat))
9773 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9774 
9775 	status_t status;
9776 
9777 	if (userPath != NULL) {
9778 		// path given: write the stat of the node referred to by (fd, path)
9779 		if (!IS_USER_ADDRESS(userPath))
9780 			return B_BAD_ADDRESS;
9781 
9782 		KPath pathBuffer;
9783 		if (pathBuffer.InitCheck() != B_OK)
9784 			return B_NO_MEMORY;
9785 
9786 		char* path = pathBuffer.LockBuffer();
9787 
9788 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9789 		if (status != B_OK)
9790 			return status;
9791 
9792 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9793 			statMask, false);
9794 	} else {
9795 		// no path given: get the FD and use the FD operation
9796 		struct file_descriptor* descriptor
9797 			= get_fd(get_current_io_context(false), fd);
9798 		if (descriptor == NULL)
9799 			return B_FILE_ERROR;
9800 
9801 		if (descriptor->ops->fd_write_stat) {
9802 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9803 				statMask);
9804 		} else
9805 			status = B_UNSUPPORTED;
9806 
9807 		put_fd(descriptor);
9808 	}
9809 
9810 	return status;
9811 }
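
// statMask selects which fields of the incoming stat are applied, so a
// chmod() amounts to a single-field write. A sketch assuming the
// B_STAT_MODE mask from <NodeMonitor.h> and the private _kern_write_stat()
// wrapper (hypothetical in this exact form):
#if 0
#include <NodeMonitor.h>
#include <sys/stat.h>

static status_t
chmod_example(const char* path, mode_t mode)
{
	struct stat st;
	st.st_mode = mode;
		// the other fields are ignored thanks to the mask
	return _kern_write_stat(-1, path, true, &st, sizeof(st), B_STAT_MODE);
}
#endif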
9812 
9813 
9814 int
9815 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9816 {
9817 	KPath pathBuffer;
9818 	if (pathBuffer.InitCheck() != B_OK)
9819 		return B_NO_MEMORY;
9820 
9821 	char* path = pathBuffer.LockBuffer();
9822 
9823 	if (userPath != NULL) {
9824 		if (!IS_USER_ADDRESS(userPath))
9825 			return B_BAD_ADDRESS;
9826 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9827 		if (status != B_OK)
9828 			return status;
9829 	}
9830 
9831 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9832 }
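
// Userland reaches this through the <fs_attr.h> API, which wraps the
// returned descriptor in a DIR handle. A sketch listing a file's
// attributes:
#if 0
#include <dirent.h>
#include <fs_attr.h>
#include <stdio.h>

static void
list_attributes(const char* path)
{
	DIR* dir = fs_open_attr_dir(path);
	if (dir == NULL)
		return;

	struct dirent* entry;
	while ((entry = fs_read_attr_dir(dir)) != NULL)
		printf("attribute: %s\n", entry->d_name);

	fs_close_attr_dir(dir);
}
#endif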
9833 
9834 
9835 ssize_t
9836 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9837 	size_t readBytes)
9838 {
9839 	char attribute[B_FILE_NAME_LENGTH];
9840 
9841 	if (userAttribute == NULL)
9842 		return B_BAD_VALUE;
9843 	if (!IS_USER_ADDRESS(userAttribute))
9844 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9846 	if (status != B_OK)
9847 		return status;
9848 
9849 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9850 	if (attr < 0)
9851 		return attr;
9852 
9853 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9854 	_user_close(attr);
9855 
9856 	return bytes;
9857 }
9858 
9859 
9860 ssize_t
9861 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9862 	const void* buffer, size_t writeBytes)
9863 {
9864 	char attribute[B_FILE_NAME_LENGTH];
9865 
9866 	if (userAttribute == NULL)
9867 		return B_BAD_VALUE;
9868 	if (!IS_USER_ADDRESS(userAttribute))
9869 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9871 	if (status != B_OK)
9872 		return status;
9873 
	// Try to support the BeOS-typical truncation as well as the position
	// argument: the attribute is only truncated when writing at offset 0
9876 	int attr = attr_create(fd, NULL, attribute, type,
9877 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9878 	if (attr < 0)
9879 		return attr;
9880 
9881 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9882 	_user_close(attr);
9883 
9884 	return bytes;
9885 }
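
// The public counterparts are fs_write_attr() and fs_read_attr() from
// <fs_attr.h>. Matching the O_TRUNC handling above, writing at offset 0
// replaces the attribute, while a nonzero offset writes in place without
// truncating. A sketch of a round trip:
#if 0
#include <fcntl.h>
#include <fs_attr.h>
#include <TypeConstants.h>
#include <unistd.h>

static void
attr_roundtrip(const char* path)
{
	int fd = open(path, O_RDWR);
	if (fd < 0)
		return;

	const char keyword[] = "haiku";
	fs_write_attr(fd, "Keyword", B_STRING_TYPE, 0, keyword,
		sizeof(keyword));

	char buffer[32];
	fs_read_attr(fd, "Keyword", B_STRING_TYPE, 0, buffer, sizeof(buffer));

	close(fd);
}
#endif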
9886 
9887 
9888 status_t
9889 _user_stat_attr(int fd, const char* userAttribute,
9890 	struct attr_info* userAttrInfo)
9891 {
9892 	char attribute[B_FILE_NAME_LENGTH];
9893 
9894 	if (userAttribute == NULL || userAttrInfo == NULL)
9895 		return B_BAD_VALUE;
9896 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9897 		return B_BAD_ADDRESS;
9898 	status_t status = user_copy_name(attribute, userAttribute,
9899 		sizeof(attribute));
9900 	if (status != B_OK)
9901 		return status;
9902 
9903 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9904 	if (attr < 0)
9905 		return attr;
9906 
9907 	struct file_descriptor* descriptor
9908 		= get_fd(get_current_io_context(false), attr);
9909 	if (descriptor == NULL) {
9910 		_user_close(attr);
9911 		return B_FILE_ERROR;
9912 	}
9913 
9914 	struct stat stat;
9915 	if (descriptor->ops->fd_read_stat)
9916 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9917 	else
9918 		status = B_UNSUPPORTED;
9919 
9920 	put_fd(descriptor);
9921 	_user_close(attr);
9922 
9923 	if (status == B_OK) {
9924 		attr_info info;
9925 		info.type = stat.st_type;
9926 		info.size = stat.st_size;
9927 
9928 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9929 			return B_BAD_ADDRESS;
9930 	}
9931 
9932 	return status;
9933 }
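
// The matching public call is fs_stat_attr(), which returns the type and
// size gathered above. A small sketch:
#if 0
#include <fs_attr.h>

static off_t
attribute_size(int fd, const char* attribute)
{
	attr_info info;
	if (fs_stat_attr(fd, attribute, &info) != 0)
		return -1;
	return info.size;
}
#endif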
9934 
9935 
9936 int
9937 _user_open_attr(int fd, const char* userPath, const char* userName,
9938 	uint32 type, int openMode)
9939 {
9940 	char name[B_FILE_NAME_LENGTH];
9941 
9942 	if (!IS_USER_ADDRESS(userName))
9943 		return B_BAD_ADDRESS;
9944 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9945 	if (status != B_OK)
9946 		return status;
9947 
9948 	KPath pathBuffer;
9949 	if (pathBuffer.InitCheck() != B_OK)
9950 		return B_NO_MEMORY;
9951 
9952 	char* path = pathBuffer.LockBuffer();
9953 
9954 	if (userPath != NULL) {
9955 		if (!IS_USER_ADDRESS(userPath))
9956 			return B_BAD_ADDRESS;
9957 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9958 		if (status != B_OK)
9959 			return status;
9960 	}
9961 
9962 	if ((openMode & O_CREAT) != 0) {
9963 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9964 			false);
9965 	}
9966 
9967 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9968 }
9969 
9970 
9971 status_t
9972 _user_remove_attr(int fd, const char* userName)
9973 {
9974 	char name[B_FILE_NAME_LENGTH];
9975 
9976 	if (!IS_USER_ADDRESS(userName))
9977 		return B_BAD_ADDRESS;
9978 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9979 	if (status != B_OK)
9980 		return status;
9981 
9982 	return attr_remove(fd, name, false);
9983 }
9984 
9985 
9986 status_t
9987 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9988 	const char* userToName)
9989 {
9990 	if (!IS_USER_ADDRESS(userFromName)
9991 		|| !IS_USER_ADDRESS(userToName))
9992 		return B_BAD_ADDRESS;
9993 
9994 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9995 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9996 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9997 		return B_NO_MEMORY;
9998 
9999 	char* fromName = fromNameBuffer.LockBuffer();
10000 	char* toName = toNameBuffer.LockBuffer();
10001 
	status_t status = user_copy_name(fromName, userFromName,
		B_FILE_NAME_LENGTH);
10003 	if (status != B_OK)
10004 		return status;
10005 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
10006 	if (status != B_OK)
10007 		return status;
10008 
10009 	return attr_rename(fromFile, fromName, toFile, toName, false);
10010 }
10011 
10012 
10013 int
10014 _user_open_index_dir(dev_t device)
10015 {
10016 	return index_dir_open(device, false);
10017 }
10018 
10019 
10020 status_t
10021 _user_create_index(dev_t device, const char* userName, uint32 type,
10022 	uint32 flags)
10023 {
10024 	char name[B_FILE_NAME_LENGTH];
10025 
10026 	if (!IS_USER_ADDRESS(userName))
10027 		return B_BAD_ADDRESS;
10028 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10029 	if (status != B_OK)
10030 		return status;
10031 
10032 	return index_create(device, name, type, flags, false);
10033 }
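
// Indices make attributes queryable on a per-volume basis. Through the
// public <fs_index.h> API the call above becomes, for example:
#if 0
#include <fs_index.h>
#include <fs_info.h>
#include <TypeConstants.h>

static int
index_example(void)
{
	dev_t device = dev_for_path("/boot/home");
	if (device < 0)
		return -1;
	return fs_create_index(device, "Keyword", B_STRING_TYPE, 0);
}
#endif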
10034 
10035 
10036 status_t
10037 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
10038 {
10039 	char name[B_FILE_NAME_LENGTH];
10040 	struct stat stat = {0};
10041 	status_t status;
10042 
10043 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
10044 		return B_BAD_ADDRESS;
10045 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10046 	if (status != B_OK)
10047 		return status;
10048 
10049 	status = index_name_read_stat(device, name, &stat, false);
10050 	if (status == B_OK) {
10051 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
10052 			return B_BAD_ADDRESS;
10053 	}
10054 
10055 	return status;
10056 }
10057 
10058 
10059 status_t
10060 _user_remove_index(dev_t device, const char* userName)
10061 {
10062 	char name[B_FILE_NAME_LENGTH];
10063 
10064 	if (!IS_USER_ADDRESS(userName))
10065 		return B_BAD_ADDRESS;
10066 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10067 	if (status != B_OK)
10068 		return status;
10069 
10070 	return index_remove(device, name, false);
10071 }
10072 
10073 
10074 status_t
10075 _user_getcwd(char* userBuffer, size_t size)
10076 {
10077 	if (size == 0)
10078 		return B_BAD_VALUE;
10079 	if (!IS_USER_ADDRESS(userBuffer))
10080 		return B_BAD_ADDRESS;
10081 
10082 	if (size > kMaxPathLength)
10083 		size = kMaxPathLength;
10084 
10085 	KPath pathBuffer(size);
10086 	if (pathBuffer.InitCheck() != B_OK)
10087 		return B_NO_MEMORY;
10088 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10090 
10091 	char* path = pathBuffer.LockBuffer();
10092 
10093 	status_t status = get_cwd(path, size, false);
10094 	if (status != B_OK)
10095 		return status;
10096 
10097 	// Copy back the result
10098 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10099 		return B_BAD_ADDRESS;
10100 
10101 	return status;
10102 }
10103 
10104 
10105 status_t
10106 _user_setcwd(int fd, const char* userPath)
10107 {
10108 	TRACE(("user_setcwd: path = %p\n", userPath));
10109 
10110 	KPath pathBuffer;
10111 	if (pathBuffer.InitCheck() != B_OK)
10112 		return B_NO_MEMORY;
10113 
10114 	char* path = pathBuffer.LockBuffer();
10115 
10116 	if (userPath != NULL) {
10117 		if (!IS_USER_ADDRESS(userPath))
10118 			return B_BAD_ADDRESS;
10119 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10120 		if (status != B_OK)
10121 			return status;
10122 	}
10123 
10124 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10125 }
10126 
10127 
10128 status_t
10129 _user_change_root(const char* userPath)
10130 {
10131 	// only root is allowed to chroot()
10132 	if (geteuid() != 0)
10133 		return B_NOT_ALLOWED;
10134 
10135 	// alloc path buffer
10136 	KPath pathBuffer;
10137 	if (pathBuffer.InitCheck() != B_OK)
10138 		return B_NO_MEMORY;
10139 
	// copy the userland path to the kernel; unlike setcwd(), a NULL path is
	// not meaningful here, since there is no FD to fall back on
	char* path = pathBuffer.LockBuffer();
	if (userPath == NULL || !IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;
	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
	if (status != B_OK)
		return status;

	// get the vnode
	struct vnode* vnode;
	status = path_to_vnode(path, true, &vnode, NULL, false);
	if (status != B_OK)
		return status;
10155 
10156 	// set the new root
10157 	struct io_context* context = get_current_io_context(false);
10158 	mutex_lock(&sIOContextRootLock);
10159 	struct vnode* oldRoot = context->root;
10160 	context->root = vnode;
10161 	mutex_unlock(&sIOContextRootLock);
10162 
10163 	put_vnode(oldRoot);
10164 
10165 	return B_OK;
10166 }
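
// Only the IO context's root is swapped above -- the current working
// directory is left untouched, so callers conventionally chdir() into the
// new root right away. A sketch:
#if 0
#include <unistd.h>

static int
enter_jail(const char* jailPath)
{
	if (chroot(jailPath) != 0)
		return -1;
	// without this, the old working directory would remain reachable
	return chdir("/");
}
#endif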
10167 
10168 
10169 int
10170 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10171 	uint32 flags, port_id port, int32 token)
10172 {
10173 	if (device < 0 || userQuery == NULL || queryLength == 0)
10174 		return B_BAD_VALUE;
10175 
10176 	if (!IS_USER_ADDRESS(userQuery))
10177 		return B_BAD_ADDRESS;
10178 
10179 	// this is a safety restriction
10180 	if (queryLength >= 65536)
10181 		return B_NAME_TOO_LONG;
10182 
10183 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10184 	if (!query.IsValid())
10185 		return B_NO_MEMORY;
10186 
10187 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10188 		return B_BAD_ADDRESS;
10189 
10190 	return query_open(device, query, flags, port, token, false);
10191 }
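
// Both one-shot and live queries funnel through here; the public
// <fs_query.h> API hides the port/token plumbing (live queries pass them
// via fs_open_live_query()). A sketch of a simple one-shot query:
#if 0
#include <dirent.h>
#include <fs_query.h>
#include <stdio.h>

static void
query_example(dev_t device)
{
	DIR* query = fs_open_query(device, "name==\"*.cpp\"", 0);
	if (query == NULL)
		return;

	struct dirent* entry;
	while ((entry = fs_read_query(query)) != NULL)
		printf("match: %s\n", entry->d_name);

	fs_close_query(query);
}
#endif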
10192 
10193 
10194 #include "vfs_request_io.cpp"
10195