xref: /haiku/src/system/kernel/fs/vfs.cpp (revision eea5774f46bba925156498abf9cb1a1165647bf7)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <AutoDeleterDrivers.h>
36 #include <block_cache.h>
37 #include <boot/kernel_args.h>
38 #include <debug_heap.h>
39 #include <disk_device_manager/KDiskDevice.h>
40 #include <disk_device_manager/KDiskDeviceManager.h>
41 #include <disk_device_manager/KDiskDeviceUtils.h>
42 #include <disk_device_manager/KDiskSystem.h>
43 #include <fd.h>
44 #include <file_cache.h>
45 #include <fs/node_monitor.h>
46 #include <KPath.h>
47 #include <lock.h>
48 #include <low_resource_manager.h>
49 #include <slab/Slab.h>
50 #include <StackOrHeapArray.h>
51 #include <syscalls.h>
52 #include <syscall_restart.h>
53 #include <tracing.h>
54 #include <util/atomic.h>
55 #include <util/AutoLock.h>
56 #include <util/ThreadAutoLock.h>
57 #include <util/DoublyLinkedList.h>
58 #include <vfs.h>
59 #include <vm/vm.h>
60 #include <vm/VMCache.h>
61 #include <wait_for_objects.h>
62 
63 #include "EntryCache.h"
64 #include "fifo.h"
65 #include "IORequest.h"
66 #include "unused_vnodes.h"
67 #include "vfs_tracing.h"
68 #include "Vnode.h"
69 #include "../cache/vnode_store.h"
70 
71 
// Uncomment TRACE_VFS to route the TRACE()/FUNCTION() statements of this file
// to dprintf(). Both macros take one fully parenthesized argument list, e.g.
// TRACE(("value: %d\n", value)), so that the whole call can be replaced by an
// empty statement when tracing is disabled.
//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

// NOTE(review): presumably enables the kernel debugger commands defined
// further down in this file -- the users are not visible in this chunk.
#define ADD_DEBUGGER_COMMANDS
82 
83 
// Helpers for invoking file system hooks.
//
// HAS_FS_CALL()/HAS_FS_MOUNT_CALL() test whether the vnode's respectively the
// mount's volume's ops table provides the hook \a op.
// FS_CALL*()/FS_MOUNT_CALL*() invoke the hook, passing the volume (and the
// vnode) as the leading arguments. With KDEBUG enabled a missing hook results
// in a panic() (the expression then evaluates to 0); without KDEBUG the hook
// pointer is called unchecked, so callers have to use HAS_FS_CALL() first for
// optional hooks.
//
// The \a vnode and \a mount arguments are parenthesized in the expansion, so
// that arbitrary expressions can be passed. Note that they are still evaluated
// more than once -- don't pass expressions with side effects.
// "params..." is the GNU named variadic macro syntax; the *_NO_PARAMS variants
// exist because at least one extra argument is required otherwise.
#define HAS_FS_CALL(vnode, op)			((vnode)->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	((mount)->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			(vnode)->ops->op((vnode)->mount->volume, (vnode), params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", (vnode)), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			(vnode)->ops->op((vnode)->mount->volume, (vnode)) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", (vnode)), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			(mount)->volume->ops->op((mount)->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", (mount)), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			(mount)->volume->ops->op((mount)->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", (mount)), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			(vnode)->ops->op((vnode)->mount->volume, (vnode), params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			(vnode)->ops->op((vnode)->mount->volume, (vnode))
#	define FS_MOUNT_CALL(mount, op, params...) \
			(mount)->volume->ops->op((mount)->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			(mount)->volume->ops->op((mount)->volume)
#endif
114 
115 
116 const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (used for getcwd()); this does not
	// depend on PATH_MAX.
119 
120 
121 typedef DoublyLinkedList<vnode> VnodeList;
122 
123 /*!	\brief Structure to manage a mounted file system
124 
125 	Note: The root_vnode and root_vnode->covers fields (what others?) are
126 	initialized in fs_mount() and not changed afterwards. That is as soon
127 	as the mount is mounted and it is made sure it won't be unmounted
128 	(e.g. by holding a reference to a vnode of that mount) (read) access
129 	to those fields is always safe, even without additional locking. Morever
130 	while mounted the mount holds a reference to the root_vnode->covers vnode,
131 	and thus making the access path vnode->mount->root_vnode->covers->mount->...
132 	safe if a reference to vnode is held (note that for the root mount
133 	root_vnode->covers is NULL, though).
134 */
135 struct fs_mount {
136 	fs_mount()
137 		:
138 		volume(NULL),
139 		device_name(NULL)
140 	{
141 		mutex_init(&lock, "mount lock");
142 	}
143 
144 	~fs_mount()
145 	{
146 		mutex_destroy(&lock);
147 		free(device_name);
148 
149 		while (volume) {
150 			fs_volume* superVolume = volume->super_volume;
151 
152 			if (volume->file_system != NULL)
153 				put_module(volume->file_system->info.name);
154 
155 			free(volume->file_system_name);
156 			free(volume);
157 			volume = superVolume;
158 		}
159 	}
160 
161 	struct fs_mount* next;
162 	dev_t			id;
163 	fs_volume*		volume;
164 	char*			device_name;
165 	mutex			lock;	// guards the vnodes list
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 
namespace {

/*!	A single advisory lock entry. Entries are chained in a LockList (see
	advisory_locking::locks) via the DoublyLinkedListLinkImpl base class.
*/
struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
		// NOTE(review): looks redundant next to the DoublyLinkedListLinkImpl
		// base -- no user is visible in this chunk; confirm before removing.
	void*			bound_to;
		// presumably identifies the owner the lock is bound to -- TODO confirm
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
		// presumably the locked byte range; whether \c end is inclusive can't
		// be told from here -- TODO confirm
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace
191 
192 
193 struct advisory_locking {
194 	sem_id			lock;
195 	sem_id			wait_sem;
196 	LockList		locks;
197 
198 	advisory_locking()
199 		:
200 		lock(-1),
201 		wait_sem(-1)
202 	{
203 	}
204 
205 	~advisory_locking()
206 	{
207 		if (lock >= 0)
208 			delete_sem(lock);
209 		if (wait_sem >= 0)
210 			delete_sem(wait_sem);
211 	}
212 };
213 
/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified, and
	- the fields immutable after initialization of the fs_mount structures in
	  sMountsTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
260 
261 
262 namespace {
263 
264 struct vnode_hash_key {
265 	dev_t	device;
266 	ino_t	vnode;
267 };
268 
269 struct VnodeHash {
270 	typedef vnode_hash_key	KeyType;
271 	typedef	struct vnode	ValueType;
272 
273 #define VHASH(mountid, vnodeid) \
274 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
275 
276 	size_t HashKey(KeyType key) const
277 	{
278 		return VHASH(key.device, key.vnode);
279 	}
280 
281 	size_t Hash(ValueType* vnode) const
282 	{
283 		return VHASH(vnode->device, vnode->id);
284 	}
285 
286 #undef VHASH
287 
288 	bool Compare(KeyType key, ValueType* vnode) const
289 	{
290 		return vnode->device == key.device && vnode->id == key.vnode;
291 	}
292 
293 	ValueType*& GetLink(ValueType* value) const
294 	{
295 		return value->next;
296 	}
297 };
298 
299 typedef BOpenHashTable<VnodeHash> VnodeTable;
300 
301 
302 struct MountHash {
303 	typedef dev_t			KeyType;
304 	typedef	struct fs_mount	ValueType;
305 
306 	size_t HashKey(KeyType key) const
307 	{
308 		return key;
309 	}
310 
311 	size_t Hash(ValueType* mount) const
312 	{
313 		return mount->id;
314 	}
315 
316 	bool Compare(KeyType key, ValueType* mount) const
317 	{
318 		return mount->id == key;
319 	}
320 
321 	ValueType*& GetLink(ValueType* value) const
322 	{
323 		return value->next;
324 	}
325 };
326 
327 typedef BOpenHashTable<MountHash> MountTable;
328 
329 } // namespace
330 
331 
// Object caches. sVnodeCache is the allocator for struct vnode (see
// create_new_vnode_and_lock() and free_vnode()).
// NOTE(review): the three caches are not static, so they are presumably
// referenced from other source files as well -- confirm.
object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

// All known vnodes, keyed by (mount ID, node ID); guarded by sVnodeLock.
#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

// All mounts, keyed by mount ID; guarded by sMountLock.
#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s): retry_busy_vnode() snoozes
// BUSY_VNODE_DELAY per retry, for at most BUSY_VNODE_RETRIES retries
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;
351 
352 /* function declarations */
353 
354 static void free_unused_vnodes();
355 
356 // file descriptor operation prototypes
357 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
358 	void* buffer, size_t* _bytes);
359 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
360 	const void* buffer, size_t* _bytes);
361 static ssize_t file_readv(struct file_descriptor* descriptor, off_t pos,
362 	const struct iovec *vecs, int count);
363 static ssize_t file_writev(struct file_descriptor* descriptor, off_t pos,
364 	const struct iovec *vecs, int count);
365 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
366 	int seekType);
367 static void file_free_fd(struct file_descriptor* descriptor);
368 static status_t file_close(struct file_descriptor* descriptor);
369 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
370 	struct selectsync* sync);
371 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
372 	struct selectsync* sync);
373 static status_t dir_read(struct io_context* context,
374 	struct file_descriptor* descriptor, struct dirent* buffer,
375 	size_t bufferSize, uint32* _count);
376 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
377 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
378 static status_t dir_rewind(struct file_descriptor* descriptor);
379 static void dir_free_fd(struct file_descriptor* descriptor);
380 static status_t dir_close(struct file_descriptor* descriptor);
381 static status_t attr_dir_read(struct io_context* context,
382 	struct file_descriptor* descriptor, struct dirent* buffer,
383 	size_t bufferSize, uint32* _count);
384 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
385 static void attr_dir_free_fd(struct file_descriptor* descriptor);
386 static status_t attr_dir_close(struct file_descriptor* descriptor);
387 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
388 	void* buffer, size_t* _bytes);
389 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
390 	const void* buffer, size_t* _bytes);
391 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
392 	int seekType);
393 static void attr_free_fd(struct file_descriptor* descriptor);
394 static status_t attr_close(struct file_descriptor* descriptor);
395 static status_t attr_read_stat(struct file_descriptor* descriptor,
396 	struct stat* statData);
397 static status_t attr_write_stat(struct file_descriptor* descriptor,
398 	const struct stat* stat, int statMask);
399 static status_t index_dir_read(struct io_context* context,
400 	struct file_descriptor* descriptor, struct dirent* buffer,
401 	size_t bufferSize, uint32* _count);
402 static status_t index_dir_rewind(struct file_descriptor* descriptor);
403 static void index_dir_free_fd(struct file_descriptor* descriptor);
404 static status_t index_dir_close(struct file_descriptor* descriptor);
405 static status_t query_read(struct io_context* context,
406 	struct file_descriptor* descriptor, struct dirent* buffer,
407 	size_t bufferSize, uint32* _count);
408 static status_t query_rewind(struct file_descriptor* descriptor);
409 static void query_free_fd(struct file_descriptor* descriptor);
410 static status_t query_close(struct file_descriptor* descriptor);
411 
412 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
413 	void* buffer, size_t length);
414 static status_t common_read_stat(struct file_descriptor* descriptor,
415 	struct stat* statData);
416 static status_t common_write_stat(struct file_descriptor* descriptor,
417 	const struct stat* statData, int statMask);
418 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
419 	struct stat* stat, bool kernel);
420 
421 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
422 	bool traverseLeafLink, bool kernel,
423 	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
424 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
425 	size_t bufferSize, bool kernel);
426 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
427 	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
428 static void inc_vnode_ref_count(struct vnode* vnode);
429 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
430 	bool reenter);
431 static inline void put_vnode(struct vnode* vnode);
432 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
433 	bool kernel);
434 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
435 
436 
// Descriptor operations for regular file descriptors. The initializer is
// positional: close, free_fd, read, write, readv, writev, seek, ioctl,
// set_flags, select, deselect, read_dir, rewind_dir, read_stat, write_stat.
static struct fd_ops sFileOps = {
	file_close,
	file_free_fd,
	file_read,
	file_write,
	file_readv,
	file_writev,
	file_seek,
	common_ioctl,
	NULL,		// set_flags()
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
};
454 
// Descriptor operations for directory descriptors: no data I/O, seeking or
// select support, but directory reading/rewinding (positional initializer,
// same slot order as sFileOps).
static struct fd_ops sDirectoryOps = {
	dir_close,
	dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
};
470 
// Descriptor operations for attribute directory descriptors; mirrors
// sDirectoryOps, with the attr_dir_*() hooks for closing, freeing, reading
// and rewinding (positional initializer, same slot order as sFileOps).
static struct fd_ops sAttributeDirectoryOps = {
	attr_dir_close,
	attr_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
};
486 
// Descriptor operations for attribute descriptors: plain read/write/seek plus
// attribute specific stat hooks; no vectored I/O and no select support
// (positional initializer, same slot order as sFileOps).
static struct fd_ops sAttributeOps = {
	attr_close,
	attr_free_fd,
	attr_read,
	attr_write,
	NULL,		// readv()
	NULL,		// writev()
	attr_seek,
	common_ioctl,
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
};
504 
// Descriptor operations for index directory descriptors: only closing,
// freeing, reading and rewinding are supported (positional initializer, same
// slot order as sFileOps).
static struct fd_ops sIndexDirectoryOps = {
	index_dir_close,
	index_dir_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};
520 
521 #if 0
522 static struct fd_ops sIndexOps = {
523 	NULL,		// dir_close()
524 	NULL,		// free_fd()
525 	NULL, NULL,	// read(), write()
526 	NULL, NULL,	// readv(), writev()
527 	NULL,		// seek()
528 	NULL,		// ioctl()
529 	NULL,		// set_flags
530 	NULL,		// select()
531 	NULL,		// deselect()
532 	NULL,		// dir_read()
533 	NULL,		// dir_rewind()
534 	index_read_stat,	// read_stat()
535 	NULL,		// write_stat()
536 };
537 #endif
538 
// Descriptor operations for query descriptors: only closing, freeing,
// reading and rewinding are supported (positional initializer, same slot
// order as sFileOps).
static struct fd_ops sQueryOps = {
	query_close,
	query_free_fd,
	NULL, NULL,	// read(), write()
	NULL, NULL,	// readv(), writev()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
};
554 
555 
556 namespace {
557 
558 class FDCloser {
559 public:
560 	FDCloser() : fFD(-1), fKernel(true) {}
561 
562 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
563 
564 	~FDCloser()
565 	{
566 		Close();
567 	}
568 
569 	void SetTo(int fd, bool kernel)
570 	{
571 		Close();
572 		fFD = fd;
573 		fKernel = kernel;
574 	}
575 
576 	void Close()
577 	{
578 		if (fFD >= 0) {
579 			if (fKernel)
580 				_kern_close(fFD);
581 			else
582 				_user_close(fFD);
583 			fFD = -1;
584 		}
585 	}
586 
587 	int Detach()
588 	{
589 		int fd = fFD;
590 		fFD = -1;
591 		return fd;
592 	}
593 
594 private:
595 	int		fFD;
596 	bool	fKernel;
597 };
598 
599 } // namespace
600 
601 
602 #if VFS_PAGES_IO_TRACING
603 
604 namespace VFSPagesIOTracing {
605 
/*!	Common base of the page I/O trace entries (only compiled in, if
	VFS_PAGES_IO_TRACING is enabled). It records the parameters and the outcome
	of a pages I/O operation; the subclasses only supply the operation name.
*/
class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	/*!	Stores the given values. The mount and node IDs are copied from
		\a vnode right away, and the \a count vecs are copied into the tracing
		buffer.
	*/
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		// AddDump() copes with a NULL fVecs
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	/*!	Prints the entry to \a out; \a mode is the operation name to print
		(the subclasses pass "read" respectively "write").
	*/
	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		// the vecs are skipped, if they could not be copied in the constructor
		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;		// copy in the tracing buffer, may be NULL
	uint32			fCount;		// number of entries in fVecs
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};
662 
663 
/*!	Trace entry for a pages read operation; dumped with the mode "read". */
class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		// called only after all members have been set by the base class
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};
682 
683 
/*!	Trace entry for a pages write operation; dumped with the mode "write". */
class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		// called only after all members have been set by the base class
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};
702 
703 }	// namespace VFSPagesIOTracing
704 
705 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
706 #else
707 #	define TPIO(x) ;
708 #endif	// VFS_PAGES_IO_TRACING
709 
710 
711 /*! Finds the mounted device (the fs_mount structure) with the given ID.
712 	Note, you must hold the sMountLock lock when you call this function.
713 */
714 static struct fs_mount*
715 find_mount(dev_t id)
716 {
717 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
718 
719 	return sMountsTable->Lookup(id);
720 }
721 
722 
723 static status_t
724 get_mount(dev_t id, struct fs_mount** _mount)
725 {
726 	struct fs_mount* mount;
727 
728 	ReadLocker nodeLocker(sVnodeLock);
729 	ReadLocker mountLocker(sMountLock);
730 
731 	mount = find_mount(id);
732 	if (mount == NULL)
733 		return B_BAD_VALUE;
734 
735 	struct vnode* rootNode = mount->root_vnode;
736 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
737 		|| rootNode->ref_count == 0) {
738 		// might have been called during a mount/unmount operation
739 		return B_BUSY;
740 	}
741 
742 	inc_vnode_ref_count(rootNode);
743 	*_mount = mount;
744 	return B_OK;
745 }
746 
747 
748 static void
749 put_mount(struct fs_mount* mount)
750 {
751 	if (mount)
752 		put_vnode(mount->root_vnode);
753 }
754 
755 
756 /*!	Tries to open the specified file system module.
757 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
758 	Returns a pointer to file system module interface, or NULL if it
759 	could not open the module.
760 */
761 static file_system_module_info*
762 get_file_system(const char* fsName)
763 {
764 	char name[B_FILE_NAME_LENGTH];
765 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
766 		// construct module name if we didn't get one
767 		// (we currently support only one API)
768 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
769 		fsName = NULL;
770 	}
771 
772 	file_system_module_info* info;
773 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
774 		return NULL;
775 
776 	return info;
777 }
778 
779 
780 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
781 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
782 	The name is allocated for you, and you have to free() it when you're
783 	done with it.
784 	Returns NULL if the required memory is not available.
785 */
786 static char*
787 get_file_system_name(const char* fsName)
788 {
789 	const size_t length = strlen("file_systems/");
790 
791 	if (strncmp(fsName, "file_systems/", length)) {
792 		// the name already seems to be the module's file name
793 		return strdup(fsName);
794 	}
795 
796 	fsName += length;
797 	const char* end = strchr(fsName, '/');
798 	if (end == NULL) {
799 		// this doesn't seem to be a valid name, but well...
800 		return strdup(fsName);
801 	}
802 
803 	// cut off the trailing /v1
804 
805 	char* name = (char*)malloc(end + 1 - fsName);
806 	if (name == NULL)
807 		return NULL;
808 
809 	strlcpy(name, fsName, end + 1 - fsName);
810 	return name;
811 }
812 
813 
814 /*!	Accepts a list of file system names separated by a colon, one for each
815 	layer and returns the file system name for the specified layer.
816 	The name is allocated for you, and you have to free() it when you're
817 	done with it.
818 	Returns NULL if the required memory is not available or if there is no
819 	name for the specified layer.
820 */
821 static char*
822 get_file_system_name_for_layer(const char* fsNames, int32 layer)
823 {
824 	while (layer >= 0) {
825 		const char* end = strchr(fsNames, ':');
826 		if (end == NULL) {
827 			if (layer == 0)
828 				return strdup(fsNames);
829 			return NULL;
830 		}
831 
832 		if (layer == 0) {
833 			size_t length = end - fsNames + 1;
834 			char* result = (char*)malloc(length);
835 			strlcpy(result, fsNames, length);
836 			return result;
837 		}
838 
839 		fsNames = end + 1;
840 		layer--;
841 	}
842 
843 	return NULL;
844 }
845 
846 
847 static void
848 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
849 {
850 	MutexLocker _(mount->lock);
851 	mount->vnodes.Add(vnode);
852 }
853 
854 
855 static void
856 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
857 {
858 	MutexLocker _(mount->lock);
859 	mount->vnodes.Remove(vnode);
860 }
861 
862 
863 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
864 
865 	The caller must hold the sVnodeLock (read lock at least).
866 
867 	\param mountID the mount ID.
868 	\param vnodeID the node ID.
869 
870 	\return The vnode structure, if it was found in the hash table, \c NULL
871 			otherwise.
872 */
873 static struct vnode*
874 lookup_vnode(dev_t mountID, ino_t vnodeID)
875 {
876 	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);
877 
878 	struct vnode_hash_key key;
879 
880 	key.device = mountID;
881 	key.vnode = vnodeID;
882 
883 	return sVnodeTable->Lookup(key);
884 }
885 
886 
887 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
888 
889 	This will also wait for BUSY_VNODE_DELAY before returning if one should
890 	still wait for the vnode becoming unbusy.
891 
892 	\return \c true if one should retry, \c false if not.
893 */
894 static bool
895 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
896 {
897 	if (--tries < 0) {
898 		// vnode doesn't seem to become unbusy
899 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
900 			" is not becoming unbusy!\n", mountID, vnodeID);
901 		return false;
902 	}
903 	snooze(BUSY_VNODE_DELAY);
904 	return true;
905 }
906 
907 
908 /*!	Creates a new vnode with the given mount and node ID.
909 	If the node already exists, it is returned instead and no new node is
910 	created. In either case -- but not, if an error occurs -- the function write
911 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
912 	error the lock is not held on return.
913 
914 	\param mountID The mount ID.
915 	\param vnodeID The vnode ID.
916 	\param _vnode Will be set to the new vnode on success.
917 	\param _nodeCreated Will be set to \c true when the returned vnode has
918 		been newly created, \c false when it already existed. Will not be
919 		changed on error.
920 	\return \c B_OK, when the vnode was successfully created and inserted or
921 		a node with the given ID was found, \c B_NO_MEMORY or
922 		\c B_ENTRY_NOT_FOUND on error.
923 */
924 static status_t
925 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
926 	bool& _nodeCreated)
927 {
928 	FUNCTION(("create_new_vnode_and_lock()\n"));
929 
930 	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
931 	if (vnode == NULL)
932 		return B_NO_MEMORY;
933 
934 	// initialize basic values
935 	memset(vnode, 0, sizeof(struct vnode));
936 	vnode->device = mountID;
937 	vnode->id = vnodeID;
938 	vnode->ref_count = 1;
939 	vnode->SetBusy(true);
940 
941 	// look up the node -- it might have been added by someone else in the
942 	// meantime
943 	rw_lock_write_lock(&sVnodeLock);
944 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
945 	if (existingVnode != NULL) {
946 		object_cache_free(sVnodeCache, vnode, 0);
947 		_vnode = existingVnode;
948 		_nodeCreated = false;
949 		return B_OK;
950 	}
951 
952 	// get the mount structure
953 	rw_lock_read_lock(&sMountLock);
954 	vnode->mount = find_mount(mountID);
955 	if (!vnode->mount || vnode->mount->unmounting) {
956 		rw_lock_read_unlock(&sMountLock);
957 		rw_lock_write_unlock(&sVnodeLock);
958 		object_cache_free(sVnodeCache, vnode, 0);
959 		return B_ENTRY_NOT_FOUND;
960 	}
961 
962 	// add the vnode to the mount's node list and the hash table
963 	sVnodeTable->Insert(vnode);
964 	add_vnode_to_mount_list(vnode, vnode->mount);
965 
966 	rw_lock_read_unlock(&sMountLock);
967 
968 	_vnode = vnode;
969 	_nodeCreated = true;
970 
971 	// keep the vnode lock locked
972 	return B_OK;
973 }
974 
975 
/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.

	The vnode must be busy, must not be referenced anymore (ref count 0), and
	must not have advisory locking attached (all of which is asserted).

	\param vnode The vnode to free.
	\param reenter Passed on to the file system's remove_vnode() respectively
		put_vnode() hook.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now, will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	// Let the file system release its private node -- unpublished vnodes are
	// skipped. A removed node is deleted, any other node is just put.
	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	// the vnode's memory goes back to the cache it was allocated from
	object_cache_free(sVnodeCache, vnode, 0);
}
1037 
1038 
/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
		   (As implemented here the function always returns \c B_OK.)
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	// Read locking sVnodeLock plus locking the vnode itself is what permits
	// modifying the ref count and the busy flag (cf. sVnodeLock).
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	// there are other references left -- nothing more to do
	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		// marked busy while still locked; free_vnode() expects a busy vnode
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	// Both locks have to be released before the actual work is done:
	// free_vnode() write locks sVnodeLock itself.
	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}
1094 
1095 
1096 /*!	\brief Increments the reference counter of the given vnode.
1097 
1098 	The caller must make sure that the node isn't deleted while this function
1099 	is called. This can be done either:
1100 	- by ensuring that a reference to the node exists and remains in existence,
1101 	  or
1102 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1103 	  or by holding sVnodeLock write locked.
1104 
1105 	In the second case the caller is responsible for dealing with the ref count
1106 	0 -> 1 transition. That is 1. this function must not be invoked when the
1107 	node is busy in the first place and 2. vnode_used() must be called for the
1108 	node.
1109 
1110 	\param vnode the vnode.
1111 */
1112 static void
1113 inc_vnode_ref_count(struct vnode* vnode)
1114 {
1115 	atomic_add(&vnode->ref_count, 1);
1116 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1117 		vnode->ref_count));
1118 }
1119 
1120 
/*!	Returns whether \a type (a \c st_mode style value) denotes a node type
	that gets a special sub node. Currently that is the case for FIFOs only.
*/
static bool
is_special_node_type(int type)
{
	const bool isFifo = S_ISFIFO(type);
	return isFifo;
}
1127 
1128 
1129 static status_t
1130 create_special_sub_node(struct vnode* vnode, uint32 flags)
1131 {
1132 	if (S_ISFIFO(vnode->Type()))
1133 		return create_fifo_vnode(vnode->mount->volume, vnode);
1134 
1135 	return B_BAD_VALUE;
1136 }
1137 
1138 
1139 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1140 
1141 	If the node is not yet in memory, it will be loaded.
1142 
1143 	The caller must not hold the sVnodeLock or the sMountLock.
1144 
1145 	\param mountID the mount ID.
1146 	\param vnodeID the node ID.
1147 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1148 		   retrieved vnode structure shall be written.
1149 	\param reenter \c true, if this function is called (indirectly) from within
1150 		   a file system.
1151 	\return \c B_OK, if everything when fine, an error code otherwise.
1152 */
1153 static status_t
1154 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1155 	int reenter)
1156 {
1157 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1158 		mountID, vnodeID, _vnode));
1159 
1160 	rw_lock_read_lock(&sVnodeLock);
1161 
1162 	int32 tries = BUSY_VNODE_RETRIES;
1163 restart:
1164 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1165 	AutoLocker<Vnode> nodeLocker(vnode);
1166 
1167 	if (vnode && vnode->IsBusy()) {
1168 		// vnodes in the Removed state (except ones still Unpublished)
1169 		// which are also Busy will disappear soon, so we do not wait for them.
1170 		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();
1171 
1172 		nodeLocker.Unlock();
1173 		rw_lock_read_unlock(&sVnodeLock);
1174 		if (!canWait) {
1175 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1176 				mountID, vnodeID);
1177 			return B_BUSY;
1178 		}
1179 		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
1180 			return B_BUSY;
1181 
1182 		rw_lock_read_lock(&sVnodeLock);
1183 		goto restart;
1184 	}
1185 
1186 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1187 
1188 	status_t status;
1189 
1190 	if (vnode) {
1191 		if (vnode->ref_count == 0) {
1192 			// this vnode has been unused before
1193 			vnode_used(vnode);
1194 		}
1195 		inc_vnode_ref_count(vnode);
1196 
1197 		nodeLocker.Unlock();
1198 		rw_lock_read_unlock(&sVnodeLock);
1199 	} else {
1200 		// we need to create a new vnode and read it in
1201 		rw_lock_read_unlock(&sVnodeLock);
1202 			// unlock -- create_new_vnode_and_lock() write-locks on success
1203 		bool nodeCreated;
1204 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1205 			nodeCreated);
1206 		if (status != B_OK)
1207 			return status;
1208 
1209 		if (!nodeCreated) {
1210 			rw_lock_read_lock(&sVnodeLock);
1211 			rw_lock_write_unlock(&sVnodeLock);
1212 			goto restart;
1213 		}
1214 
1215 		rw_lock_write_unlock(&sVnodeLock);
1216 
1217 		int type = 0;
1218 		uint32 flags = 0;
1219 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1220 			&flags, reenter);
1221 		if (status == B_OK && (vnode->private_node == NULL || vnode->ops == NULL)) {
1222 			KDEBUG_ONLY(panic("filesystem get_vnode returned 0 with unset fields"));
1223 			status = B_BAD_VALUE;
1224 		}
1225 
1226 		bool gotNode = status == B_OK;
1227 		bool publishSpecialSubNode = false;
1228 		if (gotNode) {
1229 			vnode->SetType(type);
1230 			publishSpecialSubNode = is_special_node_type(type)
1231 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1232 		}
1233 
1234 		if (gotNode && publishSpecialSubNode)
1235 			status = create_special_sub_node(vnode, flags);
1236 
1237 		if (status != B_OK) {
1238 			if (gotNode)
1239 				FS_CALL(vnode, put_vnode, reenter);
1240 
1241 			rw_lock_write_lock(&sVnodeLock);
1242 			sVnodeTable->Remove(vnode);
1243 			remove_vnode_from_mount_list(vnode, vnode->mount);
1244 			rw_lock_write_unlock(&sVnodeLock);
1245 
1246 			object_cache_free(sVnodeCache, vnode, 0);
1247 			return status;
1248 		}
1249 
1250 		rw_lock_read_lock(&sVnodeLock);
1251 		vnode->Lock();
1252 
1253 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1254 		vnode->SetBusy(false);
1255 
1256 		vnode->Unlock();
1257 		rw_lock_read_unlock(&sVnodeLock);
1258 	}
1259 
1260 	TRACE(("get_vnode: returning %p\n", vnode));
1261 
1262 	*_vnode = vnode;
1263 	return B_OK;
1264 }
1265 
1266 
1267 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1268 	if the counter dropped to 0.
1269 
1270 	The caller must, of course, own a reference to the vnode to call this
1271 	function.
1272 	The caller must not hold the sVnodeLock or the sMountLock.
1273 
1274 	\param vnode the vnode.
1275 */
1276 static inline void
1277 put_vnode(struct vnode* vnode)
1278 {
1279 	dec_vnode_ref_count(vnode, false, false);
1280 }
1281 
1282 
1283 static void
1284 free_unused_vnodes(int32 level)
1285 {
1286 	unused_vnodes_check_started();
1287 
1288 	if (level == B_NO_LOW_RESOURCE) {
1289 		unused_vnodes_check_done();
1290 		return;
1291 	}
1292 
1293 	flush_hot_vnodes();
1294 
1295 	// determine how many nodes to free
1296 	uint32 count = 1;
1297 	{
1298 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1299 
1300 		switch (level) {
1301 			case B_LOW_RESOURCE_NOTE:
1302 				count = sUnusedVnodes / 100;
1303 				break;
1304 			case B_LOW_RESOURCE_WARNING:
1305 				count = sUnusedVnodes / 10;
1306 				break;
1307 			case B_LOW_RESOURCE_CRITICAL:
1308 				count = sUnusedVnodes;
1309 				break;
1310 		}
1311 
1312 		if (count > sUnusedVnodes)
1313 			count = sUnusedVnodes;
1314 	}
1315 
1316 	// Write back the modified pages of some unused vnodes and free them.
1317 
1318 	for (uint32 i = 0; i < count; i++) {
1319 		ReadLocker vnodesReadLocker(sVnodeLock);
1320 
1321 		// get the first node
1322 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1323 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1324 			&sUnusedVnodeList);
1325 		unusedVnodesLocker.Unlock();
1326 
1327 		if (vnode == NULL)
1328 			break;
1329 
1330 		// lock the node
1331 		AutoLocker<Vnode> nodeLocker(vnode);
1332 
1333 		// Check whether the node is still unused -- since we only append to the
1334 		// tail of the unused queue, the vnode should still be at its head.
1335 		// Alternatively we could check its ref count for 0 and its busy flag,
1336 		// but if the node is no longer at the head of the queue, it means it
1337 		// has been touched in the meantime, i.e. it is no longer the least
1338 		// recently used unused vnode and we rather don't free it.
1339 		unusedVnodesLocker.Lock();
1340 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1341 			continue;
1342 		unusedVnodesLocker.Unlock();
1343 
1344 		ASSERT(!vnode->IsBusy());
1345 
1346 		// grab a reference
1347 		inc_vnode_ref_count(vnode);
1348 		vnode_used(vnode);
1349 
1350 		// write back changes and free the node
1351 		nodeLocker.Unlock();
1352 		vnodesReadLocker.Unlock();
1353 
1354 		if (vnode->cache != NULL)
1355 			vnode->cache->WriteModified();
1356 
1357 		dec_vnode_ref_count(vnode, true, false);
1358 			// this should free the vnode when it's still unused
1359 	}
1360 
1361 	unused_vnodes_check_done();
1362 }
1363 
1364 
1365 /*!	Gets the vnode the given vnode is covering.
1366 
1367 	The caller must have \c sVnodeLock read-locked at least.
1368 
1369 	The function returns a reference to the retrieved vnode (if any), the caller
1370 	is responsible to free.
1371 
1372 	\param vnode The vnode whose covered node shall be returned.
1373 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1374 		vnode.
1375 */
1376 static inline Vnode*
1377 get_covered_vnode_locked(Vnode* vnode)
1378 {
1379 	if (Vnode* coveredNode = vnode->covers) {
1380 		while (coveredNode->covers != NULL)
1381 			coveredNode = coveredNode->covers;
1382 
1383 		inc_vnode_ref_count(coveredNode);
1384 		return coveredNode;
1385 	}
1386 
1387 	return NULL;
1388 }
1389 
1390 
1391 /*!	Gets the vnode the given vnode is covering.
1392 
1393 	The caller must not hold \c sVnodeLock. Note that this implies a race
1394 	condition, since the situation can change at any time.
1395 
1396 	The function returns a reference to the retrieved vnode (if any), the caller
1397 	is responsible to free.
1398 
1399 	\param vnode The vnode whose covered node shall be returned.
1400 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1401 		vnode.
1402 */
1403 static inline Vnode*
1404 get_covered_vnode(Vnode* vnode)
1405 {
1406 	if (!vnode->IsCovering())
1407 		return NULL;
1408 
1409 	ReadLocker vnodeReadLocker(sVnodeLock);
1410 	return get_covered_vnode_locked(vnode);
1411 }
1412 
1413 
1414 /*!	Gets the vnode the given vnode is covered by.
1415 
1416 	The caller must have \c sVnodeLock read-locked at least.
1417 
1418 	The function returns a reference to the retrieved vnode (if any), the caller
1419 	is responsible to free.
1420 
1421 	\param vnode The vnode whose covering node shall be returned.
1422 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1423 		any vnode.
1424 */
1425 static Vnode*
1426 get_covering_vnode_locked(Vnode* vnode)
1427 {
1428 	if (Vnode* coveringNode = vnode->covered_by) {
1429 		while (coveringNode->covered_by != NULL)
1430 			coveringNode = coveringNode->covered_by;
1431 
1432 		inc_vnode_ref_count(coveringNode);
1433 		return coveringNode;
1434 	}
1435 
1436 	return NULL;
1437 }
1438 
1439 
1440 /*!	Gets the vnode the given vnode is covered by.
1441 
1442 	The caller must not hold \c sVnodeLock. Note that this implies a race
1443 	condition, since the situation can change at any time.
1444 
1445 	The function returns a reference to the retrieved vnode (if any), the caller
1446 	is responsible to free.
1447 
1448 	\param vnode The vnode whose covering node shall be returned.
1449 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1450 		any vnode.
1451 */
1452 static inline Vnode*
1453 get_covering_vnode(Vnode* vnode)
1454 {
1455 	if (!vnode->IsCovered())
1456 		return NULL;
1457 
1458 	ReadLocker vnodeReadLocker(sVnodeLock);
1459 	return get_covering_vnode_locked(vnode);
1460 }
1461 
1462 
1463 static void
1464 free_unused_vnodes()
1465 {
1466 	free_unused_vnodes(
1467 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1468 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1469 }
1470 
1471 
1472 static void
1473 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1474 {
1475 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1476 
1477 	free_unused_vnodes(level);
1478 }
1479 
1480 
1481 static inline void
1482 put_advisory_locking(struct advisory_locking* locking)
1483 {
1484 	release_sem(locking->lock);
1485 }
1486 
1487 
1488 /*!	Returns the advisory_locking object of the \a vnode in case it
1489 	has one, and locks it.
1490 	You have to call put_advisory_locking() when you're done with
1491 	it.
1492 	Note, you must not have the vnode mutex locked when calling
1493 	this function.
1494 */
1495 static struct advisory_locking*
1496 get_advisory_locking(struct vnode* vnode)
1497 {
1498 	rw_lock_read_lock(&sVnodeLock);
1499 	vnode->Lock();
1500 
1501 	struct advisory_locking* locking = vnode->advisory_locking;
1502 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1503 
1504 	vnode->Unlock();
1505 	rw_lock_read_unlock(&sVnodeLock);
1506 
1507 	if (lock >= 0)
1508 		lock = acquire_sem(lock);
1509 	if (lock < 0) {
1510 		// This means the locking has been deleted in the mean time
1511 		// or had never existed in the first place - otherwise, we
1512 		// would get the lock at some point.
1513 		return NULL;
1514 	}
1515 
1516 	return locking;
1517 }
1518 
1519 
1520 /*!	Creates a locked advisory_locking object, and attaches it to the
1521 	given \a vnode.
1522 	Returns B_OK in case of success - also if the vnode got such an
1523 	object from someone else in the mean time, you'll still get this
1524 	one locked then.
1525 */
1526 static status_t
1527 create_advisory_locking(struct vnode* vnode)
1528 {
1529 	if (vnode == NULL)
1530 		return B_FILE_ERROR;
1531 
1532 	ObjectDeleter<advisory_locking> lockingDeleter;
1533 	struct advisory_locking* locking = NULL;
1534 
1535 	while (get_advisory_locking(vnode) == NULL) {
1536 		// no locking object set on the vnode yet, create one
1537 		if (locking == NULL) {
1538 			locking = new(std::nothrow) advisory_locking;
1539 			if (locking == NULL)
1540 				return B_NO_MEMORY;
1541 			lockingDeleter.SetTo(locking);
1542 
1543 			locking->wait_sem = create_sem(0, "advisory lock");
1544 			if (locking->wait_sem < 0)
1545 				return locking->wait_sem;
1546 
1547 			locking->lock = create_sem(0, "advisory locking");
1548 			if (locking->lock < 0)
1549 				return locking->lock;
1550 		}
1551 
1552 		// set our newly created locking object
1553 		ReadLocker _(sVnodeLock);
1554 		AutoLocker<Vnode> nodeLocker(vnode);
1555 		if (vnode->advisory_locking == NULL) {
1556 			vnode->advisory_locking = locking;
1557 			lockingDeleter.Detach();
1558 			return B_OK;
1559 		}
1560 	}
1561 
1562 	// The vnode already had a locking object. That's just as well.
1563 
1564 	return B_OK;
1565 }
1566 
1567 
1568 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1569 	with the advisory_lock \a lock.
1570 */
1571 static bool
1572 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1573 {
1574 	if (flock == NULL)
1575 		return true;
1576 
1577 	return lock->start <= flock->l_start - 1 + flock->l_len
1578 		&& lock->end >= flock->l_start;
1579 }
1580 
1581 
1582 /*!	Tests whether acquiring a lock would block.
1583 */
1584 static status_t
1585 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1586 {
1587 	flock->l_type = F_UNLCK;
1588 
1589 	struct advisory_locking* locking = get_advisory_locking(vnode);
1590 	if (locking == NULL)
1591 		return B_OK;
1592 
1593 	team_id team = team_get_current_team_id();
1594 
1595 	LockList::Iterator iterator = locking->locks.GetIterator();
1596 	while (iterator.HasNext()) {
1597 		struct advisory_lock* lock = iterator.Next();
1598 
1599 		 if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1600 			// locks do overlap
1601 			if (flock->l_type != F_RDLCK || !lock->shared) {
1602 				// collision
1603 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1604 				flock->l_whence = SEEK_SET;
1605 				flock->l_start = lock->start;
1606 				flock->l_len = lock->end - lock->start + 1;
1607 				flock->l_pid = lock->team;
1608 				break;
1609 			}
1610 		}
1611 	}
1612 
1613 	put_advisory_locking(locking);
1614 	return B_OK;
1615 }
1616 
1617 
1618 /*!	Removes the specified lock, or all locks of the calling team
1619 	if \a flock is NULL.
1620 */
1621 static status_t
1622 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1623 	struct file_descriptor* descriptor, struct flock* flock)
1624 {
1625 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1626 
1627 	struct advisory_locking* locking = get_advisory_locking(vnode);
1628 	if (locking == NULL)
1629 		return B_OK;
1630 
1631 	// find matching lock entries
1632 
1633 	LockList::Iterator iterator = locking->locks.GetIterator();
1634 	while (iterator.HasNext()) {
1635 		struct advisory_lock* lock = iterator.Next();
1636 		bool removeLock = false;
1637 
1638 		if (descriptor != NULL && lock->bound_to == descriptor) {
1639 			// Remove flock() locks
1640 			removeLock = true;
1641 		} else if (lock->bound_to == context
1642 				&& advisory_lock_intersects(lock, flock)) {
1643 			// Remove POSIX locks
1644 			bool endsBeyond = false;
1645 			bool startsBefore = false;
1646 			if (flock != NULL) {
1647 				startsBefore = lock->start < flock->l_start;
1648 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1649 			}
1650 
1651 			if (!startsBefore && !endsBeyond) {
1652 				// lock is completely contained in flock
1653 				removeLock = true;
1654 			} else if (startsBefore && !endsBeyond) {
1655 				// cut the end of the lock
1656 				lock->end = flock->l_start - 1;
1657 			} else if (!startsBefore && endsBeyond) {
1658 				// cut the start of the lock
1659 				lock->start = flock->l_start + flock->l_len;
1660 			} else {
1661 				// divide the lock into two locks
1662 				struct advisory_lock* secondLock = new advisory_lock;
1663 				if (secondLock == NULL) {
1664 					// TODO: we should probably revert the locks we already
1665 					// changed... (ie. allocate upfront)
1666 					put_advisory_locking(locking);
1667 					return B_NO_MEMORY;
1668 				}
1669 
1670 				lock->end = flock->l_start - 1;
1671 
1672 				secondLock->bound_to = context;
1673 				secondLock->team = lock->team;
1674 				secondLock->session = lock->session;
1675 				// values must already be normalized when getting here
1676 				secondLock->start = flock->l_start + flock->l_len;
1677 				secondLock->end = lock->end;
1678 				secondLock->shared = lock->shared;
1679 
1680 				locking->locks.Add(secondLock);
1681 			}
1682 		}
1683 
1684 		if (removeLock) {
1685 			// this lock is no longer used
1686 			iterator.Remove();
1687 			delete lock;
1688 		}
1689 	}
1690 
1691 	bool removeLocking = locking->locks.IsEmpty();
1692 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1693 
1694 	put_advisory_locking(locking);
1695 
1696 	if (removeLocking) {
1697 		// We can remove the whole advisory locking structure; it's no
1698 		// longer used
1699 		locking = get_advisory_locking(vnode);
1700 		if (locking != NULL) {
1701 			ReadLocker locker(sVnodeLock);
1702 			AutoLocker<Vnode> nodeLocker(vnode);
1703 
1704 			// the locking could have been changed in the mean time
1705 			if (locking->locks.IsEmpty()) {
1706 				vnode->advisory_locking = NULL;
1707 				nodeLocker.Unlock();
1708 				locker.Unlock();
1709 
1710 				// we've detached the locking from the vnode, so we can
1711 				// safely delete it
1712 				delete locking;
1713 			} else {
1714 				// the locking is in use again
1715 				nodeLocker.Unlock();
1716 				locker.Unlock();
1717 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1718 			}
1719 		}
1720 	}
1721 
1722 	return B_OK;
1723 }
1724 
1725 
1726 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1727 	will wait for the lock to become available, if there are any collisions
1728 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1729 
1730 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1731 	BSD flock() semantics are used, that is, all children can unlock the file
1732 	in question (we even allow parents to remove the lock, though, but that
1733 	seems to be in line to what the BSD's are doing).
1734 */
1735 static status_t
1736 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1737 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1738 {
1739 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1740 		vnode, flock, wait ? "yes" : "no"));
1741 
1742 	bool shared = flock->l_type == F_RDLCK;
1743 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1744 	status_t status = B_OK;
1745 
1746 	// TODO: do deadlock detection!
1747 
1748 	struct advisory_locking* locking;
1749 
1750 	while (true) {
1751 		// if this vnode has an advisory_locking structure attached,
1752 		// lock that one and search for any colliding file lock
1753 		status = create_advisory_locking(vnode);
1754 		if (status != B_OK)
1755 			return status;
1756 
1757 		locking = vnode->advisory_locking;
1758 		team_id team = team_get_current_team_id();
1759 		sem_id waitForLock = -1;
1760 
1761 		// test for collisions
1762 		LockList::Iterator iterator = locking->locks.GetIterator();
1763 		while (iterator.HasNext()) {
1764 			struct advisory_lock* lock = iterator.Next();
1765 
1766 			// TODO: locks from the same team might be joinable!
1767 			if ((lock->team != team || lock->bound_to != boundTo)
1768 					&& advisory_lock_intersects(lock, flock)) {
1769 				// locks do overlap
1770 				if (!shared || !lock->shared) {
1771 					// we need to wait
1772 					waitForLock = locking->wait_sem;
1773 					break;
1774 				}
1775 			}
1776 		}
1777 
1778 		if (waitForLock < 0)
1779 			break;
1780 
1781 		// We need to wait. Do that or fail now, if we've been asked not to.
1782 
1783 		if (!wait) {
1784 			put_advisory_locking(locking);
1785 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1786 		}
1787 
1788 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1789 			B_CAN_INTERRUPT, 0);
1790 		if (status != B_OK && status != B_BAD_SEM_ID)
1791 			return status;
1792 
1793 		// We have been notified, but we need to re-lock the locking object. So
1794 		// go another round...
1795 	}
1796 
1797 	// install new lock
1798 
1799 	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
1800 	if (lock == NULL) {
1801 		put_advisory_locking(locking);
1802 		return B_NO_MEMORY;
1803 	}
1804 
1805 	lock->bound_to = boundTo;
1806 	lock->team = team_get_current_team_id();
1807 	lock->session = thread_get_current_thread()->team->session_id;
1808 	// values must already be normalized when getting here
1809 	lock->start = flock->l_start;
1810 	lock->end = flock->l_start - 1 + flock->l_len;
1811 	lock->shared = shared;
1812 
1813 	locking->locks.Add(lock);
1814 	put_advisory_locking(locking);
1815 
1816 	return status;
1817 }
1818 
1819 
1820 /*!	Normalizes the \a flock structure to make it easier to compare the
1821 	structure with others. The l_start and l_len fields are set to absolute
1822 	values according to the l_whence field.
1823 */
1824 static status_t
1825 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1826 {
1827 	switch (flock->l_whence) {
1828 		case SEEK_SET:
1829 			break;
1830 		case SEEK_CUR:
1831 			flock->l_start += descriptor->pos;
1832 			break;
1833 		case SEEK_END:
1834 		{
1835 			struct vnode* vnode = descriptor->u.vnode;
1836 			struct stat stat;
1837 			status_t status;
1838 
1839 			if (!HAS_FS_CALL(vnode, read_stat))
1840 				return B_UNSUPPORTED;
1841 
1842 			status = FS_CALL(vnode, read_stat, &stat);
1843 			if (status != B_OK)
1844 				return status;
1845 
1846 			flock->l_start += stat.st_size;
1847 			break;
1848 		}
1849 		default:
1850 			return B_BAD_VALUE;
1851 	}
1852 
1853 	if (flock->l_start < 0)
1854 		flock->l_start = 0;
1855 	if (flock->l_len == 0)
1856 		flock->l_len = OFF_MAX;
1857 
1858 	// don't let the offset and length overflow
1859 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1860 		flock->l_len = OFF_MAX - flock->l_start;
1861 
1862 	if (flock->l_len < 0) {
1863 		// a negative length reverses the region
1864 		flock->l_start += flock->l_len;
1865 		flock->l_len = -flock->l_len;
1866 	}
1867 
1868 	return B_OK;
1869 }
1870 
1871 
1872 static void
1873 replace_vnode_if_disconnected(struct fs_mount* mount,
1874 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1875 	struct vnode* fallBack, bool lockRootLock)
1876 {
1877 	struct vnode* givenVnode = vnode;
1878 	bool vnodeReplaced = false;
1879 
1880 	ReadLocker vnodeReadLocker(sVnodeLock);
1881 
1882 	if (lockRootLock)
1883 		mutex_lock(&sIOContextRootLock);
1884 
1885 	while (vnode != NULL && vnode->mount == mount
1886 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1887 		if (vnode->covers != NULL) {
1888 			// redirect the vnode to the covered vnode
1889 			vnode = vnode->covers;
1890 		} else
1891 			vnode = fallBack;
1892 
1893 		vnodeReplaced = true;
1894 	}
1895 
1896 	// If we've replaced the node, grab a reference for the new one.
1897 	if (vnodeReplaced && vnode != NULL)
1898 		inc_vnode_ref_count(vnode);
1899 
1900 	if (lockRootLock)
1901 		mutex_unlock(&sIOContextRootLock);
1902 
1903 	vnodeReadLocker.Unlock();
1904 
1905 	if (vnodeReplaced)
1906 		put_vnode(givenVnode);
1907 }
1908 
1909 
1910 /*!	Disconnects all file descriptors that are associated with the
1911 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1912 	\a mount object.
1913 
1914 	Note, after you've called this function, there might still be ongoing
1915 	accesses - they won't be interrupted if they already happened before.
1916 	However, any subsequent access will fail.
1917 
1918 	This is not a cheap function and should be used with care and rarely.
1919 	TODO: there is currently no means to stop a blocking read/write!
1920 */
1921 static void
1922 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1923 	struct vnode* vnodeToDisconnect)
1924 {
1925 	// iterate over all teams and peek into their file descriptors
1926 	TeamListIterator teamIterator;
1927 	while (Team* team = teamIterator.Next()) {
1928 		BReference<Team> teamReference(team, true);
1929 		TeamLocker teamLocker(team);
1930 
1931 		// lock the I/O context
1932 		io_context* context = team->io_context;
1933 		if (context == NULL)
1934 			continue;
1935 		MutexLocker contextLocker(context->io_mutex);
1936 
1937 		teamLocker.Unlock();
1938 
1939 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1940 			sRoot, true);
1941 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1942 			sRoot, false);
1943 
1944 		for (uint32 i = 0; i < context->table_size; i++) {
1945 			struct file_descriptor* descriptor = context->fds[i];
1946 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1947 				continue;
1948 
1949 			inc_fd_ref_count(descriptor);
1950 
1951 			// if this descriptor points at this mount, we
1952 			// need to disconnect it to be able to unmount
1953 			struct vnode* vnode = fd_vnode(descriptor);
1954 			if (vnodeToDisconnect != NULL) {
1955 				if (vnode == vnodeToDisconnect)
1956 					disconnect_fd(descriptor);
1957 			} else if ((vnode != NULL && vnode->mount == mount)
1958 				|| (vnode == NULL && descriptor->u.mount == mount))
1959 				disconnect_fd(descriptor);
1960 
1961 			put_fd(descriptor);
1962 		}
1963 	}
1964 }
1965 
1966 
1967 /*!	\brief Gets the root node of the current IO context.
1968 	If \a kernel is \c true, the kernel IO context will be used.
1969 	The caller obtains a reference to the returned node.
1970 */
1971 struct vnode*
1972 get_root_vnode(bool kernel)
1973 {
1974 	if (!kernel) {
1975 		// Get current working directory from io context
1976 		struct io_context* context = get_current_io_context(kernel);
1977 
1978 		mutex_lock(&sIOContextRootLock);
1979 
1980 		struct vnode* root = context->root;
1981 		if (root != NULL)
1982 			inc_vnode_ref_count(root);
1983 
1984 		mutex_unlock(&sIOContextRootLock);
1985 
1986 		if (root != NULL)
1987 			return root;
1988 
1989 		// That should never happen.
1990 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1991 			"have a root\n", team_get_current_team_id());
1992 	}
1993 
1994 	inc_vnode_ref_count(sRoot);
1995 	return sRoot;
1996 }
1997 
1998 
1999 /*!	\brief Gets the directory path and leaf name for a given path.
2000 
2001 	The supplied \a path is transformed to refer to the directory part of
2002 	the entry identified by the original path, and into the buffer \a filename
2003 	the leaf name of the original entry is written.
2004 	Neither the returned path nor the leaf name can be expected to be
2005 	canonical.
2006 
2007 	\param path The path to be analyzed. Must be able to store at least one
2008 		   additional character.
2009 	\param filename The buffer into which the leaf name will be written.
2010 		   Must be of size B_FILE_NAME_LENGTH at least.
2011 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2012 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2013 		   if the given path name is empty.
2014 */
2015 static status_t
2016 get_dir_path_and_leaf(char* path, char* filename)
2017 {
2018 	if (*path == '\0')
2019 		return B_ENTRY_NOT_FOUND;
2020 
2021 	char* last = strrchr(path, '/');
2022 		// '/' are not allowed in file names!
2023 
2024 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2025 
2026 	if (last == NULL) {
2027 		// this path is single segment with no '/' in it
2028 		// ex. "foo"
2029 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2030 			return B_NAME_TOO_LONG;
2031 
2032 		strcpy(path, ".");
2033 	} else {
2034 		last++;
2035 		if (last[0] == '\0') {
2036 			// special case: the path ends in one or more '/' - remove them
2037 			while (*--last == '/' && last != path);
2038 			last[1] = '\0';
2039 
2040 			if (last == path && last[0] == '/') {
2041 				// This path points to the root of the file system
2042 				strcpy(filename, ".");
2043 				return B_OK;
2044 			}
2045 			for (; last != path && *(last - 1) != '/'; last--);
2046 				// rewind to the start of the leaf before the '/'
2047 		}
2048 
2049 		// normal leaf: replace the leaf portion of the path with a '.'
2050 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2051 			return B_NAME_TOO_LONG;
2052 
2053 		last[0] = '.';
2054 		last[1] = '\0';
2055 	}
2056 	return B_OK;
2057 }
2058 
2059 
2060 static status_t
2061 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2062 	bool traverse, bool kernel, VnodePutter& _vnode)
2063 {
2064 	char clonedName[B_FILE_NAME_LENGTH + 1];
2065 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2066 		return B_NAME_TOO_LONG;
2067 
2068 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2069 	struct vnode* directory;
2070 
2071 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2072 	if (status < 0)
2073 		return status;
2074 
2075 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2076 		_vnode, NULL);
2077 }
2078 
2079 
2080 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2081 	and returns the respective vnode.
2082 	On success a reference to the vnode is acquired for the caller.
2083 */
2084 static status_t
2085 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2086 {
2087 	ino_t id;
2088 	bool missing;
2089 
2090 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2091 		return missing ? B_ENTRY_NOT_FOUND
2092 			: get_vnode(dir->device, id, _vnode, true, false);
2093 	}
2094 
2095 	status_t status = FS_CALL(dir, lookup, name, &id);
2096 	if (status != B_OK)
2097 		return status;
2098 
2099 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2100 	// have a reference and just need to look the node up.
2101 	rw_lock_read_lock(&sVnodeLock);
2102 	*_vnode = lookup_vnode(dir->device, id);
2103 	rw_lock_read_unlock(&sVnodeLock);
2104 
2105 	if (*_vnode == NULL) {
2106 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2107 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2108 		return B_ENTRY_NOT_FOUND;
2109 	}
2110 
2111 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2112 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2113 //		(*_vnode)->mount->id, (*_vnode)->id);
2114 
2115 	return B_OK;
2116 }
2117 
2118 
2119 /*!	Returns the vnode for the relative \a path starting at the specified \a vnode.
2120 
2121 	\param[in,out] path The relative path being searched. Must not be NULL.
2122 	If the function returns successfully, \a path contains the name of the last path
2123 	component. This function clobbers the buffer pointed to by \a path only
2124 	if it does contain more than one component.
2125 
2126 	If the function fails and leafName is not NULL, \a _vnode contains the last directory,
2127 	the caller has the responsibility to call put_vnode() on it.
2128 
2129 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2130 	it is successful or not!
2131 
2132 	\param[out] _vnode If the function returns B_OK, points to the found node.
2133 	\param[out] _vnode If the function returns something else and leafname is not NULL: set to the
2134 		last existing directory in the path. The caller has responsibility to release it using
2135 		put_vnode().
2136 	\param[out] _vnode If the function returns something else and leafname is NULL: not used.
2137 */
2138 static status_t
2139 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2140 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2141 	ino_t* _parentID, char* leafName)
2142 {
2143 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2144 	ASSERT(!_vnode.IsSet());
2145 
2146 	VnodePutter vnode(start);
2147 
2148 	if (path == NULL)
2149 		return B_BAD_VALUE;
2150 	if (*path == '\0')
2151 		return B_ENTRY_NOT_FOUND;
2152 
2153 	status_t status = B_OK;
2154 	ino_t lastParentID = vnode->id;
2155 	while (true) {
2156 		char* nextPath;
2157 
2158 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2159 			path));
2160 
2161 		// done?
2162 		if (path[0] == '\0')
2163 			break;
2164 
2165 		// walk to find the next path component ("path" will point to a single
2166 		// path component), and filter out multiple slashes
2167 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2168 				nextPath++);
2169 
2170 		bool directoryFound = false;
2171 		if (*nextPath == '/') {
2172 			directoryFound = true;
2173 			*nextPath = '\0';
2174 			do
2175 				nextPath++;
2176 			while (*nextPath == '/');
2177 		}
2178 
2179 		// See if the '..' is at a covering vnode move to the covered
2180 		// vnode so we pass the '..' path to the underlying filesystem.
2181 		// Also prevent breaking the root of the IO context.
2182 		if (strcmp("..", path) == 0) {
2183 			if (vnode.Get() == ioContext->root) {
2184 				// Attempted prison break! Keep it contained.
2185 				path = nextPath;
2186 				continue;
2187 			}
2188 
2189 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2190 				vnode.SetTo(coveredVnode);
2191 		}
2192 
2193 		// check if vnode is really a directory
2194 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2195 			status = B_NOT_A_DIRECTORY;
2196 
2197 		// Check if we have the right to search the current directory vnode.
2198 		// If a file system doesn't have the access() function, we assume that
2199 		// searching a directory is always allowed
2200 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2201 			status = FS_CALL(vnode.Get(), access, X_OK);
2202 
2203 		// Tell the filesystem to get the vnode of this path component (if we
2204 		// got the permission from the call above)
2205 		VnodePutter nextVnode;
2206 		if (status == B_OK) {
2207 			struct vnode* temp = NULL;
2208 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2209 			nextVnode.SetTo(temp);
2210 		}
2211 
2212 		if (status != B_OK) {
2213 			if (leafName != NULL) {
2214 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2215 				_vnode.SetTo(vnode.Detach());
2216 			}
2217 			return status;
2218 		}
2219 
2220 		// If the new node is a symbolic link, resolve it (if we've been told
2221 		// to do it)
2222 		if (S_ISLNK(nextVnode->Type())
2223 			&& (traverseLeafLink || directoryFound)) {
2224 			size_t bufferSize;
2225 			char* buffer;
2226 
2227 			TRACE(("traverse link\n"));
2228 
2229 			if (count + 1 > B_MAX_SYMLINKS)
2230 				return B_LINK_LIMIT;
2231 
2232 			bufferSize = B_PATH_NAME_LENGTH;
2233 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2234 			if (buffer == NULL)
2235 				return B_NO_MEMORY;
2236 
2237 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2238 				bufferSize--;
2239 				status = FS_CALL(nextVnode.Get(), read_symlink, buffer, &bufferSize);
2240 				// null-terminate
2241 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2242 					buffer[bufferSize] = '\0';
2243 			} else
2244 				status = B_BAD_VALUE;
2245 
2246 			if (status != B_OK) {
2247 				free(buffer);
2248 				return status;
2249 			}
2250 			nextVnode.Unset();
2251 
2252 			// Check if we start from the root directory or the current
2253 			// directory ("vnode" still points to that one).
2254 			// Cut off all leading slashes if it's the root directory
2255 			path = buffer;
2256 			bool absoluteSymlink = false;
2257 			if (path[0] == '/') {
2258 				// we don't need the old directory anymore
2259 				vnode.Unset();
2260 
2261 				while (*++path == '/')
2262 					;
2263 
2264 				mutex_lock(&sIOContextRootLock);
2265 				vnode.SetTo(ioContext->root);
2266 				inc_vnode_ref_count(vnode.Get());
2267 				mutex_unlock(&sIOContextRootLock);
2268 
2269 				absoluteSymlink = true;
2270 			}
2271 
2272 			inc_vnode_ref_count(vnode.Get());
2273 				// balance the next recursion - we will decrement the
2274 				// ref_count of the vnode, no matter if we succeeded or not
2275 
2276 			if (absoluteSymlink && *path == '\0') {
2277 				// symlink was just "/"
2278 				nextVnode.SetTo(vnode.Get());
2279 			} else {
2280 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2281 					ioContext, nextVnode, &lastParentID, leafName);
2282 			}
2283 
2284 			object_cache_free(sPathNameCache, buffer, 0);
2285 
2286 			if (status != B_OK) {
2287 				if (leafName != NULL)
2288 					_vnode.SetTo(nextVnode.Detach());
2289 				return status;
2290 			}
2291 		} else
2292 			lastParentID = vnode->id;
2293 
2294 		// decrease the ref count on the old dir we just looked up into
2295 		vnode.Unset();
2296 
2297 		path = nextPath;
2298 		vnode.SetTo(nextVnode.Detach());
2299 
2300 		// see if we hit a covered node
2301 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2302 			vnode.SetTo(coveringNode);
2303 	}
2304 
2305 	_vnode.SetTo(vnode.Detach());
2306 	if (_parentID)
2307 		*_parentID = lastParentID;
2308 
2309 	return B_OK;
2310 }
2311 
2312 
2313 static status_t
2314 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2315 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2316 {
2317 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2318 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2319 }
2320 
2321 
2322 static status_t
2323 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2324 	ino_t* _parentID, bool kernel)
2325 {
2326 	struct vnode* start = NULL;
2327 
2328 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2329 
2330 	if (!path)
2331 		return B_BAD_VALUE;
2332 
2333 	if (*path == '\0')
2334 		return B_ENTRY_NOT_FOUND;
2335 
2336 	// figure out if we need to start at root or at cwd
2337 	if (*path == '/') {
2338 		if (sRoot == NULL) {
2339 			// we're a bit early, aren't we?
2340 			return B_ERROR;
2341 		}
2342 
2343 		while (*++path == '/')
2344 			;
2345 		start = get_root_vnode(kernel);
2346 
2347 		if (*path == '\0') {
2348 			_vnode.SetTo(start);
2349 			return B_OK;
2350 		}
2351 
2352 	} else {
2353 		struct io_context* context = get_current_io_context(kernel);
2354 
2355 		mutex_lock(&context->io_mutex);
2356 		start = context->cwd;
2357 		if (start != NULL)
2358 			inc_vnode_ref_count(start);
2359 		mutex_unlock(&context->io_mutex);
2360 
2361 		if (start == NULL)
2362 			return B_ERROR;
2363 	}
2364 
2365 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2366 		_parentID);
2367 }
2368 
2369 
2370 /*! Returns the vnode in the next to last segment of the path, and returns
2371 	the last portion in filename.
2372 	The path buffer must be able to store at least one additional character.
2373 */
2374 static status_t
2375 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2376 	bool kernel)
2377 {
2378 	status_t status = get_dir_path_and_leaf(path, filename);
2379 	if (status != B_OK)
2380 		return status;
2381 
2382 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2383 }
2384 
2385 
2386 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2387 		   to by a FD + path pair.
2388 
2389 	\a path must be given in either case. \a fd might be omitted, in which
2390 	case \a path is either an absolute path or one relative to the current
2391 	directory. If both a supplied and \a path is relative it is reckoned off
2392 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2393 	ignored.
2394 
2395 	The caller has the responsibility to call put_vnode() on the returned
2396 	directory vnode.
2397 
2398 	\param fd The FD. May be < 0.
2399 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2400 	       is modified by this function. It must have at least room for a
2401 	       string one character longer than the path it contains.
2402 	\param _vnode A pointer to a variable the directory vnode shall be written
2403 		   into.
2404 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2405 		   the leaf name of the specified entry will be written.
2406 	\param kernel \c true, if invoked from inside the kernel, \c false if
2407 		   invoked from userland.
2408 	\return \c B_OK, if everything went fine, another error code otherwise.
2409 */
2410 static status_t
2411 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2412 	char* filename, bool kernel)
2413 {
2414 	if (!path)
2415 		return B_BAD_VALUE;
2416 	if (*path == '\0')
2417 		return B_ENTRY_NOT_FOUND;
2418 	if (fd < 0)
2419 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2420 
2421 	status_t status = get_dir_path_and_leaf(path, filename);
2422 	if (status != B_OK)
2423 		return status;
2424 
2425 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2426 }
2427 
2428 
2429 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2430 		   to by a vnode + path pair.
2431 
2432 	\a path must be given in either case. \a vnode might be omitted, in which
2433 	case \a path is either an absolute path or one relative to the current
2434 	directory. If both a supplied and \a path is relative it is reckoned off
2435 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2436 	ignored.
2437 
2438 	The caller has the responsibility to call put_vnode() on the returned
2439 	directory vnode.
2440 
2441 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2442 	it is successful or not.
2443 
2444 	\param vnode The vnode. May be \c NULL.
2445 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2446 	       is modified by this function. It must have at least room for a
2447 	       string one character longer than the path it contains.
2448 	\param _vnode A pointer to a variable the directory vnode shall be written
2449 		   into.
2450 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2451 		   the leaf name of the specified entry will be written.
2452 	\param kernel \c true, if invoked from inside the kernel, \c false if
2453 		   invoked from userland.
2454 	\return \c B_OK, if everything went fine, another error code otherwise.
2455 */
2456 static status_t
2457 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2458 	VnodePutter& _vnode, char* filename, bool kernel)
2459 {
2460 	VnodePutter vnodePutter(vnode);
2461 
2462 	if (!path)
2463 		return B_BAD_VALUE;
2464 	if (*path == '\0')
2465 		return B_ENTRY_NOT_FOUND;
2466 	if (vnode == NULL || path[0] == '/')
2467 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2468 
2469 	status_t status = get_dir_path_and_leaf(path, filename);
2470 	if (status != B_OK)
2471 		return status;
2472 
2473 	vnodePutter.Detach();
2474 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2475 }
2476 
2477 
2478 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2479 */
2480 static status_t
2481 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2482 	size_t bufferSize, struct io_context* ioContext)
2483 {
2484 	if (bufferSize < sizeof(struct dirent))
2485 		return B_BAD_VALUE;
2486 
2487 	// See if the vnode is covering another vnode and move to the covered
2488 	// vnode so we get the underlying file system
2489 	VnodePutter vnodePutter;
2490 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2491 		vnode = coveredVnode;
2492 		vnodePutter.SetTo(vnode);
2493 	}
2494 
2495 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2496 		// The FS supports getting the name of a vnode.
2497 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2498 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2499 			return B_OK;
2500 	}
2501 
2502 	// The FS doesn't support getting the name of a vnode. So we search the
2503 	// parent directory for the vnode, if the caller let us.
2504 
2505 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2506 		return B_UNSUPPORTED;
2507 
2508 	void* cookie;
2509 
2510 	status_t status = FS_CALL(parent, open_dir, &cookie);
2511 	if (status >= B_OK) {
2512 		while (true) {
2513 			uint32 num = 1;
2514 			// We use the FS hook directly instead of dir_read(), since we don't
2515 			// want the entries to be fixed. We have already resolved vnode to
2516 			// the covered node.
2517 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2518 				&num);
2519 			if (status != B_OK)
2520 				break;
2521 			if (num == 0) {
2522 				status = B_ENTRY_NOT_FOUND;
2523 				break;
2524 			}
2525 
2526 			if (vnode->id == buffer->d_ino) {
2527 				// found correct entry!
2528 				break;
2529 			}
2530 		}
2531 
2532 		FS_CALL(parent, close_dir, cookie);
2533 		FS_CALL(parent, free_dir_cookie, cookie);
2534 	}
2535 	return status;
2536 }
2537 
2538 
2539 static status_t
2540 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2541 	size_t nameSize, bool kernel)
2542 {
2543 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2544 	struct dirent* dirent = (struct dirent*)buffer;
2545 
2546 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2547 		get_current_io_context(kernel));
2548 	if (status != B_OK)
2549 		return status;
2550 
2551 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2552 		return B_BUFFER_OVERFLOW;
2553 
2554 	return B_OK;
2555 }
2556 
2557 
2558 /*!	Gets the full path to a given directory vnode.
2559 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2560 	file system doesn't support this call, it will fall back to iterating
2561 	through the parent directory to get the name of the child.
2562 
2563 	To protect against circular loops, it supports a maximum tree depth
2564 	of 256 levels.
2565 
2566 	Note that the path may not be correct the time this function returns!
2567 	It doesn't use any locking to prevent returning the correct path, as
2568 	paths aren't safe anyway: the path to a file can change at any time.
2569 
2570 	It might be a good idea, though, to check if the returned path exists
2571 	in the calling function (it's not done here because of efficiency)
2572 */
2573 static status_t
2574 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2575 	bool kernel)
2576 {
2577 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2578 
2579 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2580 		return B_BAD_VALUE;
2581 
2582 	if (!S_ISDIR(vnode->Type()))
2583 		return B_NOT_A_DIRECTORY;
2584 
2585 	char* path = buffer;
2586 	int32 insert = bufferSize;
2587 	int32 maxLevel = 256;
2588 	int32 length;
2589 	status_t status = B_OK;
2590 	struct io_context* ioContext = get_current_io_context(kernel);
2591 
2592 	// we don't use get_vnode() here because this call is more
2593 	// efficient and does all we need from get_vnode()
2594 	inc_vnode_ref_count(vnode);
2595 
2596 	path[--insert] = '\0';
2597 		// the path is filled right to left
2598 
2599 	while (true) {
2600 		// If the node is the context's root, bail out. Otherwise resolve mount
2601 		// points.
2602 		if (vnode == ioContext->root)
2603 			break;
2604 
2605 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2606 			put_vnode(vnode);
2607 			vnode = coveredVnode;
2608 		}
2609 
2610 		// lookup the parent vnode
2611 		struct vnode* parentVnode;
2612 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2613 		if (status != B_OK)
2614 			goto out;
2615 
2616 		if (parentVnode == vnode) {
2617 			// The caller apparently got their hands on a node outside of their
2618 			// context's root. Now we've hit the global root.
2619 			put_vnode(parentVnode);
2620 			break;
2621 		}
2622 
2623 		// get the node's name
2624 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2625 			// also used for fs_read_dir()
2626 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2627 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2628 			sizeof(nameBuffer), ioContext);
2629 
2630 		// release the current vnode, we only need its parent from now on
2631 		put_vnode(vnode);
2632 		vnode = parentVnode;
2633 
2634 		if (status != B_OK)
2635 			goto out;
2636 
2637 		// TODO: add an explicit check for loops in about 10 levels to do
2638 		// real loop detection
2639 
2640 		// don't go deeper as 'maxLevel' to prevent circular loops
2641 		if (maxLevel-- < 0) {
2642 			status = B_LINK_LIMIT;
2643 			goto out;
2644 		}
2645 
2646 		// add the name in front of the current path
2647 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2648 		length = strlen(name);
2649 		insert -= length;
2650 		if (insert <= 0) {
2651 			status = B_RESULT_NOT_REPRESENTABLE;
2652 			goto out;
2653 		}
2654 		memcpy(path + insert, name, length);
2655 		path[--insert] = '/';
2656 	}
2657 
2658 	// the root dir will result in an empty path: fix it
2659 	if (path[insert] == '\0')
2660 		path[--insert] = '/';
2661 
2662 	TRACE(("  path is: %s\n", path + insert));
2663 
2664 	// move the path to the start of the buffer
2665 	length = bufferSize - insert;
2666 	memmove(buffer, path + insert, length);
2667 
2668 out:
2669 	put_vnode(vnode);
2670 	return status;
2671 }
2672 
2673 
2674 /*!	Checks the length of every path component, and adds a '.'
2675 	if the path ends in a slash.
2676 	The given path buffer must be able to store at least one
2677 	additional character.
2678 */
2679 static status_t
2680 check_path(char* to)
2681 {
2682 	int32 length = 0;
2683 
2684 	// check length of every path component
2685 
2686 	while (*to) {
2687 		char* begin;
2688 		if (*to == '/')
2689 			to++, length++;
2690 
2691 		begin = to;
2692 		while (*to != '/' && *to)
2693 			to++, length++;
2694 
2695 		if (to - begin > B_FILE_NAME_LENGTH)
2696 			return B_NAME_TOO_LONG;
2697 	}
2698 
2699 	if (length == 0)
2700 		return B_ENTRY_NOT_FOUND;
2701 
2702 	// complete path if there is a slash at the end
2703 
2704 	if (*(to - 1) == '/') {
2705 		if (length > B_PATH_NAME_LENGTH - 2)
2706 			return B_NAME_TOO_LONG;
2707 
2708 		to[0] = '.';
2709 		to[1] = '\0';
2710 	}
2711 
2712 	return B_OK;
2713 }
2714 
2715 
2716 static struct file_descriptor*
2717 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2718 {
2719 	struct file_descriptor* descriptor
2720 		= get_fd(get_current_io_context(kernel), fd);
2721 	if (descriptor == NULL)
2722 		return NULL;
2723 
2724 	struct vnode* vnode = fd_vnode(descriptor);
2725 	if (vnode == NULL) {
2726 		put_fd(descriptor);
2727 		return NULL;
2728 	}
2729 
2730 	// ToDo: when we can close a file descriptor at any point, investigate
2731 	//	if this is still valid to do (accessing the vnode without ref_count
2732 	//	or locking)
2733 	*_vnode = vnode;
2734 	return descriptor;
2735 }
2736 
2737 
2738 static struct vnode*
2739 get_vnode_from_fd(int fd, bool kernel)
2740 {
2741 	struct file_descriptor* descriptor;
2742 	struct vnode* vnode;
2743 
2744 	descriptor = get_fd(get_current_io_context(kernel), fd);
2745 	if (descriptor == NULL)
2746 		return NULL;
2747 
2748 	vnode = fd_vnode(descriptor);
2749 	if (vnode != NULL)
2750 		inc_vnode_ref_count(vnode);
2751 
2752 	put_fd(descriptor);
2753 	return vnode;
2754 }
2755 
2756 
2757 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2758 	only the path will be considered. In this case, the \a path must not be
2759 	NULL.
2760 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2761 	and should be NULL for files.
2762 */
2763 static status_t
2764 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2765 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2766 {
2767 	if (fd < 0 && !path)
2768 		return B_BAD_VALUE;
2769 
2770 	if (path != NULL && *path == '\0')
2771 		return B_ENTRY_NOT_FOUND;
2772 
2773 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2774 		// no FD or absolute path
2775 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2776 	}
2777 
2778 	// FD only, or FD + relative path
2779 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2780 	if (vnode == NULL)
2781 		return B_FILE_ERROR;
2782 
2783 	if (path != NULL) {
2784 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2785 			_vnode, _parentID);
2786 	}
2787 
2788 	// there is no relative path to take into account
2789 
2790 	_vnode.SetTo(vnode);
2791 	if (_parentID)
2792 		*_parentID = -1;
2793 
2794 	return B_OK;
2795 }
2796 
2797 
2798 struct vnode*
2799 fd_vnode(struct file_descriptor* descriptor)
2800 {
2801 	if (descriptor->ops == &sFileOps
2802 			|| descriptor->ops == &sDirectoryOps
2803 			|| descriptor->ops == &sAttributeOps
2804 			|| descriptor->ops == &sAttributeDirectoryOps)
2805 		return descriptor->u.vnode;
2806 
2807 	return NULL;
2808 }
2809 
2810 
2811 bool
2812 fd_is_file(struct file_descriptor* descriptor)
2813 {
2814 	return descriptor->ops == &sFileOps;
2815 }
2816 
2817 
2818 static int
2819 get_new_fd(struct fd_ops* ops, struct fs_mount* mount, struct vnode* vnode,
2820 	void* cookie, int openMode, bool kernel)
2821 {
2822 	struct file_descriptor* descriptor;
2823 	int fd;
2824 
2825 	// If the vnode is locked, we don't allow creating a new file/directory
2826 	// file_descriptor for it
2827 	if (vnode && vnode->mandatory_locked_by != NULL
2828 		&& (ops == &sFileOps || ops == &sDirectoryOps))
2829 		return B_BUSY;
2830 
2831 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2832 		return B_BAD_VALUE;
2833 
2834 	descriptor = alloc_fd();
2835 	if (!descriptor)
2836 		return B_NO_MEMORY;
2837 
2838 	if (vnode)
2839 		descriptor->u.vnode = vnode;
2840 	else
2841 		descriptor->u.mount = mount;
2842 	descriptor->cookie = cookie;
2843 
2844 	descriptor->ops = ops;
2845 	descriptor->open_mode = openMode;
2846 
2847 	if (descriptor->ops->fd_seek != NULL) {
2848 		// some kinds of files are not seekable
2849 		switch (vnode->Type() & S_IFMT) {
2850 			case S_IFIFO:
2851 			case S_IFSOCK:
2852 				ASSERT(descriptor->pos == -1);
2853 				break;
2854 
2855 			// The Open Group Base Specs don't mention any file types besides pipes,
2856 			// FIFOs, and sockets specially, so we allow seeking all others.
2857 			default:
2858 				descriptor->pos = 0;
2859 				break;
2860 		}
2861 	}
2862 
2863 	io_context* context = get_current_io_context(kernel);
2864 	fd = new_fd(context, descriptor);
2865 	if (fd < 0) {
2866 		descriptor->ops = NULL;
2867 		put_fd(descriptor);
2868 		return B_NO_MORE_FDS;
2869 	}
2870 
2871 	mutex_lock(&context->io_mutex);
2872 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2873 	mutex_unlock(&context->io_mutex);
2874 
2875 	return fd;
2876 }
2877 
2878 
2879 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2880 	vfs_normalize_path(). See there for more documentation.
2881 */
2882 static status_t
2883 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2884 {
2885 	VnodePutter dir;
2886 	status_t error;
2887 
2888 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2889 		// get dir vnode + leaf name
2890 		char leaf[B_FILE_NAME_LENGTH];
2891 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2892 		if (error != B_OK)
2893 			return error;
2894 		strcpy(path, leaf);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		VnodePutter fileVnode;
2899 		if (traverseLink) {
2900 			inc_vnode_ref_count(dir.Get());
2901 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2902 					NULL) == B_OK) {
2903 				fileExists = true;
2904 			}
2905 		}
2906 
2907 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2908 			// we're done -- construct the path
2909 			bool hasLeaf = true;
2910 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2911 				// special cases "." and ".." -- get the dir, forget the leaf
2912 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2913 					dir, NULL);
2914 				if (error != B_OK)
2915 					return error;
2916 				hasLeaf = false;
2917 			}
2918 
2919 			// get the directory path
2920 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2921 			if (error != B_OK)
2922 				return error;
2923 
2924 			// append the leaf name
2925 			if (hasLeaf) {
2926 				// insert a directory separator if this is not the file system
2927 				// root
2928 				if ((strcmp(path, "/") != 0
2929 					&& strlcat(path, "/", pathSize) >= pathSize)
2930 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2931 					return B_NAME_TOO_LONG;
2932 				}
2933 			}
2934 
2935 			return B_OK;
2936 		}
2937 
2938 		// read link
2939 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2940 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2941 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2942 			if (error != B_OK)
2943 				return error;
2944 			if (bufferSize < B_PATH_NAME_LENGTH)
2945 				path[bufferSize] = '\0';
2946 		} else
2947 			return B_BAD_VALUE;
2948 	}
2949 
2950 	return B_LINK_LIMIT;
2951 }
2952 
2953 
2954 static status_t
2955 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2956 	struct io_context* ioContext)
2957 {
2958 	// Make sure the IO context root is not bypassed.
2959 	if (parent == ioContext->root) {
2960 		*_device = parent->device;
2961 		*_node = parent->id;
2962 		return B_OK;
2963 	}
2964 
2965 	inc_vnode_ref_count(parent);
2966 		// vnode_path_to_vnode() puts the node
2967 
2968 	// ".." is guaranteed not to be clobbered by this call
2969 	VnodePutter vnode;
2970 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2971 		ioContext, vnode, NULL);
2972 	if (status == B_OK) {
2973 		*_device = vnode->device;
2974 		*_node = vnode->id;
2975 	}
2976 
2977 	return status;
2978 }
2979 
2980 
2981 #ifdef ADD_DEBUGGER_COMMANDS
2982 
2983 
2984 static void
2985 _dump_advisory_locking(advisory_locking* locking)
2986 {
2987 	if (locking == NULL)
2988 		return;
2989 
2990 	kprintf("   lock:        %" B_PRId32, locking->lock);
2991 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2992 
2993 	int32 index = 0;
2994 	LockList::Iterator iterator = locking->locks.GetIterator();
2995 	while (iterator.HasNext()) {
2996 		struct advisory_lock* lock = iterator.Next();
2997 
2998 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2999 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3000 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3001 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3002 	}
3003 }
3004 
3005 
3006 static void
3007 _dump_mount(struct fs_mount* mount)
3008 {
3009 	kprintf("MOUNT: %p\n", mount);
3010 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3011 	kprintf(" device_name:   %s\n", mount->device_name);
3012 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3013 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3014 	kprintf(" partition:     %p\n", mount->partition);
3015 	kprintf(" lock:          %p\n", &mount->lock);
3016 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3017 		mount->owns_file_device ? " owns_file_device" : "");
3018 
3019 	fs_volume* volume = mount->volume;
3020 	while (volume != NULL) {
3021 		kprintf(" volume %p:\n", volume);
3022 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3023 		kprintf("  private_volume:   %p\n", volume->private_volume);
3024 		kprintf("  ops:              %p\n", volume->ops);
3025 		kprintf("  file_system:      %p\n", volume->file_system);
3026 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3027 		volume = volume->super_volume;
3028 	}
3029 
3030 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3031 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3032 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3033 	set_debug_variable("_partition", (addr_t)mount->partition);
3034 }
3035 
3036 
/*!	Prepends \a name to the path that starts at \a buffer + \a bufferSize,
	inserting a '/' between them unless that path is empty.
	\a bufferSize is the free space in front of the path and is reduced by
	what has been prepended.
	\return \c false (leaving everything untouched), if the free space does
		not suffice, \c true otherwise.
*/
static bool
debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
	const char* name)
{
	const size_t nameLength = strlen(name);

	// a separator is only needed if something follows already
	const bool insertSlash = buffer[bufferSize] != '\0';
	const size_t required = nameLength + (insertSlash ? 1 : 0);
	if (required > bufferSize)
		return false;

	if (insertSlash) {
		bufferSize--;
		buffer[bufferSize] = '/';
	}

	bufferSize -= nameLength;
	memcpy(buffer + bufferSize, name, nameLength);

	return true;
}
3055 
3056 
3057 static bool
3058 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3059 	ino_t nodeID)
3060 {
3061 	if (bufferSize == 0)
3062 		return false;
3063 
3064 	bool insertSlash = buffer[bufferSize] != '\0';
3065 	if (insertSlash)
3066 		buffer[--bufferSize] = '/';
3067 
3068 	size_t size = snprintf(buffer, bufferSize,
3069 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3070 	if (size > bufferSize) {
3071 		if (insertSlash)
3072 			bufferSize++;
3073 		return false;
3074 	}
3075 
3076 	if (size < bufferSize)
3077 		memmove(buffer + bufferSize - size, buffer, size);
3078 
3079 	bufferSize -= size;
3080 	return true;
3081 }
3082 
3083 
3084 static char*
3085 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3086 	bool& _truncated)
3087 {
3088 	// null-terminate the path
3089 	buffer[--bufferSize] = '\0';
3090 
3091 	while (true) {
3092 		while (vnode->covers != NULL)
3093 			vnode = vnode->covers;
3094 
3095 		if (vnode == sRoot) {
3096 			_truncated = bufferSize == 0;
3097 			if (!_truncated)
3098 				buffer[--bufferSize] = '/';
3099 			return buffer + bufferSize;
3100 		}
3101 
3102 		// resolve the name
3103 		ino_t dirID;
3104 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3105 			vnode->id, dirID);
3106 		if (name == NULL) {
3107 			// Failed to resolve the name -- prepend "<dev,node>/".
3108 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3109 				vnode->mount->id, vnode->id);
3110 			return buffer + bufferSize;
3111 		}
3112 
3113 		// prepend the name
3114 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3115 			_truncated = true;
3116 			return buffer + bufferSize;
3117 		}
3118 
3119 		// resolve the directory node
3120 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3121 		if (nextVnode == NULL) {
3122 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3123 				vnode->mount->id, dirID);
3124 			return buffer + bufferSize;
3125 		}
3126 
3127 		vnode = nextVnode;
3128 	}
3129 }
3130 
3131 
3132 static void
3133 _dump_vnode(struct vnode* vnode, bool printPath)
3134 {
3135 	kprintf("VNODE: %p\n", vnode);
3136 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3137 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3138 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3139 	kprintf(" private_node:  %p\n", vnode->private_node);
3140 	kprintf(" mount:         %p\n", vnode->mount);
3141 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3142 	kprintf(" covers:        %p\n", vnode->covers);
3143 	kprintf(" cache:         %p\n", vnode->cache);
3144 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3145 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3146 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3147 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3148 
3149 	_dump_advisory_locking(vnode->advisory_locking);
3150 
3151 	if (printPath) {
3152 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3153 		if (buffer != NULL) {
3154 			bool truncated;
3155 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3156 				B_PATH_NAME_LENGTH, truncated);
3157 			if (path != NULL) {
3158 				kprintf(" path:          ");
3159 				if (truncated)
3160 					kputs("<truncated>/");
3161 				kputs(path);
3162 				kputs("\n");
3163 			} else
3164 				kprintf("Failed to resolve vnode path.\n");
3165 
3166 			debug_free(buffer);
3167 		} else
3168 			kprintf("Failed to allocate memory for constructing the path.\n");
3169 	}
3170 
3171 	set_debug_variable("_node", (addr_t)vnode->private_node);
3172 	set_debug_variable("_mount", (addr_t)vnode->mount);
3173 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3174 	set_debug_variable("_covers", (addr_t)vnode->covers);
3175 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3176 }
3177 
3178 
3179 static int
3180 dump_mount(int argc, char** argv)
3181 {
3182 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3183 		kprintf("usage: %s [id|address]\n", argv[0]);
3184 		return 0;
3185 	}
3186 
3187 	ulong val = parse_expression(argv[1]);
3188 	uint32 id = val;
3189 
3190 	struct fs_mount* mount = sMountsTable->Lookup(id);
3191 	if (mount == NULL) {
3192 		if (IS_USER_ADDRESS(id)) {
3193 			kprintf("fs_mount not found\n");
3194 			return 0;
3195 		}
3196 		mount = (fs_mount*)val;
3197 	}
3198 
3199 	_dump_mount(mount);
3200 	return 0;
3201 }
3202 
3203 
3204 static int
3205 dump_mounts(int argc, char** argv)
3206 {
3207 	if (argc != 1) {
3208 		kprintf("usage: %s\n", argv[0]);
3209 		return 0;
3210 	}
3211 
3212 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3213 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3214 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3215 
3216 	struct fs_mount* mount;
3217 
3218 	MountTable::Iterator iterator(sMountsTable);
3219 	while (iterator.HasNext()) {
3220 		mount = iterator.Next();
3221 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3222 			mount->root_vnode->covers, mount->volume->private_volume,
3223 			mount->volume->file_system_name);
3224 
3225 		fs_volume* volume = mount->volume;
3226 		while (volume->super_volume != NULL) {
3227 			volume = volume->super_volume;
3228 			kprintf("                                     %p %s\n",
3229 				volume->private_volume, volume->file_system_name);
3230 		}
3231 	}
3232 
3233 	return 0;
3234 }
3235 
3236 
3237 static int
3238 dump_vnode(int argc, char** argv)
3239 {
3240 	bool printPath = false;
3241 	int argi = 1;
3242 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3243 		printPath = true;
3244 		argi++;
3245 	}
3246 
3247 	if (argi >= argc || argi + 2 < argc || strcmp(argv[argi], "--help") == 0) {
3248 		print_debugger_command_usage(argv[0]);
3249 		return 0;
3250 	}
3251 
3252 	struct vnode* vnode = NULL;
3253 
3254 	if (argi + 1 == argc) {
3255 		vnode = (struct vnode*)parse_expression(argv[argi]);
3256 		if (IS_USER_ADDRESS(vnode)) {
3257 			kprintf("invalid vnode address\n");
3258 			return 0;
3259 		}
3260 		_dump_vnode(vnode, printPath);
3261 		return 0;
3262 	}
3263 
3264 	dev_t device = parse_expression(argv[argi]);
3265 	ino_t id = parse_expression(argv[argi + 1]);
3266 
3267 	VnodeTable::Iterator iterator(sVnodeTable);
3268 	while (iterator.HasNext()) {
3269 		vnode = iterator.Next();
3270 		if (vnode->id != id || vnode->device != device)
3271 			continue;
3272 
3273 		_dump_vnode(vnode, printPath);
3274 	}
3275 
3276 	return 0;
3277 }
3278 
3279 
3280 static int
3281 dump_vnodes(int argc, char** argv)
3282 {
3283 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3284 		kprintf("usage: %s [device]\n", argv[0]);
3285 		return 0;
3286 	}
3287 
3288 	// restrict dumped nodes to a certain device if requested
3289 	dev_t device = parse_expression(argv[1]);
3290 
3291 	struct vnode* vnode;
3292 
3293 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3294 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3295 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3296 
3297 	VnodeTable::Iterator iterator(sVnodeTable);
3298 	while (iterator.HasNext()) {
3299 		vnode = iterator.Next();
3300 		if (vnode->device != device)
3301 			continue;
3302 
3303 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3304 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3305 			vnode->private_node, vnode->advisory_locking,
3306 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3307 			vnode->IsUnpublished() ? "u" : "-");
3308 	}
3309 
3310 	return 0;
3311 }
3312 
3313 
3314 static int
3315 dump_vnode_caches(int argc, char** argv)
3316 {
3317 	struct vnode* vnode;
3318 
3319 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3320 		kprintf("usage: %s [device]\n", argv[0]);
3321 		return 0;
3322 	}
3323 
3324 	// restrict dumped nodes to a certain device if requested
3325 	dev_t device = -1;
3326 	if (argc > 1)
3327 		device = parse_expression(argv[1]);
3328 
3329 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3330 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3331 
3332 	VnodeTable::Iterator iterator(sVnodeTable);
3333 	while (iterator.HasNext()) {
3334 		vnode = iterator.Next();
3335 		if (vnode->cache == NULL)
3336 			continue;
3337 		if (device != -1 && vnode->device != device)
3338 			continue;
3339 
3340 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3341 			vnode, vnode->device, vnode->id, vnode->cache,
3342 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3343 			vnode->cache->page_count);
3344 	}
3345 
3346 	return 0;
3347 }
3348 
3349 
3350 int
3351 dump_io_context(int argc, char** argv)
3352 {
3353 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3354 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3355 		return 0;
3356 	}
3357 
3358 	struct io_context* context = NULL;
3359 
3360 	if (argc > 1) {
3361 		ulong num = parse_expression(argv[1]);
3362 		if (IS_KERNEL_ADDRESS(num))
3363 			context = (struct io_context*)num;
3364 		else {
3365 			Team* team = team_get_team_struct_locked(num);
3366 			if (team == NULL) {
3367 				kprintf("could not find team with ID %lu\n", num);
3368 				return 0;
3369 			}
3370 			context = (struct io_context*)team->io_context;
3371 		}
3372 	} else
3373 		context = get_current_io_context(true);
3374 
3375 	kprintf("I/O CONTEXT: %p\n", context);
3376 	kprintf(" root vnode:\t%p\n", context->root);
3377 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3378 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3379 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3380 
3381 	if (context->num_used_fds) {
3382 		kprintf("   no.    %*s  ref  open  mode         pos    %*s\n",
3383 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3384 	}
3385 
3386 	for (uint32 i = 0; i < context->table_size; i++) {
3387 		struct file_descriptor* fd = context->fds[i];
3388 		if (fd == NULL)
3389 			continue;
3390 
3391 		kprintf("  %3" B_PRIu32 ":  %p  %3" B_PRId32 "  %4"
3392 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3393 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3394 			fd->pos, fd->cookie,
3395 			(fd_vnode(fd) != NULL) ? "vnode" : "mount",
3396 			fd->u.vnode);
3397 	}
3398 
3399 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3400 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3401 
3402 	set_debug_variable("_cwd", (addr_t)context->cwd);
3403 
3404 	return 0;
3405 }
3406 
3407 
3408 int
3409 dump_vnode_usage(int argc, char** argv)
3410 {
3411 	if (argc != 1) {
3412 		kprintf("usage: %s\n", argv[0]);
3413 		return 0;
3414 	}
3415 
3416 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3417 		sUnusedVnodes, kMaxUnusedVnodes);
3418 
3419 	uint32 count = sVnodeTable->CountElements();
3420 
3421 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3422 		count - sUnusedVnodes);
3423 	return 0;
3424 }
3425 
3426 #endif	// ADD_DEBUGGER_COMMANDS
3427 
3428 
/*!	Fills the memory described by the given iovec array with zeros.

	At most \a bytes bytes are cleared, starting with the first vector; the
	function stops as soon as either all \a vecCount vectors or \a bytes
	bytes have been processed.
*/
static void
zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
{
	const iovec* const end = vecs + vecCount;

	for (const iovec* vec = vecs; vec != end && bytes != 0; vec++) {
		// never clear more than what is left of the requested amount
		size_t chunk = vec->iov_len;
		if (chunk > bytes)
			chunk = bytes;

		memset(vec->iov_base, 0, chunk);
		bytes -= chunk;
	}
}
3440 
3441 
/*!	Does the dirty work of combining the file_io_vecs with the iovecs
	and calls the file system hooks to read/write the request to disk.

	\param vnode The node whose read_pages()/write_pages() hooks are called.
	\param cookie The cookie passed on to those hooks.
	\param fileVecs The file extents to transfer. A negative offset in the
		first vec, respectively an offset of -1 in the generic loop, is
		handled as a sparse extent: reads are satisfied with zeros, writes
		panic.
	\param fileVecCount Number of entries in \a fileVecs.
	\param vecs The memory vectors to read into/write from.
	\param vecCount Number of entries in \a vecs.
	\param _vecIndex In: index of the iovec to start with. Only updated when
		the function runs through to its final return.
	\param _vecOffset In: offset into that iovec. Only updated when the
		function runs through to its final return.
	\param _numBytes In: number of bytes to transfer. Out: number of bytes
		actually transferred (left untouched by the early "contiguous" return
		and on errors).
	\param doWrite \c true to write, \c false to read.
	\return \c B_BAD_VALUE if \a fileVecCount is 0, the error of a failed
		hook call, \c B_IO_ERROR for a sparse write, \c B_OK otherwise.
*/
static status_t
common_file_io_vec_pages(struct vnode* vnode, void* cookie,
	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
	bool doWrite)
{
	if (fileVecCount == 0) {
		// There are no file vecs at this offset, so we're obviously trying
		// to access the file outside of its bounds
		return B_BAD_VALUE;
	}

	size_t numBytes = *_numBytes;
	uint32 fileVecIndex;
	size_t vecOffset = *_vecOffset;
	uint32 vecIndex = *_vecIndex;
	status_t status;
	size_t size;

	if (!doWrite && vecOffset == 0) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly
		// TODO: we could also write directly

		// never read more than the first file vec covers
		if (fileVecs[0].length < (off_t)numBytes)
			size = fileVecs[0].length;
		else
			size = numBytes;

		if (fileVecs[0].offset >= 0) {
			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
				&vecs[vecIndex], vecCount - vecIndex, &size);
		} else {
			// sparse read
			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
			status = B_OK;
		}
		if (status != B_OK)
			return status;

		ASSERT((off_t)size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if ((off_t)size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;

		// first, find out where we have to continue in our iovecs
		for (; vecIndex < vecCount; vecIndex++) {
			if (size < vecs[vecIndex].iov_len)
				break;

			size -= vecs[vecIndex].iov_len;
		}

		// what is left of size is the offset into the iovec we stopped at
		vecOffset = size;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	// NOTE(review): after the direct read above, size has been reduced to
	// the offset into the current iovec, so totalSize/bytesLeft are derived
	// from that offset rather than from the number of bytes already read --
	// confirm that this is intended.
	size_t totalSize = size;
	size_t bytesLeft = numBytes - size;

	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
		const file_io_vec &fileVec = fileVecs[fileVecIndex];
		off_t fileOffset = fileVec.offset;
		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);

		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
			fileLeft));

		// process the complete fileVec
		while (fileLeft > 0) {
			iovec tempVecs[MAX_TEMP_IO_VECS];
			uint32 tempCount = 0;

			// size tracks how much of what is left of the current fileVec
			// (fileLeft) has been assigned to tempVecs
			size = 0;

			// assign what is left of the current fileVec to the tempVecs
			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
					&& tempCount < MAX_TEMP_IO_VECS;) {
				// try to satisfy one iovec per iteration (or as much as
				// possible)

				// bytes left of the current iovec
				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
				if (vecLeft == 0) {
					// this iovec is used up -- continue with the next one
					vecOffset = 0;
					vecIndex++;
					continue;
				}

				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
					vecIndex, vecOffset, size));

				// actually available bytes
				size_t tempVecSize = min_c(vecLeft, fileLeft - size);

				tempVecs[tempCount].iov_base
					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
				tempVecs[tempCount].iov_len = tempVecSize;
				tempCount++;

				size += tempVecSize;
				vecOffset += tempVecSize;
			}

			// bytes is in/out for the hooks: requested on entry, actually
			// transferred on return
			size_t bytes = size;

			if (fileOffset == -1) {
				if (doWrite) {
					panic("sparse write attempt: vnode %p", vnode);
					status = B_IO_ERROR;
				} else {
					// sparse read
					zero_iovecs(tempVecs, tempCount, bytes);
					status = B_OK;
				}
			} else if (doWrite) {
				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
					tempVecs, tempCount, &bytes);
			} else {
				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
					tempVecs, tempCount, &bytes);
			}
			if (status != B_OK)
				return status;

			// totalSize counts what was really transferred, while the
			// remaining counters advance by what was requested
			totalSize += bytes;
			bytesLeft -= size;
			if (fileOffset >= 0)
				fileOffset += size;
			fileLeft -= size;
			//dprintf("-> file left = %Lu\n", fileLeft);

			if (size != bytes || vecIndex >= vecCount) {
				// there are no more bytes or iovecs, let's bail out
				// (note that _vecIndex/_vecOffset are not updated here)
				*_numBytes = totalSize;
				return B_OK;
			}
		}
	}

	*_vecIndex = vecIndex;
	*_vecOffset = vecOffset;
	*_numBytes = totalSize;
	return B_OK;
}
3605 
3606 
/*!	Returns whether the calling process belongs to group \a gid, i.e.
	whether \a gid is its effective group ID or one of the group IDs
	reported by getgroups().
*/
static bool
is_user_in_group(gid_t gid)
{
	if (getegid() == gid)
		return true;

	gid_t supplementary[NGROUPS_MAX];
	const int count = getgroups(NGROUPS_MAX, supplementary);

	// a negative count (getgroups() failure) simply skips the search
	int index = 0;
	while (index < count) {
		if (supplementary[index] == gid)
			return true;
		index++;
	}

	return false;
}
3622 
3623 
3624 static status_t
3625 free_io_context(io_context* context)
3626 {
3627 	uint32 i;
3628 
3629 	TIOC(FreeIOContext(context));
3630 
3631 	if (context->root)
3632 		put_vnode(context->root);
3633 
3634 	if (context->cwd)
3635 		put_vnode(context->cwd);
3636 
3637 	mutex_lock(&context->io_mutex);
3638 
3639 	for (i = 0; i < context->table_size; i++) {
3640 		if (struct file_descriptor* descriptor = context->fds[i]) {
3641 			close_fd(context, descriptor);
3642 			put_fd(descriptor);
3643 		}
3644 	}
3645 
3646 	mutex_destroy(&context->io_mutex);
3647 
3648 	remove_node_monitors(context);
3649 	free(context->fds);
3650 	free(context);
3651 
3652 	return B_OK;
3653 }
3654 
3655 
3656 static status_t
3657 resize_monitor_table(struct io_context* context, const int newSize)
3658 {
3659 	int	status = B_OK;
3660 
3661 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3662 		return B_BAD_VALUE;
3663 
3664 	mutex_lock(&context->io_mutex);
3665 
3666 	if ((size_t)newSize < context->num_monitors) {
3667 		status = B_BUSY;
3668 		goto out;
3669 	}
3670 	context->max_monitors = newSize;
3671 
3672 out:
3673 	mutex_unlock(&context->io_mutex);
3674 	return status;
3675 }
3676 
3677 
3678 //	#pragma mark - public API for file systems
3679 
3680 
3681 extern "C" status_t
3682 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3683 	fs_vnode_ops* ops)
3684 {
3685 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3686 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3687 
3688 	if (privateNode == NULL)
3689 		return B_BAD_VALUE;
3690 
3691 	int32 tries = BUSY_VNODE_RETRIES;
3692 restart:
3693 	// create the node
3694 	bool nodeCreated;
3695 	struct vnode* vnode;
3696 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3697 		nodeCreated);
3698 	if (status != B_OK)
3699 		return status;
3700 
3701 	WriteLocker nodeLocker(sVnodeLock, true);
3702 		// create_new_vnode_and_lock() has locked for us
3703 
3704 	if (!nodeCreated && vnode->IsBusy()) {
3705 		nodeLocker.Unlock();
3706 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3707 			return B_BUSY;
3708 		goto restart;
3709 	}
3710 
3711 	// file system integrity check:
3712 	// test if the vnode already exists and bail out if this is the case!
3713 	if (!nodeCreated) {
3714 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3715 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3716 			vnode->private_node);
3717 		return B_ERROR;
3718 	}
3719 
3720 	vnode->private_node = privateNode;
3721 	vnode->ops = ops;
3722 	vnode->SetUnpublished(true);
3723 
3724 	TRACE(("returns: %s\n", strerror(status)));
3725 
3726 	return status;
3727 }
3728 
3729 
/*!	Publishes the vnode with ID \a vnodeID on \a volume.

	If the vnode is not known yet, it is created with \a privateNode and
	\a ops. If it is known, it must be busy and unpublished with the same
	\a privateNode and \a ops; a vnode that is busy for another reason is
	waited for via retry_busy_vnode(), anything else is rejected.

	Afterwards type and removed flag are set, sub vnodes are created for all
	sub volumes (and a special sub node, if the type requires it and \a flags
	don't forbid it), and finally the vnode is marked unbusy and published.

	\return \c B_BAD_VALUE if the vnode doesn't exist and \a privateNode is
		\c NULL, or if it exists in an unexpected state; \c B_BUSY if it
		stayed busy; the error of creating the vnode or a sub node;
		\c B_OK on success.
*/
extern "C" status_t
publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
	fs_vnode_ops* ops, int type, uint32 flags)
{
	FUNCTION(("publish_vnode()\n"));

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	// (re-)acquired on every retry; jumping back to the label releases it
	WriteLocker locker(sVnodeLock);

	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);

	bool nodeCreated = false;
	if (vnode == NULL) {
		if (privateNode == NULL)
			return B_BAD_VALUE;

		// create the node
		locker.Unlock();
			// create_new_vnode_and_lock() will re-lock for us on success
		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		// adopt the lock create_new_vnode_and_lock() acquired
		locker.SetTo(sVnodeLock, true);
	}

	if (nodeCreated) {
		vnode->private_node = privateNode;
		vnode->ops = ops;
		vnode->SetUnpublished(true);
	} else if (vnode->IsBusy() && vnode->IsUnpublished()
		&& vnode->private_node == privateNode && vnode->ops == ops) {
		// already known, but not published
	} else if (vnode->IsBusy()) {
		// busy for some other reason -- wait for it and start over
		locker.Unlock();
		if (!retry_busy_vnode(tries, volume->id, vnodeID))
			return B_BUSY;
		goto restart;
	} else
		return B_BAD_VALUE;

	bool publishSpecialSubNode = false;

	vnode->SetType(type);
	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
	publishSpecialSubNode = is_special_node_type(type)
		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;

	status_t status = B_OK;

	// create sub vnodes, if necessary
	if (volume->sub_volume != NULL || publishSpecialSubNode) {
		// the sub node hooks are called without holding sVnodeLock
		locker.Unlock();

		fs_volume* subVolume = volume;
		if (volume->sub_volume != NULL) {
			while (status == B_OK && subVolume->sub_volume != NULL) {
				subVolume = subVolume->sub_volume;
				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
					vnode);
			}
		}

		if (status == B_OK && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			// error -- clean up the created sub vnodes
			// NOTE(review): subVolume is the volume whose hook failed, so
			// the loop only visits the volumes between it and \a volume. If
			// create_special_sub_node() failed instead, the last sub volume
			// is skipped, and without any sub volume subVolume == volume, so
			// the loop steps to volume->super_volume -- confirm that this
			// case cannot happen or is handled elsewhere.
			while (subVolume->super_volume != volume) {
				subVolume = subVolume->super_volume;
				subVolume->ops->delete_sub_vnode(subVolume, vnode);
			}
		}

		if (status == B_OK) {
			ReadLocker vnodesReadLocker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);
			vnode->SetBusy(false);
			vnode->SetUnpublished(false);
		} else {
			// publishing failed -- get rid of the vnode again
			locker.Lock();
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			object_cache_free(sVnodeCache, vnode, 0);
		}
	} else {
		// we still hold the write lock -- mark the node unbusy and published
		vnode->SetBusy(false);
		vnode->SetUnpublished(false);
	}

	TRACE(("returns: %s\n", strerror(status)));

	return status;
}
3827 
3828 
3829 extern "C" status_t
3830 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3831 {
3832 	struct vnode* vnode;
3833 
3834 	if (volume == NULL)
3835 		return B_BAD_VALUE;
3836 
3837 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3838 	if (status != B_OK)
3839 		return status;
3840 
3841 	// If this is a layered FS, we need to get the node cookie for the requested
3842 	// layer.
3843 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3844 		fs_vnode resolvedNode;
3845 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3846 			&resolvedNode);
3847 		if (status != B_OK) {
3848 			panic("get_vnode(): Failed to get super node for vnode %p, "
3849 				"volume: %p", vnode, volume);
3850 			put_vnode(vnode);
3851 			return status;
3852 		}
3853 
3854 		if (_privateNode != NULL)
3855 			*_privateNode = resolvedNode.private_node;
3856 	} else if (_privateNode != NULL)
3857 		*_privateNode = vnode->private_node;
3858 
3859 	return B_OK;
3860 }
3861 
3862 
3863 extern "C" status_t
3864 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3865 {
3866 	ReadLocker nodeLocker(sVnodeLock);
3867 
3868 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3869 	if (vnode == NULL)
3870 		return B_BAD_VALUE;
3871 
3872 	inc_vnode_ref_count(vnode);
3873 	return B_OK;
3874 }
3875 
3876 
3877 extern "C" status_t
3878 put_vnode(fs_volume* volume, ino_t vnodeID)
3879 {
3880 	struct vnode* vnode;
3881 
3882 	rw_lock_read_lock(&sVnodeLock);
3883 	vnode = lookup_vnode(volume->id, vnodeID);
3884 	rw_lock_read_unlock(&sVnodeLock);
3885 
3886 	if (vnode == NULL)
3887 		return B_BAD_VALUE;
3888 
3889 	dec_vnode_ref_count(vnode, false, true);
3890 	return B_OK;
3891 }
3892 
3893 
3894 extern "C" status_t
3895 remove_vnode(fs_volume* volume, ino_t vnodeID)
3896 {
3897 	ReadLocker locker(sVnodeLock);
3898 
3899 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3900 	if (vnode == NULL)
3901 		return B_ENTRY_NOT_FOUND;
3902 
3903 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3904 		// this vnode is in use
3905 		return B_BUSY;
3906 	}
3907 
3908 	vnode->Lock();
3909 
3910 	vnode->SetRemoved(true);
3911 	bool removeUnpublished = false;
3912 
3913 	if (vnode->IsUnpublished()) {
3914 		// prepare the vnode for deletion
3915 		removeUnpublished = true;
3916 		vnode->SetBusy(true);
3917 	}
3918 
3919 	vnode->Unlock();
3920 	locker.Unlock();
3921 
3922 	if (removeUnpublished) {
3923 		// If the vnode hasn't been published yet, we delete it here
3924 		atomic_add(&vnode->ref_count, -1);
3925 		free_vnode(vnode, true);
3926 	}
3927 
3928 	return B_OK;
3929 }
3930 
3931 
3932 extern "C" status_t
3933 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3934 {
3935 	struct vnode* vnode;
3936 
3937 	rw_lock_read_lock(&sVnodeLock);
3938 
3939 	vnode = lookup_vnode(volume->id, vnodeID);
3940 	if (vnode) {
3941 		AutoLocker<Vnode> nodeLocker(vnode);
3942 		vnode->SetRemoved(false);
3943 	}
3944 
3945 	rw_lock_read_unlock(&sVnodeLock);
3946 	return B_OK;
3947 }
3948 
3949 
3950 extern "C" status_t
3951 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3952 {
3953 	ReadLocker _(sVnodeLock);
3954 
3955 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3956 		if (_removed != NULL)
3957 			*_removed = vnode->IsRemoved();
3958 		return B_OK;
3959 	}
3960 
3961 	return B_BAD_VALUE;
3962 }
3963 
3964 
3965 extern "C" fs_volume*
3966 volume_for_vnode(fs_vnode* _vnode)
3967 {
3968 	if (_vnode == NULL)
3969 		return NULL;
3970 
3971 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3972 	return vnode->mount->volume;
3973 }
3974 
3975 
3976 extern "C" status_t
3977 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3978 	uid_t nodeUserID)
3979 {
3980 	// get node permissions
3981 	int userPermissions = (mode & S_IRWXU) >> 6;
3982 	int groupPermissions = (mode & S_IRWXG) >> 3;
3983 	int otherPermissions = mode & S_IRWXO;
3984 
3985 	// get the node permissions for this uid/gid
3986 	int permissions = 0;
3987 	uid_t uid = geteuid();
3988 
3989 	if (uid == 0) {
3990 		// user is root
3991 		// root has always read/write permission, but at least one of the
3992 		// X bits must be set for execute permission
3993 		permissions = userPermissions | groupPermissions | otherPermissions
3994 			| S_IROTH | S_IWOTH;
3995 		if (S_ISDIR(mode))
3996 			permissions |= S_IXOTH;
3997 	} else if (uid == nodeUserID) {
3998 		// user is node owner
3999 		permissions = userPermissions;
4000 	} else if (is_user_in_group(nodeGroupID)) {
4001 		// user is in owning group
4002 		permissions = groupPermissions;
4003 	} else {
4004 		// user is one of the others
4005 		permissions = otherPermissions;
4006 	}
4007 
4008 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4009 }
4010 
4011 
#if 0
// Compiled out: FD based wrappers around vfs_read_pages()/vfs_write_pages().
// Each one resolves the descriptor and vnode for \a fd, forwards the request
// with the descriptor's cookie, and puts the descriptor again.
extern "C" status_t
read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
	size_t* _numBytes)
{
	struct file_descriptor* descriptor;
	struct vnode* vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
		count, 0, _numBytes);

	put_fd(descriptor);
	return status;
}


extern "C" status_t
write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
	size_t* _numBytes)
{
	struct file_descriptor* descriptor;
	struct vnode* vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
		count, 0, _numBytes);

	put_fd(descriptor);
	return status;
}
#endif
4050 
4051 
4052 extern "C" status_t
4053 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4054 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4055 	size_t* _bytes)
4056 {
4057 	struct vnode* vnode;
4058 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4059 	if (!descriptor.IsSet())
4060 		return B_FILE_ERROR;
4061 
4062 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4063 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4064 		false);
4065 
4066 	return status;
4067 }
4068 
4069 
4070 extern "C" status_t
4071 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4072 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4073 	size_t* _bytes)
4074 {
4075 	struct vnode* vnode;
4076 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4077 	if (!descriptor.IsSet())
4078 		return B_FILE_ERROR;
4079 
4080 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4081 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4082 		true);
4083 
4084 	return status;
4085 }
4086 
4087 
4088 extern "C" status_t
4089 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4090 {
4091 	// lookup mount -- the caller is required to make sure that the mount
4092 	// won't go away
4093 	ReadLocker locker(sMountLock);
4094 	struct fs_mount* mount = find_mount(mountID);
4095 	if (mount == NULL)
4096 		return B_BAD_VALUE;
4097 	locker.Unlock();
4098 
4099 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4100 }
4101 
4102 
4103 extern "C" status_t
4104 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4105 {
4106 	// lookup mount -- the caller is required to make sure that the mount
4107 	// won't go away
4108 	ReadLocker locker(sMountLock);
4109 	struct fs_mount* mount = find_mount(mountID);
4110 	if (mount == NULL)
4111 		return B_BAD_VALUE;
4112 	locker.Unlock();
4113 
4114 	return mount->entry_cache.Add(dirID, name, -1, true);
4115 }
4116 
4117 
4118 extern "C" status_t
4119 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4120 {
4121 	// lookup mount -- the caller is required to make sure that the mount
4122 	// won't go away
4123 	ReadLocker locker(sMountLock);
4124 	struct fs_mount* mount = find_mount(mountID);
4125 	if (mount == NULL)
4126 		return B_BAD_VALUE;
4127 	locker.Unlock();
4128 
4129 	return mount->entry_cache.Remove(dirID, name);
4130 }
4131 
4132 
4133 //	#pragma mark - private VFS API
4134 //	Functions the VFS exports for other parts of the kernel
4135 
4136 
4137 /*! Acquires another reference to the vnode that has to be released
4138 	by calling vfs_put_vnode().
4139 */
4140 void
4141 vfs_acquire_vnode(struct vnode* vnode)
4142 {
4143 	inc_vnode_ref_count(vnode);
4144 }
4145 
4146 
4147 /*! This is currently called from file_cache_create() only.
4148 	It's probably a temporary solution as long as devfs requires that
4149 	fs_read_pages()/fs_write_pages() are called with the standard
4150 	open cookie and not with a device cookie.
4151 	If that's done differently, remove this call; it has no other
4152 	purpose.
4153 */
4154 extern "C" status_t
4155 vfs_get_cookie_from_fd(int fd, void** _cookie)
4156 {
4157 	struct file_descriptor* descriptor;
4158 
4159 	descriptor = get_fd(get_current_io_context(true), fd);
4160 	if (descriptor == NULL)
4161 		return B_FILE_ERROR;
4162 
4163 	*_cookie = descriptor->cookie;
4164 	return B_OK;
4165 }
4166 
4167 
4168 extern "C" status_t
4169 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4170 {
4171 	*vnode = get_vnode_from_fd(fd, kernel);
4172 
4173 	if (*vnode == NULL)
4174 		return B_FILE_ERROR;
4175 
4176 	return B_NO_ERROR;
4177 }
4178 
4179 
4180 extern "C" status_t
4181 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4182 {
4183 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4184 		path, kernel));
4185 
4186 	KPath pathBuffer;
4187 	if (pathBuffer.InitCheck() != B_OK)
4188 		return B_NO_MEMORY;
4189 
4190 	char* buffer = pathBuffer.LockBuffer();
4191 	strlcpy(buffer, path, pathBuffer.BufferSize());
4192 
4193 	VnodePutter vnode;
4194 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4195 	if (status != B_OK)
4196 		return status;
4197 
4198 	*_vnode = vnode.Detach();
4199 	return B_OK;
4200 }
4201 
4202 
4203 extern "C" status_t
4204 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4205 {
4206 	struct vnode* vnode = NULL;
4207 
4208 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4209 	if (status != B_OK)
4210 		return status;
4211 
4212 	*_vnode = vnode;
4213 	return B_OK;
4214 }
4215 
4216 
4217 extern "C" status_t
4218 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4219 	const char* name, struct vnode** _vnode)
4220 {
4221 	VnodePutter vnode;
4222 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4223 	*_vnode = vnode.Detach();
4224 	return status;
4225 }
4226 
4227 
4228 extern "C" void
4229 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4230 {
4231 	*_mountID = vnode->device;
4232 	*_vnodeID = vnode->id;
4233 }
4234 
4235 
4236 /*!
4237 	Helper function abstracting the process of "converting" a given
4238 	vnode-pointer to a fs_vnode-pointer.
4239 	Currently only used in bindfs.
4240 */
4241 extern "C" fs_vnode*
4242 vfs_fsnode_for_vnode(struct vnode* vnode)
4243 {
4244 	return vnode;
4245 }
4246 
4247 
4248 /*!
4249 	Calls fs_open() on the given vnode and returns a new
4250 	file descriptor for it
4251 */
4252 int
4253 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4254 {
4255 	return open_vnode(vnode, openMode, kernel);
4256 }
4257 
4258 
4259 /*!	Looks up a vnode with the given mount and vnode ID.
4260 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4261 	to the node.
4262 	It's currently only be used by file_cache_create().
4263 */
4264 extern "C" status_t
4265 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4266 {
4267 	rw_lock_read_lock(&sVnodeLock);
4268 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4269 	rw_lock_read_unlock(&sVnodeLock);
4270 
4271 	if (vnode == NULL)
4272 		return B_ERROR;
4273 
4274 	*_vnode = vnode;
4275 	return B_OK;
4276 }
4277 
4278 
4279 extern "C" status_t
4280 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4281 	bool traverseLeafLink, bool kernel, void** _node)
4282 {
4283 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4284 		volume, path, kernel));
4285 
4286 	KPath pathBuffer;
4287 	if (pathBuffer.InitCheck() != B_OK)
4288 		return B_NO_MEMORY;
4289 
4290 	fs_mount* mount;
4291 	status_t status = get_mount(volume->id, &mount);
4292 	if (status != B_OK)
4293 		return status;
4294 
4295 	char* buffer = pathBuffer.LockBuffer();
4296 	strlcpy(buffer, path, pathBuffer.BufferSize());
4297 
4298 	VnodePutter vnode;
4299 
4300 	if (buffer[0] == '/')
4301 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4302 	else {
4303 		inc_vnode_ref_count(mount->root_vnode);
4304 			// vnode_path_to_vnode() releases a reference to the starting vnode
4305 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4306 			kernel, vnode, NULL);
4307 	}
4308 
4309 	put_mount(mount);
4310 
4311 	if (status != B_OK)
4312 		return status;
4313 
4314 	if (vnode->device != volume->id) {
4315 		// wrong mount ID - must not gain access on foreign file system nodes
4316 		return B_BAD_VALUE;
4317 	}
4318 
4319 	// Use get_vnode() to resolve the cookie for the right layer.
4320 	status = get_vnode(volume, vnode->id, _node);
4321 
4322 	return status;
4323 }
4324 
4325 
4326 status_t
4327 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4328 	struct stat* stat, bool kernel)
4329 {
4330 	status_t status;
4331 
4332 	if (path != NULL) {
4333 		// path given: get the stat of the node referred to by (fd, path)
4334 		KPath pathBuffer(path);
4335 		if (pathBuffer.InitCheck() != B_OK)
4336 			return B_NO_MEMORY;
4337 
4338 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4339 			traverseLeafLink, stat, kernel);
4340 	} else {
4341 		// no path given: get the FD and use the FD operation
4342 		FileDescriptorPutter descriptor
4343 			(get_fd(get_current_io_context(kernel), fd));
4344 		if (!descriptor.IsSet())
4345 			return B_FILE_ERROR;
4346 
4347 		if (descriptor->ops->fd_read_stat)
4348 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4349 		else
4350 			status = B_UNSUPPORTED;
4351 	}
4352 
4353 	return status;
4354 }
4355 
4356 
4357 /*!	Finds the full path to the file that contains the module \a moduleName,
4358 	puts it into \a pathBuffer, and returns B_OK for success.
4359 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4360 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4361 	\a pathBuffer is clobbered in any case and must not be relied on if this
4362 	functions returns unsuccessfully.
4363 	\a basePath and \a pathBuffer must not point to the same space.
4364 */
4365 status_t
4366 vfs_get_module_path(const char* basePath, const char* moduleName,
4367 	char* pathBuffer, size_t bufferSize)
4368 {
4369 	status_t status;
4370 	size_t length;
4371 	char* path;
4372 
4373 	if (bufferSize == 0
4374 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4375 		return B_BUFFER_OVERFLOW;
4376 
4377 	VnodePutter dir;
4378 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4379 	if (status != B_OK)
4380 		return status;
4381 
4382 	// the path buffer had been clobbered by the above call
4383 	length = strlcpy(pathBuffer, basePath, bufferSize);
4384 	if (pathBuffer[length - 1] != '/')
4385 		pathBuffer[length++] = '/';
4386 
4387 	path = pathBuffer + length;
4388 	bufferSize -= length;
4389 
4390 	VnodePutter file;
4391 	while (moduleName) {
4392 		char* nextPath = strchr(moduleName, '/');
4393 		if (nextPath == NULL)
4394 			length = strlen(moduleName);
4395 		else {
4396 			length = nextPath - moduleName;
4397 			nextPath++;
4398 		}
4399 
4400 		if (length + 1 >= bufferSize)
4401 			return B_BUFFER_OVERFLOW;
4402 
4403 		memcpy(path, moduleName, length);
4404 		path[length] = '\0';
4405 		moduleName = nextPath;
4406 
4407 		// vnode_path_to_vnode() assumes ownership of the passed dir
4408 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4409 		if (status != B_OK)
4410 			return status;
4411 
4412 		if (S_ISDIR(file->Type())) {
4413 			// goto the next directory
4414 			path[length] = '/';
4415 			path[length + 1] = '\0';
4416 			path += length + 1;
4417 			bufferSize -= length + 1;
4418 
4419 			dir.SetTo(file.Detach());
4420 		} else if (S_ISREG(file->Type())) {
4421 			// it's a file so it should be what we've searched for
4422 			return B_OK;
4423 		} else {
4424 			TRACE(("vfs_get_module_path(): something is strange here: "
4425 				"0x%08" B_PRIx32 "...\n", file->Type()));
4426 			return B_ERROR;
4427 		}
4428 	}
4429 
4430 	// if we got here, the moduleName just pointed to a directory, not to
4431 	// a real module - what should we do in this case?
4432 	return B_ENTRY_NOT_FOUND;
4433 }
4434 
4435 
4436 /*!	\brief Normalizes a given path.
4437 
4438 	The path must refer to an existing or non-existing entry in an existing
4439 	directory, that is chopping off the leaf component the remaining path must
4440 	refer to an existing directory.
4441 
4442 	The returned will be canonical in that it will be absolute, will not
4443 	contain any "." or ".." components or duplicate occurrences of '/'s,
4444 	and none of the directory components will by symbolic links.
4445 
4446 	Any two paths referring to the same entry, will result in the same
4447 	normalized path (well, that is pretty much the definition of `normalized',
4448 	isn't it :-).
4449 
4450 	\param path The path to be normalized.
4451 	\param buffer The buffer into which the normalized path will be written.
4452 		   May be the same one as \a path.
4453 	\param bufferSize The size of \a buffer.
4454 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4455 	\param kernel \c true, if the IO context of the kernel shall be used,
4456 		   otherwise that of the team this thread belongs to. Only relevant,
4457 		   if the path is relative (to get the CWD).
4458 	\return \c B_OK if everything went fine, another error code otherwise.
4459 */
4460 status_t
4461 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4462 	bool traverseLink, bool kernel)
4463 {
4464 	if (!path || !buffer || bufferSize < 1)
4465 		return B_BAD_VALUE;
4466 
4467 	if (path != buffer) {
4468 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4469 			return B_BUFFER_OVERFLOW;
4470 	}
4471 
4472 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4473 }
4474 
4475 
4476 /*!	\brief Gets the parent of the passed in node.
4477 
4478 	Gets the parent of the passed in node, and correctly resolves covered
4479 	nodes.
4480 */
4481 extern "C" status_t
4482 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4483 {
4484 	return resolve_covered_parent(parent, device, node,
4485 		get_current_io_context(true));
4486 }
4487 
4488 
4489 /*!	\brief Creates a special node in the file system.
4490 
4491 	The caller gets a reference to the newly created node (which is passed
4492 	back through \a _createdVnode) and is responsible for releasing it.
4493 
4494 	\param path The path where to create the entry for the node. Can be \c NULL,
4495 		in which case the node is created without an entry in the root FS -- it
4496 		will automatically be deleted when the last reference has been released.
4497 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4498 		the target file system will just create the node with its standard
4499 		operations. Depending on the type of the node a subnode might be created
4500 		automatically, though.
4501 	\param mode The type and permissions for the node to be created.
4502 	\param flags Flags to be passed to the creating FS.
4503 	\param kernel \c true, if called in the kernel context (relevant only if
4504 		\a path is not \c NULL and not absolute).
4505 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4506 		file system creating the node, with the private data pointer and
4507 		operations for the super node. Can be \c NULL.
4508 	\param _createVnode Pointer to pre-allocated storage where to store the
4509 		pointer to the newly created node.
4510 	\return \c B_OK, if everything went fine, another error code otherwise.
4511 */
4512 status_t
4513 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4514 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4515 	struct vnode** _createdVnode)
4516 {
4517 	VnodePutter dirNode;
4518 	char _leaf[B_FILE_NAME_LENGTH];
4519 	char* leaf = NULL;
4520 
4521 	if (path) {
4522 		// We've got a path. Get the dir vnode and the leaf name.
4523 		KPath tmpPathBuffer;
4524 		if (tmpPathBuffer.InitCheck() != B_OK)
4525 			return B_NO_MEMORY;
4526 
4527 		char* tmpPath = tmpPathBuffer.LockBuffer();
4528 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4529 			return B_NAME_TOO_LONG;
4530 
4531 		// get the dir vnode and the leaf name
4532 		leaf = _leaf;
4533 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4534 		if (error != B_OK)
4535 			return error;
4536 	} else {
4537 		// No path. Create the node in the root FS.
4538 		dirNode.SetTo(sRoot);
4539 		inc_vnode_ref_count(dirNode.Get());
4540 	}
4541 
4542 	// check support for creating special nodes
4543 	if (!HAS_FS_CALL(dirNode, create_special_node))
4544 		return B_UNSUPPORTED;
4545 
4546 	// create the node
4547 	fs_vnode superVnode;
4548 	ino_t nodeID;
4549 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4550 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4551 	if (status != B_OK)
4552 		return status;
4553 
4554 	// lookup the node
4555 	rw_lock_read_lock(&sVnodeLock);
4556 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4557 	rw_lock_read_unlock(&sVnodeLock);
4558 
4559 	if (*_createdVnode == NULL) {
4560 		panic("vfs_create_special_node(): lookup of node failed");
4561 		return B_ERROR;
4562 	}
4563 
4564 	return B_OK;
4565 }
4566 
4567 
4568 extern "C" void
4569 vfs_put_vnode(struct vnode* vnode)
4570 {
4571 	put_vnode(vnode);
4572 }
4573 
4574 
4575 extern "C" status_t
4576 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4577 {
4578 	// Get current working directory from io context
4579 	struct io_context* context = get_current_io_context(false);
4580 	status_t status = B_OK;
4581 
4582 	mutex_lock(&context->io_mutex);
4583 
4584 	if (context->cwd != NULL) {
4585 		*_mountID = context->cwd->device;
4586 		*_vnodeID = context->cwd->id;
4587 	} else
4588 		status = B_ERROR;
4589 
4590 	mutex_unlock(&context->io_mutex);
4591 	return status;
4592 }
4593 
4594 
4595 status_t
4596 vfs_unmount(dev_t mountID, uint32 flags)
4597 {
4598 	return fs_unmount(NULL, mountID, flags, true);
4599 }
4600 
4601 
4602 extern "C" status_t
4603 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4604 {
4605 	struct vnode* vnode;
4606 
4607 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4608 	if (status != B_OK)
4609 		return status;
4610 
4611 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4612 	put_vnode(vnode);
4613 	return B_OK;
4614 }
4615 
4616 
4617 extern "C" void
4618 vfs_free_unused_vnodes(int32 level)
4619 {
4620 	vnode_low_resource_handler(NULL,
4621 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4622 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4623 		level);
4624 }
4625 
4626 
4627 extern "C" bool
4628 vfs_can_page(struct vnode* vnode, void* cookie)
4629 {
4630 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4631 
4632 	if (HAS_FS_CALL(vnode, can_page))
4633 		return FS_CALL(vnode, can_page, cookie);
4634 	return false;
4635 }
4636 
4637 
4638 extern "C" status_t
4639 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4640 	const generic_io_vec* vecs, size_t count, uint32 flags,
4641 	generic_size_t* _numBytes)
4642 {
4643 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4644 		vecs, pos));
4645 
4646 #if VFS_PAGES_IO_TRACING
4647 	generic_size_t bytesRequested = *_numBytes;
4648 #endif
4649 
4650 	IORequest request;
4651 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4652 	if (status == B_OK) {
4653 		status = vfs_vnode_io(vnode, cookie, &request);
4654 		if (status == B_OK)
4655 			status = request.Wait();
4656 		*_numBytes = request.TransferredBytes();
4657 	}
4658 
4659 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4660 		status, *_numBytes));
4661 
4662 	return status;
4663 }
4664 
4665 
4666 extern "C" status_t
4667 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4668 	const generic_io_vec* vecs, size_t count, uint32 flags,
4669 	generic_size_t* _numBytes)
4670 {
4671 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4672 		vecs, pos));
4673 
4674 #if VFS_PAGES_IO_TRACING
4675 	generic_size_t bytesRequested = *_numBytes;
4676 #endif
4677 
4678 	IORequest request;
4679 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4680 	if (status == B_OK) {
4681 		status = vfs_vnode_io(vnode, cookie, &request);
4682 		if (status == B_OK)
4683 			status = request.Wait();
4684 		*_numBytes = request.TransferredBytes();
4685 	}
4686 
4687 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4688 		status, *_numBytes));
4689 
4690 	return status;
4691 }
4692 
4693 
4694 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4695 	created if \a allocate is \c true.
4696 	In case it's successful, it will also grab a reference to the cache
4697 	it returns.
4698 */
4699 extern "C" status_t
4700 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4701 {
4702 	if (vnode->cache != NULL) {
4703 		vnode->cache->AcquireRef();
4704 		*_cache = vnode->cache;
4705 		return B_OK;
4706 	}
4707 
4708 	rw_lock_read_lock(&sVnodeLock);
4709 	vnode->Lock();
4710 
4711 	status_t status = B_OK;
4712 
4713 	// The cache could have been created in the meantime
4714 	if (vnode->cache == NULL) {
4715 		if (allocate) {
4716 			// TODO: actually the vnode needs to be busy already here, or
4717 			//	else this won't work...
4718 			bool wasBusy = vnode->IsBusy();
4719 			vnode->SetBusy(true);
4720 
4721 			vnode->Unlock();
4722 			rw_lock_read_unlock(&sVnodeLock);
4723 
4724 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4725 
4726 			rw_lock_read_lock(&sVnodeLock);
4727 			vnode->Lock();
4728 			vnode->SetBusy(wasBusy);
4729 		} else
4730 			status = B_BAD_VALUE;
4731 	}
4732 
4733 	vnode->Unlock();
4734 	rw_lock_read_unlock(&sVnodeLock);
4735 
4736 	if (status == B_OK) {
4737 		vnode->cache->AcquireRef();
4738 		*_cache = vnode->cache;
4739 	}
4740 
4741 	return status;
4742 }
4743 
4744 
4745 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4746 	their own.
4747 	In case it's successful, it will also grab a reference to the cache
4748 	it returns.
4749 */
4750 extern "C" status_t
4751 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4752 {
4753 	rw_lock_read_lock(&sVnodeLock);
4754 	vnode->Lock();
4755 
4756 	status_t status = B_OK;
4757 	if (vnode->cache != NULL) {
4758 		status = B_NOT_ALLOWED;
4759 	} else {
4760 		vnode->cache = _cache;
4761 		_cache->AcquireRef();
4762 	}
4763 
4764 	vnode->Unlock();
4765 	rw_lock_read_unlock(&sVnodeLock);
4766 	return status;
4767 }
4768 
4769 
4770 status_t
4771 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4772 	file_io_vec* vecs, size_t* _count)
4773 {
4774 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4775 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4776 
4777 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4778 }
4779 
4780 
4781 status_t
4782 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4783 {
4784 	status_t status = FS_CALL(vnode, read_stat, stat);
4785 
4786 	// fill in the st_dev and st_ino fields
4787 	if (status == B_OK) {
4788 		stat->st_dev = vnode->device;
4789 		stat->st_ino = vnode->id;
4790 		// the rdev field must stay unset for non-special files
4791 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4792 			stat->st_rdev = -1;
4793 	}
4794 
4795 	return status;
4796 }
4797 
4798 
4799 status_t
4800 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4801 {
4802 	struct vnode* vnode;
4803 	status_t status = get_vnode(device, inode, &vnode, true, false);
4804 	if (status != B_OK)
4805 		return status;
4806 
4807 	status = vfs_stat_vnode(vnode, stat);
4808 
4809 	put_vnode(vnode);
4810 	return status;
4811 }
4812 
4813 
4814 status_t
4815 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4816 {
4817 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4818 }
4819 
4820 
4821 status_t
4822 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4823 	bool kernel, char* path, size_t pathLength)
4824 {
4825 	VnodePutter vnode;
4826 	status_t status;
4827 
4828 	// filter invalid leaf names
4829 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4830 		return B_BAD_VALUE;
4831 
4832 	// get the vnode matching the dir's node_ref
4833 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4834 		// special cases "." and "..": we can directly get the vnode of the
4835 		// referenced directory
4836 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4837 		leaf = NULL;
4838 	} else {
4839 		struct vnode* temp = NULL;
4840 		status = get_vnode(device, inode, &temp, true, false);
4841 		vnode.SetTo(temp);
4842 	}
4843 	if (status != B_OK)
4844 		return status;
4845 
4846 	// get the directory path
4847 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4848 	vnode.Unset();
4849 		// we don't need the vnode anymore
4850 	if (status != B_OK)
4851 		return status;
4852 
4853 	// append the leaf name
4854 	if (leaf) {
4855 		// insert a directory separator if this is not the file system root
4856 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4857 				>= pathLength)
4858 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4859 			return B_NAME_TOO_LONG;
4860 		}
4861 	}
4862 
4863 	return B_OK;
4864 }
4865 
4866 
4867 /*!	If the given descriptor locked its vnode, that lock will be released. */
4868 void
4869 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4870 {
4871 	struct vnode* vnode = fd_vnode(descriptor);
4872 
4873 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4874 		vnode->mandatory_locked_by = NULL;
4875 }
4876 
4877 
4878 /*!	Releases any POSIX locks on the file descriptor. */
4879 status_t
4880 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4881 {
4882 	struct vnode* vnode = descriptor->u.vnode;
4883 	if (vnode == NULL)
4884 		return B_OK;
4885 
4886 	if (HAS_FS_CALL(vnode, release_lock))
4887 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4888 
4889 	return release_advisory_lock(vnode, context, NULL, NULL);
4890 }
4891 
4892 
4893 /*!	Closes all file descriptors of the specified I/O context that
4894 	have the O_CLOEXEC flag set.
4895 */
4896 void
4897 vfs_exec_io_context(io_context* context)
4898 {
4899 	uint32 i;
4900 
4901 	for (i = 0; i < context->table_size; i++) {
4902 		mutex_lock(&context->io_mutex);
4903 
4904 		struct file_descriptor* descriptor = context->fds[i];
4905 		bool remove = false;
4906 
4907 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4908 			context->fds[i] = NULL;
4909 			context->num_used_fds--;
4910 
4911 			remove = true;
4912 		}
4913 
4914 		mutex_unlock(&context->io_mutex);
4915 
4916 		if (remove) {
4917 			close_fd(context, descriptor);
4918 			put_fd(descriptor);
4919 		}
4920 	}
4921 }
4922 
4923 
4924 /*! Sets up a new io_control structure, and inherits the properties
4925 	of the parent io_control if it is given.
4926 */
4927 io_context*
4928 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4929 {
4930 	io_context* context = (io_context*)malloc(sizeof(io_context));
4931 	if (context == NULL)
4932 		return NULL;
4933 
4934 	TIOC(NewIOContext(context, parentContext));
4935 
4936 	memset(context, 0, sizeof(io_context));
4937 	context->ref_count = 1;
4938 
4939 	MutexLocker parentLocker;
4940 
4941 	size_t tableSize;
4942 	if (parentContext != NULL) {
4943 		parentLocker.SetTo(parentContext->io_mutex, false);
4944 		tableSize = parentContext->table_size;
4945 	} else
4946 		tableSize = DEFAULT_FD_TABLE_SIZE;
4947 
4948 	// allocate space for FDs and their close-on-exec flag
4949 	context->fds = (file_descriptor**)malloc(
4950 		sizeof(struct file_descriptor*) * tableSize
4951 		+ sizeof(struct select_info**) * tableSize
4952 		+ (tableSize + 7) / 8);
4953 	if (context->fds == NULL) {
4954 		free(context);
4955 		return NULL;
4956 	}
4957 
4958 	context->select_infos = (select_info**)(context->fds + tableSize);
4959 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4960 
4961 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4962 		+ sizeof(struct select_info**) * tableSize
4963 		+ (tableSize + 7) / 8);
4964 
4965 	mutex_init(&context->io_mutex, "I/O context");
4966 
4967 	// Copy all parent file descriptors
4968 
4969 	if (parentContext != NULL) {
4970 		size_t i;
4971 
4972 		mutex_lock(&sIOContextRootLock);
4973 		context->root = parentContext->root;
4974 		if (context->root)
4975 			inc_vnode_ref_count(context->root);
4976 		mutex_unlock(&sIOContextRootLock);
4977 
4978 		context->cwd = parentContext->cwd;
4979 		if (context->cwd)
4980 			inc_vnode_ref_count(context->cwd);
4981 
4982 		if (parentContext->inherit_fds) {
4983 			for (i = 0; i < tableSize; i++) {
4984 				struct file_descriptor* descriptor = parentContext->fds[i];
4985 
4986 				if (descriptor != NULL
4987 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4988 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4989 					if (closeOnExec && purgeCloseOnExec)
4990 						continue;
4991 
4992 					TFD(InheritFD(context, i, descriptor, parentContext));
4993 
4994 					context->fds[i] = descriptor;
4995 					context->num_used_fds++;
4996 					atomic_add(&descriptor->ref_count, 1);
4997 					atomic_add(&descriptor->open_count, 1);
4998 
4999 					if (closeOnExec)
5000 						fd_set_close_on_exec(context, i, true);
5001 				}
5002 			}
5003 		}
5004 
5005 		parentLocker.Unlock();
5006 	} else {
5007 		context->root = sRoot;
5008 		context->cwd = sRoot;
5009 
5010 		if (context->root)
5011 			inc_vnode_ref_count(context->root);
5012 
5013 		if (context->cwd)
5014 			inc_vnode_ref_count(context->cwd);
5015 	}
5016 
5017 	context->table_size = tableSize;
5018 	context->inherit_fds = parentContext != NULL;
5019 
5020 	list_init(&context->node_monitors);
5021 	context->max_monitors = DEFAULT_NODE_MONITORS;
5022 
5023 	return context;
5024 }
5025 
5026 
5027 void
5028 vfs_get_io_context(io_context* context)
5029 {
5030 	atomic_add(&context->ref_count, 1);
5031 }
5032 
5033 
5034 void
5035 vfs_put_io_context(io_context* context)
5036 {
5037 	if (atomic_add(&context->ref_count, -1) == 1)
5038 		free_io_context(context);
5039 }
5040 
5041 
5042 status_t
5043 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5044 {
5045 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5046 		return B_BAD_VALUE;
5047 
5048 	TIOC(ResizeIOContext(context, newSize));
5049 
5050 	MutexLocker _(context->io_mutex);
5051 
5052 	uint32 oldSize = context->table_size;
5053 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5054 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5055 
5056 	// If the tables shrink, make sure none of the fds being dropped are in use.
5057 	if (newSize < oldSize) {
5058 		for (uint32 i = oldSize; i-- > newSize;) {
5059 			if (context->fds[i])
5060 				return B_BUSY;
5061 		}
5062 	}
5063 
5064 	// store pointers to the old tables
5065 	file_descriptor** oldFDs = context->fds;
5066 	select_info** oldSelectInfos = context->select_infos;
5067 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5068 
5069 	// allocate new tables
5070 	file_descriptor** newFDs = (file_descriptor**)malloc(
5071 		sizeof(struct file_descriptor*) * newSize
5072 		+ sizeof(struct select_infos**) * newSize
5073 		+ newCloseOnExitBitmapSize);
5074 	if (newFDs == NULL)
5075 		return B_NO_MEMORY;
5076 
5077 	context->fds = newFDs;
5078 	context->select_infos = (select_info**)(context->fds + newSize);
5079 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5080 	context->table_size = newSize;
5081 
5082 	// copy entries from old tables
5083 	uint32 toCopy = min_c(oldSize, newSize);
5084 
5085 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5086 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5087 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5088 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5089 
5090 	// clear additional entries, if the tables grow
5091 	if (newSize > oldSize) {
5092 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5093 		memset(context->select_infos + oldSize, 0,
5094 			sizeof(void*) * (newSize - oldSize));
5095 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5096 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5097 	}
5098 
5099 	free(oldFDs);
5100 
5101 	return B_OK;
5102 }
5103 
5104 
5105 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5106 
5107 	Given an arbitrary vnode (identified by mount and node ID), the function
5108 	checks, whether the vnode is covered by another vnode. If it is, the
5109 	function returns the mount and node ID of the covering vnode. Otherwise
5110 	it simply returns the supplied mount and node ID.
5111 
5112 	In case of error (e.g. the supplied node could not be found) the variables
5113 	for storing the resolved mount and node ID remain untouched and an error
5114 	code is returned.
5115 
5116 	\param mountID The mount ID of the vnode in question.
5117 	\param nodeID The node ID of the vnode in question.
5118 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5119 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5120 	\return
5121 	- \c B_OK, if everything went fine,
5122 	- another error code, if something went wrong.
5123 */
5124 status_t
5125 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5126 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5127 {
5128 	// get the node
5129 	struct vnode* node;
5130 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5131 	if (error != B_OK)
5132 		return error;
5133 
5134 	// resolve the node
5135 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5136 		put_vnode(node);
5137 		node = coveringNode;
5138 	}
5139 
5140 	// set the return values
5141 	*resolvedMountID = node->device;
5142 	*resolvedNodeID = node->id;
5143 
5144 	put_vnode(node);
5145 
5146 	return B_OK;
5147 }
5148 
5149 
5150 status_t
5151 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5152 	ino_t* _mountPointNodeID)
5153 {
5154 	ReadLocker nodeLocker(sVnodeLock);
5155 	ReadLocker mountLocker(sMountLock);
5156 
5157 	struct fs_mount* mount = find_mount(mountID);
5158 	if (mount == NULL)
5159 		return B_BAD_VALUE;
5160 
5161 	Vnode* mountPoint = mount->covers_vnode;
5162 
5163 	*_mountPointMountID = mountPoint->device;
5164 	*_mountPointNodeID = mountPoint->id;
5165 
5166 	return B_OK;
5167 }
5168 
5169 
5170 status_t
5171 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5172 	ino_t coveredNodeID)
5173 {
5174 	// get the vnodes
5175 	Vnode* vnode;
5176 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5177 	if (error != B_OK)
5178 		return B_BAD_VALUE;
5179 	VnodePutter vnodePutter(vnode);
5180 
5181 	Vnode* coveredVnode;
5182 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5183 		false);
5184 	if (error != B_OK)
5185 		return B_BAD_VALUE;
5186 	VnodePutter coveredVnodePutter(coveredVnode);
5187 
5188 	// establish the covered/covering links
5189 	WriteLocker locker(sVnodeLock);
5190 
5191 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5192 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5193 		return B_BUSY;
5194 	}
5195 
5196 	vnode->covers = coveredVnode;
5197 	vnode->SetCovering(true);
5198 
5199 	coveredVnode->covered_by = vnode;
5200 	coveredVnode->SetCovered(true);
5201 
5202 	// the vnodes do now reference each other
5203 	inc_vnode_ref_count(vnode);
5204 	inc_vnode_ref_count(coveredVnode);
5205 
5206 	return B_OK;
5207 }
5208 
5209 
5210 int
5211 vfs_getrlimit(int resource, struct rlimit* rlp)
5212 {
5213 	if (!rlp)
5214 		return B_BAD_ADDRESS;
5215 
5216 	switch (resource) {
5217 		case RLIMIT_NOFILE:
5218 		{
5219 			struct io_context* context = get_current_io_context(false);
5220 			MutexLocker _(context->io_mutex);
5221 
5222 			rlp->rlim_cur = context->table_size;
5223 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5224 			return 0;
5225 		}
5226 
5227 		case RLIMIT_NOVMON:
5228 		{
5229 			struct io_context* context = get_current_io_context(false);
5230 			MutexLocker _(context->io_mutex);
5231 
5232 			rlp->rlim_cur = context->max_monitors;
5233 			rlp->rlim_max = MAX_NODE_MONITORS;
5234 			return 0;
5235 		}
5236 
5237 		default:
5238 			return B_BAD_VALUE;
5239 	}
5240 }
5241 
5242 
5243 int
5244 vfs_setrlimit(int resource, const struct rlimit* rlp)
5245 {
5246 	if (!rlp)
5247 		return B_BAD_ADDRESS;
5248 
5249 	switch (resource) {
5250 		case RLIMIT_NOFILE:
5251 			/* TODO: check getuid() */
5252 			if (rlp->rlim_max != RLIM_SAVED_MAX
5253 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5254 				return B_NOT_ALLOWED;
5255 
5256 			return vfs_resize_fd_table(get_current_io_context(false),
5257 				rlp->rlim_cur);
5258 
5259 		case RLIMIT_NOVMON:
5260 			/* TODO: check getuid() */
5261 			if (rlp->rlim_max != RLIM_SAVED_MAX
5262 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5263 				return B_NOT_ALLOWED;
5264 
5265 			return resize_monitor_table(get_current_io_context(false),
5266 				rlp->rlim_cur);
5267 
5268 		default:
5269 			return B_BAD_VALUE;
5270 	}
5271 }
5272 
5273 
5274 status_t
5275 vfs_init(kernel_args* args)
5276 {
5277 	vnode::StaticInit();
5278 
5279 	sVnodeTable = new(std::nothrow) VnodeTable();
5280 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5281 		panic("vfs_init: error creating vnode hash table\n");
5282 
5283 	struct vnode dummy_vnode;
5284 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5285 
5286 	struct fs_mount dummyMount;
5287 	sMountsTable = new(std::nothrow) MountTable();
5288 	if (sMountsTable == NULL
5289 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5290 		panic("vfs_init: error creating mounts hash table\n");
5291 
5292 	sPathNameCache = create_object_cache("vfs path names",
5293 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5294 	if (sPathNameCache == NULL)
5295 		panic("vfs_init: error creating path name object_cache\n");
5296 
5297 	sVnodeCache = create_object_cache("vfs vnodes",
5298 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5299 	if (sVnodeCache == NULL)
5300 		panic("vfs_init: error creating vnode object_cache\n");
5301 
5302 	sFileDescriptorCache = create_object_cache("vfs fds",
5303 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5304 	if (sFileDescriptorCache == NULL)
5305 		panic("vfs_init: error creating file descriptor object_cache\n");
5306 
5307 	node_monitor_init();
5308 
5309 	sRoot = NULL;
5310 
5311 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5312 
5313 	if (block_cache_init() != B_OK)
5314 		return B_ERROR;
5315 
5316 #ifdef ADD_DEBUGGER_COMMANDS
5317 	// add some debugger commands
5318 	add_debugger_command_etc("vnode", &dump_vnode,
5319 		"Print info about the specified vnode",
5320 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5321 		"Prints information about the vnode specified by address <vnode> or\n"
5322 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5323 		"constructed and printed. It might not be possible to construct a\n"
5324 		"complete path, though.\n",
5325 		0);
5326 	add_debugger_command("vnodes", &dump_vnodes,
5327 		"list all vnodes (from the specified device)");
5328 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5329 		"list all vnode caches");
5330 	add_debugger_command("mount", &dump_mount,
5331 		"info about the specified fs_mount");
5332 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5333 	add_debugger_command("io_context", &dump_io_context,
5334 		"info about the I/O context");
5335 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5336 		"info about vnode usage");
5337 #endif
5338 
5339 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5340 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5341 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5342 		0);
5343 
5344 	fifo_init();
5345 	file_map_init();
5346 
5347 	return file_cache_init();
5348 }
5349 
5350 
5351 //	#pragma mark - fd_ops implementations
5352 
5353 
5354 /*!
5355 	Calls fs_open() on the given vnode and returns a new
5356 	file descriptor for it
5357 */
5358 static int
5359 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5360 {
5361 	void* cookie;
5362 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5363 	if (status != B_OK)
5364 		return status;
5365 
5366 	int fd = get_new_fd(&sFileOps, NULL, vnode, cookie, openMode, kernel);
5367 	if (fd < 0) {
5368 		FS_CALL(vnode, close, cookie);
5369 		FS_CALL(vnode, free_cookie, cookie);
5370 	}
5371 	return fd;
5372 }
5373 
5374 
5375 /*!
5376 	Calls fs_open() on the given vnode and returns a new
5377 	file descriptor for it
5378 */
5379 static int
5380 create_vnode(struct vnode* directory, const char* name, int openMode,
5381 	int perms, bool kernel)
5382 {
5383 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5384 	status_t status = B_ERROR;
5385 	VnodePutter vnode, dirPutter;
5386 	void* cookie;
5387 	ino_t newID;
5388 	char clonedName[B_FILE_NAME_LENGTH + 1];
5389 
5390 	// This is somewhat tricky: If the entry already exists, the FS responsible
5391 	// for the directory might not necessarily also be the one responsible for
5392 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5393 	// we can actually never call the create() hook without O_EXCL. Instead we
5394 	// try to look the entry up first. If it already exists, we just open the
5395 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5396 	// introduces a race condition, since someone else might have created the
5397 	// entry in the meantime. We hope the respective FS returns the correct
5398 	// error code and retry (up to 3 times) again.
5399 
5400 	for (int i = 0; i < 3 && status != B_OK; i++) {
5401 		bool create = false;
5402 
5403 		// look the node up
5404 		{
5405 			struct vnode* entry = NULL;
5406 			status = lookup_dir_entry(directory, name, &entry);
5407 			vnode.SetTo(entry);
5408 		}
5409 		if (status == B_OK) {
5410 			if ((openMode & O_EXCL) != 0)
5411 				return B_FILE_EXISTS;
5412 
5413 			// If the node is a symlink, we have to follow it, unless
5414 			// O_NOTRAVERSE is set.
5415 			if (S_ISLNK(vnode->Type()) && traverse) {
5416 				vnode.Unset();
5417 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5418 						>= B_FILE_NAME_LENGTH) {
5419 					return B_NAME_TOO_LONG;
5420 				}
5421 
5422 				inc_vnode_ref_count(directory);
5423 				dirPutter.Unset();
5424 				status = vnode_path_to_vnode(directory, clonedName, true,
5425 					kernel, vnode, NULL, clonedName);
5426 				if (status != B_OK) {
5427 					// vnode is not found, but maybe it has a parent and we can create it from
5428 					// there. In that case, vnode_path_to_vnode has set vnode to the latest
5429 					// directory found in the path
5430 					if (status == B_ENTRY_NOT_FOUND) {
5431 						directory = vnode.Detach();
5432 						dirPutter.SetTo(directory);
5433 						name = clonedName;
5434 						create = true;
5435 					} else
5436 						return status;
5437 				}
5438 			}
5439 
5440 			if (!create) {
5441 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5442 					return B_LINK_LIMIT;
5443 
5444 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5445 				// on success keep the vnode reference for the FD
5446 				if (fd >= 0)
5447 					vnode.Detach();
5448 
5449 				return fd;
5450 			}
5451 		}
5452 
5453 		// it doesn't exist yet -- try to create it
5454 
5455 		if (!HAS_FS_CALL(directory, create))
5456 			return B_READ_ONLY_DEVICE;
5457 
5458 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5459 			&cookie, &newID);
5460 		if (status != B_OK
5461 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5462 			return status;
5463 		}
5464 	}
5465 
5466 	if (status != B_OK)
5467 		return status;
5468 
5469 	// the node has been created successfully
5470 
5471 	rw_lock_read_lock(&sVnodeLock);
5472 	vnode.SetTo(lookup_vnode(directory->device, newID));
5473 	rw_lock_read_unlock(&sVnodeLock);
5474 
5475 	if (!vnode.IsSet()) {
5476 		panic("vfs: fs_create() returned success but there is no vnode, "
5477 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5478 		return B_BAD_VALUE;
5479 	}
5480 
5481 	int fd = get_new_fd(&sFileOps, NULL, vnode.Get(), cookie, openMode, kernel);
5482 	if (fd >= 0) {
5483 		vnode.Detach();
5484 		return fd;
5485 	}
5486 
5487 	status = fd;
5488 
5489 	// something went wrong, clean up
5490 
5491 	FS_CALL(vnode.Get(), close, cookie);
5492 	FS_CALL(vnode.Get(), free_cookie, cookie);
5493 
5494 	FS_CALL(directory, unlink, name);
5495 
5496 	return status;
5497 }
5498 
5499 
5500 /*! Calls fs open_dir() on the given vnode and returns a new
5501 	file descriptor for it
5502 */
5503 static int
5504 open_dir_vnode(struct vnode* vnode, bool kernel)
5505 {
5506 	if (!HAS_FS_CALL(vnode, open_dir))
5507 		return B_UNSUPPORTED;
5508 
5509 	void* cookie;
5510 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5511 	if (status != B_OK)
5512 		return status;
5513 
5514 	// directory is opened, create a fd
5515 	status = get_new_fd(&sDirectoryOps, NULL, vnode, cookie, O_CLOEXEC, kernel);
5516 	if (status >= 0)
5517 		return status;
5518 
5519 	FS_CALL(vnode, close_dir, cookie);
5520 	FS_CALL(vnode, free_dir_cookie, cookie);
5521 
5522 	return status;
5523 }
5524 
5525 
5526 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5527 	file descriptor for it.
5528 	Used by attr_dir_open(), and attr_dir_open_fd().
5529 */
5530 static int
5531 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5532 {
5533 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5534 		return B_UNSUPPORTED;
5535 
5536 	void* cookie;
5537 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5538 	if (status != B_OK)
5539 		return status;
5540 
5541 	// directory is opened, create a fd
5542 	status = get_new_fd(&sAttributeDirectoryOps, NULL, vnode, cookie, O_CLOEXEC,
5543 		kernel);
5544 	if (status >= 0)
5545 		return status;
5546 
5547 	FS_CALL(vnode, close_attr_dir, cookie);
5548 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5549 
5550 	return status;
5551 }
5552 
5553 
5554 static int
5555 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5556 	int openMode, int perms, bool kernel)
5557 {
5558 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5559 		"kernel %d\n", name, openMode, perms, kernel));
5560 
5561 	// get directory to put the new file in
5562 	struct vnode* directory;
5563 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5564 	if (status != B_OK)
5565 		return status;
5566 
5567 	status = create_vnode(directory, name, openMode, perms, kernel);
5568 	put_vnode(directory);
5569 
5570 	return status;
5571 }
5572 
5573 
5574 static int
5575 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5576 {
5577 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5578 		openMode, perms, kernel));
5579 
5580 	// get directory to put the new file in
5581 	char name[B_FILE_NAME_LENGTH];
5582 	VnodePutter directory;
5583 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5584 		kernel);
5585 	if (status < 0)
5586 		return status;
5587 
5588 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5589 }
5590 
5591 
5592 static int
5593 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5594 	int openMode, bool kernel)
5595 {
5596 	if (name == NULL || *name == '\0')
5597 		return B_BAD_VALUE;
5598 
5599 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5600 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5601 
5602 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5603 
5604 	// get the vnode matching the entry_ref
5605 	VnodePutter vnode;
5606 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5607 		kernel, vnode);
5608 	if (status != B_OK)
5609 		return status;
5610 
5611 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5612 		return B_LINK_LIMIT;
5613 
5614 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5615 	if (newFD >= 0) {
5616 		cache_node_opened(vnode.Get(), vnode->cache, mountID,
5617 			directoryID, vnode->id, name);
5618 
5619 		// The vnode reference has been transferred to the FD
5620 		vnode.Detach();
5621 	}
5622 
5623 	return newFD;
5624 }
5625 
5626 
5627 static int
5628 file_open(int fd, char* path, int openMode, bool kernel)
5629 {
5630 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5631 
5632 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5633 		fd, path, openMode, kernel));
5634 
5635 	// get the vnode matching the vnode + path combination
5636 	VnodePutter vnode;
5637 	ino_t parentID;
5638 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5639 		&parentID, kernel);
5640 	if (status != B_OK)
5641 		return status;
5642 
5643 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5644 		return B_LINK_LIMIT;
5645 
5646 	// open the vnode
5647 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5648 	if (newFD >= 0) {
5649 		cache_node_opened(vnode.Get(), vnode->cache,
5650 			vnode->device, parentID, vnode->id, NULL);
5651 
5652 		// The vnode reference has been transferred to the FD
5653 		vnode.Detach();
5654 	}
5655 
5656 	return newFD;
5657 }
5658 
5659 
5660 static status_t
5661 file_close(struct file_descriptor* descriptor)
5662 {
5663 	struct vnode* vnode = descriptor->u.vnode;
5664 	status_t status = B_OK;
5665 
5666 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5667 
5668 	cache_node_closed(vnode, vnode->cache, vnode->device,
5669 		vnode->id);
5670 	if (HAS_FS_CALL(vnode, close)) {
5671 		status = FS_CALL(vnode, close, descriptor->cookie);
5672 	}
5673 
5674 	if (status == B_OK) {
5675 		// remove all outstanding locks for this team
5676 		if (HAS_FS_CALL(vnode, release_lock))
5677 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5678 		else
5679 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5680 	}
5681 	return status;
5682 }
5683 
5684 
5685 static void
5686 file_free_fd(struct file_descriptor* descriptor)
5687 {
5688 	struct vnode* vnode = descriptor->u.vnode;
5689 
5690 	if (vnode != NULL) {
5691 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5692 		put_vnode(vnode);
5693 	}
5694 }
5695 
5696 
5697 static status_t
5698 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5699 	size_t* length)
5700 {
5701 	struct vnode* vnode = descriptor->u.vnode;
5702 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5703 		pos, length, *length));
5704 
5705 	if (S_ISDIR(vnode->Type()))
5706 		return B_IS_A_DIRECTORY;
5707 	if (pos != -1 && descriptor->pos == -1)
5708 		return ESPIPE;
5709 
5710 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5711 }
5712 
5713 
5714 static status_t
5715 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5716 	size_t* length)
5717 {
5718 	struct vnode* vnode = descriptor->u.vnode;
5719 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5720 		length));
5721 
5722 	if (S_ISDIR(vnode->Type()))
5723 		return B_IS_A_DIRECTORY;
5724 	if (pos != -1 && descriptor->pos == -1)
5725 		return ESPIPE;
5726 
5727 	if (!HAS_FS_CALL(vnode, write))
5728 		return B_READ_ONLY_DEVICE;
5729 
5730 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5731 }
5732 
5733 
5734 static ssize_t
5735 file_vector_io(struct file_descriptor* descriptor, off_t pos,
5736 	const struct iovec *vecs, int count, bool write)
5737 {
5738 	struct vnode* vnode = descriptor->u.vnode;
5739 	if (pos != -1 && descriptor->pos == -1)
5740 		return ESPIPE;
5741 	if (S_ISDIR(vnode->Type()))
5742 		return B_IS_A_DIRECTORY;
5743 
5744 	if (pos == -1)
5745 		return B_UNSUPPORTED;
5746 	if (!HAS_FS_CALL(vnode, io))
5747 		return B_UNSUPPORTED;
5748 
5749 	// We can only perform real vectored I/O for vnodes that have no cache,
5750 	// because the I/O hook bypasses the cache entirely.
5751 	if (vnode->cache != NULL)
5752 		return B_UNSUPPORTED;
5753 
5754 	BStackOrHeapArray<generic_io_vec, 8> iovecs(count);
5755 	if (!iovecs.IsValid())
5756 		return B_NO_MEMORY;
5757 
5758 	generic_size_t length = 0;
5759 	for (int i = 0; i < count; i++) {
5760 		iovecs[i].base = (generic_addr_t)vecs[i].iov_base;
5761 		iovecs[i].length = vecs[i].iov_len;
5762 		length += vecs[i].iov_len;
5763 	}
5764 
5765 	status_t status = (write ? vfs_write_pages : vfs_read_pages)(vnode,
5766 		descriptor->cookie, pos, iovecs, count, 0, &length);
5767 	if (length > 0)
5768 		return length;
5769 	return status;
5770 }
5771 
5772 
5773 static ssize_t
5774 file_readv(struct file_descriptor* descriptor, off_t pos,
5775 	const struct iovec *vecs, int count)
5776 {
5777 	FUNCTION(("file_readv: pos %" B_PRIdOFF "\n", pos));
5778 	return file_vector_io(descriptor, pos, vecs, count, false);
5779 }
5780 
5781 
5782 static ssize_t
5783 file_writev(struct file_descriptor* descriptor, off_t pos,
5784 	const struct iovec *vecs, int count)
5785 {
5786 	FUNCTION(("file_writev: pos %" B_PRIdOFF "\n", pos));
5787 	return file_vector_io(descriptor, pos, vecs, count, true);
5788 }
5789 
5790 
5791 static off_t
5792 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5793 {
5794 	struct vnode* vnode = descriptor->u.vnode;
5795 	off_t offset;
5796 	bool isDevice = false;
5797 
5798 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5799 		seekType));
5800 
5801 	if (descriptor->pos == -1)
5802 		return ESPIPE;
5803 
5804 	switch (vnode->Type() & S_IFMT) {
5805 		// drivers publish block devices as chr, so pick both
5806 		case S_IFBLK:
5807 		case S_IFCHR:
5808 			isDevice = true;
5809 			break;
5810 	}
5811 
5812 	switch (seekType) {
5813 		case SEEK_SET:
5814 			offset = 0;
5815 			break;
5816 		case SEEK_CUR:
5817 			offset = descriptor->pos;
5818 			break;
5819 		case SEEK_END:
5820 		{
5821 			// stat() the node
5822 			if (!HAS_FS_CALL(vnode, read_stat))
5823 				return B_UNSUPPORTED;
5824 
5825 			struct stat stat;
5826 			status_t status = FS_CALL(vnode, read_stat, &stat);
5827 			if (status != B_OK)
5828 				return status;
5829 
5830 			offset = stat.st_size;
5831 
5832 			if (offset == 0 && isDevice) {
5833 				// stat() on regular drivers doesn't report size
5834 				device_geometry geometry;
5835 
5836 				if (HAS_FS_CALL(vnode, ioctl)) {
5837 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5838 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5839 					if (status == B_OK)
5840 						offset = (off_t)geometry.bytes_per_sector
5841 							* geometry.sectors_per_track
5842 							* geometry.cylinder_count
5843 							* geometry.head_count;
5844 				}
5845 			}
5846 
5847 			break;
5848 		}
5849 		case SEEK_DATA:
5850 		case SEEK_HOLE:
5851 		{
5852 			status_t status = B_BAD_VALUE;
5853 			if (HAS_FS_CALL(vnode, ioctl)) {
5854 				offset = pos;
5855 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5856 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5857 					&offset, sizeof(offset));
5858 				if (status == B_OK) {
5859 					if (offset > pos)
5860 						offset -= pos;
5861 					break;
5862 				}
5863 			}
5864 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5865 				return status;
5866 
5867 			// basic implementation with stat() the node
5868 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5869 				return B_BAD_VALUE;
5870 
5871 			struct stat stat;
5872 			status = FS_CALL(vnode, read_stat, &stat);
5873 			if (status != B_OK)
5874 				return status;
5875 
5876 			off_t end = stat.st_size;
5877 			if (pos >= end)
5878 				return ENXIO;
5879 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5880 			break;
5881 		}
5882 		default:
5883 			return B_BAD_VALUE;
5884 	}
5885 
5886 	// assumes off_t is 64 bits wide
5887 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5888 		return B_BUFFER_OVERFLOW;
5889 
5890 	pos += offset;
5891 	if (pos < 0)
5892 		return B_BAD_VALUE;
5893 
5894 	return descriptor->pos = pos;
5895 }
5896 
5897 
5898 static status_t
5899 file_select(struct file_descriptor* descriptor, uint8 event,
5900 	struct selectsync* sync)
5901 {
5902 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5903 
5904 	struct vnode* vnode = descriptor->u.vnode;
5905 
5906 	// If the FS has no select() hook, notify select() now.
5907 	if (!HAS_FS_CALL(vnode, select)) {
5908 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5909 			notify_select_event(sync, event);
5910 		return B_UNSUPPORTED;
5911 	}
5912 
5913 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5914 }
5915 
5916 
5917 static status_t
5918 file_deselect(struct file_descriptor* descriptor, uint8 event,
5919 	struct selectsync* sync)
5920 {
5921 	struct vnode* vnode = descriptor->u.vnode;
5922 
5923 	if (!HAS_FS_CALL(vnode, deselect))
5924 		return B_OK;
5925 
5926 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5927 }
5928 
5929 
5930 static status_t
5931 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5932 	bool kernel)
5933 {
5934 	struct vnode* vnode;
5935 	status_t status;
5936 
5937 	if (name == NULL || *name == '\0')
5938 		return B_BAD_VALUE;
5939 
5940 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5941 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5942 
5943 	status = get_vnode(mountID, parentID, &vnode, true, false);
5944 	if (status != B_OK)
5945 		return status;
5946 
5947 	if (HAS_FS_CALL(vnode, create_dir))
5948 		status = FS_CALL(vnode, create_dir, name, perms);
5949 	else
5950 		status = B_READ_ONLY_DEVICE;
5951 
5952 	put_vnode(vnode);
5953 	return status;
5954 }
5955 
5956 
5957 static status_t
5958 dir_create(int fd, char* path, int perms, bool kernel)
5959 {
5960 	char filename[B_FILE_NAME_LENGTH];
5961 	status_t status;
5962 
5963 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5964 		kernel));
5965 
5966 	VnodePutter vnode;
5967 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5968 	if (status < 0)
5969 		return status;
5970 
5971 	if (HAS_FS_CALL(vnode, create_dir)) {
5972 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5973 	} else
5974 		status = B_READ_ONLY_DEVICE;
5975 
5976 	return status;
5977 }
5978 
5979 
5980 static int
5981 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5982 {
5983 	FUNCTION(("dir_open_entry_ref()\n"));
5984 
5985 	if (name && name[0] == '\0')
5986 		return B_BAD_VALUE;
5987 
5988 	// get the vnode matching the entry_ref/node_ref
5989 	VnodePutter vnode;
5990 	status_t status;
5991 	if (name) {
5992 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5993 			vnode);
5994 	} else {
5995 		struct vnode* temp = NULL;
5996 		status = get_vnode(mountID, parentID, &temp, true, false);
5997 		vnode.SetTo(temp);
5998 	}
5999 	if (status != B_OK)
6000 		return status;
6001 
6002 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6003 	if (newFD >= 0) {
6004 		cache_node_opened(vnode.Get(), vnode->cache, mountID, parentID,
6005 			vnode->id, name);
6006 
6007 		// The vnode reference has been transferred to the FD
6008 		vnode.Detach();
6009 	}
6010 
6011 	return newFD;
6012 }
6013 
6014 
6015 static int
6016 dir_open(int fd, char* path, bool kernel)
6017 {
6018 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6019 		kernel));
6020 
6021 	// get the vnode matching the vnode + path combination
6022 	VnodePutter vnode;
6023 	ino_t parentID;
6024 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
6025 		kernel);
6026 	if (status != B_OK)
6027 		return status;
6028 
6029 	// open the dir
6030 	int newFD = open_dir_vnode(vnode.Get(), kernel);
6031 	if (newFD >= 0) {
6032 		cache_node_opened(vnode.Get(), vnode->cache, vnode->device,
6033 			parentID, vnode->id, NULL);
6034 
6035 		// The vnode reference has been transferred to the FD
6036 		vnode.Detach();
6037 	}
6038 
6039 	return newFD;
6040 }
6041 
6042 
6043 static status_t
6044 dir_close(struct file_descriptor* descriptor)
6045 {
6046 	struct vnode* vnode = descriptor->u.vnode;
6047 
6048 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6049 
6050 	cache_node_closed(vnode, vnode->cache, vnode->device,
6051 		vnode->id);
6052 	if (HAS_FS_CALL(vnode, close_dir))
6053 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6054 
6055 	return B_OK;
6056 }
6057 
6058 
6059 static void
6060 dir_free_fd(struct file_descriptor* descriptor)
6061 {
6062 	struct vnode* vnode = descriptor->u.vnode;
6063 
6064 	if (vnode != NULL) {
6065 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6066 		put_vnode(vnode);
6067 	}
6068 }
6069 
6070 
6071 static status_t
6072 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6073 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6074 {
6075 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6076 		bufferSize, _count);
6077 }
6078 
6079 
6080 static status_t
6081 fix_dirent(struct vnode* parent, struct dirent* entry,
6082 	struct io_context* ioContext)
6083 {
6084 	// set d_pdev and d_pino
6085 	entry->d_pdev = parent->device;
6086 	entry->d_pino = parent->id;
6087 
6088 	// If this is the ".." entry and the directory covering another vnode,
6089 	// we need to replace d_dev and d_ino with the actual values.
6090 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6091 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6092 			ioContext);
6093 	}
6094 
6095 	// resolve covered vnodes
6096 	ReadLocker _(&sVnodeLock);
6097 
6098 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6099 	if (vnode != NULL && vnode->covered_by != NULL) {
6100 		do {
6101 			vnode = vnode->covered_by;
6102 		} while (vnode->covered_by != NULL);
6103 
6104 		entry->d_dev = vnode->device;
6105 		entry->d_ino = vnode->id;
6106 	}
6107 
6108 	return B_OK;
6109 }
6110 
6111 
6112 static status_t
6113 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6114 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6115 {
6116 	if (!HAS_FS_CALL(vnode, read_dir))
6117 		return B_UNSUPPORTED;
6118 
6119 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6120 		_count);
6121 	if (error != B_OK)
6122 		return error;
6123 
6124 	// we need to adjust the read dirents
6125 	uint32 count = *_count;
6126 	for (uint32 i = 0; i < count; i++) {
6127 		error = fix_dirent(vnode, buffer, ioContext);
6128 		if (error != B_OK)
6129 			return error;
6130 
6131 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6132 	}
6133 
6134 	return error;
6135 }
6136 
6137 
6138 static status_t
6139 dir_rewind(struct file_descriptor* descriptor)
6140 {
6141 	struct vnode* vnode = descriptor->u.vnode;
6142 
6143 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6144 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6145 	}
6146 
6147 	return B_UNSUPPORTED;
6148 }
6149 
6150 
6151 static status_t
6152 dir_remove(int fd, char* path, bool kernel)
6153 {
6154 	char name[B_FILE_NAME_LENGTH];
6155 	status_t status;
6156 
6157 	if (path != NULL) {
6158 		// we need to make sure our path name doesn't stop with "/", ".",
6159 		// or ".."
6160 		char* lastSlash;
6161 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6162 			char* leaf = lastSlash + 1;
6163 			if (!strcmp(leaf, ".."))
6164 				return B_NOT_ALLOWED;
6165 
6166 			// omit multiple slashes
6167 			while (lastSlash > path && lastSlash[-1] == '/')
6168 				lastSlash--;
6169 
6170 			if (leaf[0]
6171 				&& strcmp(leaf, ".")) {
6172 				break;
6173 			}
6174 			// "name/" -> "name", or "name/." -> "name"
6175 			lastSlash[0] = '\0';
6176 		}
6177 
6178 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6179 			return B_NOT_ALLOWED;
6180 	}
6181 
6182 	VnodePutter directory;
6183 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6184 	if (status != B_OK)
6185 		return status;
6186 
6187 	if (HAS_FS_CALL(directory, remove_dir))
6188 		status = FS_CALL(directory.Get(), remove_dir, name);
6189 	else
6190 		status = B_READ_ONLY_DEVICE;
6191 
6192 	return status;
6193 }
6194 
6195 
6196 static status_t
6197 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6198 	size_t length)
6199 {
6200 	struct vnode* vnode = descriptor->u.vnode;
6201 
6202 	if (HAS_FS_CALL(vnode, ioctl))
6203 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6204 
6205 	return B_DEV_INVALID_IOCTL;
6206 }
6207 
6208 
6209 static status_t
6210 common_fcntl(int fd, int op, size_t argument, bool kernel)
6211 {
6212 	struct flock flock;
6213 
6214 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6215 		fd, op, argument, kernel ? "kernel" : "user"));
6216 
6217 	struct io_context* context = get_current_io_context(kernel);
6218 
6219 	FileDescriptorPutter descriptor(get_fd(context, fd));
6220 	if (!descriptor.IsSet())
6221 		return B_FILE_ERROR;
6222 
6223 	struct vnode* vnode = fd_vnode(descriptor.Get());
6224 
6225 	status_t status = B_OK;
6226 
6227 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6228 		if (descriptor->ops != &sFileOps)
6229 			status = B_BAD_VALUE;
6230 		else if (kernel)
6231 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6232 		else if (user_memcpy(&flock, (struct flock*)argument,
6233 				sizeof(struct flock)) != B_OK)
6234 			status = B_BAD_ADDRESS;
6235 		if (status != B_OK)
6236 			return status;
6237 	}
6238 
6239 	switch (op) {
6240 		case F_SETFD:
6241 		{
6242 			// Set file descriptor flags
6243 
6244 			// O_CLOEXEC is the only flag available at this time
6245 			mutex_lock(&context->io_mutex);
6246 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6247 			mutex_unlock(&context->io_mutex);
6248 
6249 			status = B_OK;
6250 			break;
6251 		}
6252 
6253 		case F_GETFD:
6254 		{
6255 			// Get file descriptor flags
6256 			mutex_lock(&context->io_mutex);
6257 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6258 			mutex_unlock(&context->io_mutex);
6259 			break;
6260 		}
6261 
6262 		case F_SETFL:
6263 		{
6264 			// Set file descriptor open mode
6265 
6266 			// we only accept changes to certain flags
6267 			const int32 modifiableFlags = O_APPEND | O_NONBLOCK;
6268 			argument &= modifiableFlags;
6269 
6270 			if (descriptor->ops->fd_set_flags != NULL) {
6271 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6272 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6273 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6274 					(int)argument);
6275 			} else
6276 				status = B_UNSUPPORTED;
6277 
6278 			if (status == B_OK) {
6279 				// update this descriptor's open_mode field
6280 				descriptor->open_mode = (descriptor->open_mode
6281 					& ~modifiableFlags) | argument;
6282 			}
6283 
6284 			break;
6285 		}
6286 
6287 		case F_GETFL:
6288 			// Get file descriptor open mode
6289 			status = descriptor->open_mode;
6290 			break;
6291 
6292 		case F_DUPFD:
6293 		case F_DUPFD_CLOEXEC:
6294 		{
6295 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6296 			if (status >= 0) {
6297 				mutex_lock(&context->io_mutex);
6298 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6299 				mutex_unlock(&context->io_mutex);
6300 
6301 				atomic_add(&descriptor->ref_count, 1);
6302 			}
6303 			break;
6304 		}
6305 
6306 		case F_GETLK:
6307 			if (vnode != NULL) {
6308 				struct flock normalizedLock;
6309 
6310 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6311 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6312 				if (status != B_OK)
6313 					break;
6314 
6315 				if (HAS_FS_CALL(vnode, test_lock)) {
6316 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6317 						&normalizedLock);
6318 				} else
6319 					status = test_advisory_lock(vnode, &normalizedLock);
6320 				if (status == B_OK) {
6321 					if (normalizedLock.l_type == F_UNLCK) {
6322 						// no conflicting lock found, copy back the same struct
6323 						// we were given except change type to F_UNLCK
6324 						flock.l_type = F_UNLCK;
6325 						if (kernel) {
6326 							memcpy((struct flock*)argument, &flock,
6327 								sizeof(struct flock));
6328 						} else {
6329 							status = user_memcpy((struct flock*)argument,
6330 								&flock, sizeof(struct flock));
6331 						}
6332 					} else {
6333 						// a conflicting lock was found, copy back its range and
6334 						// type
6335 						if (normalizedLock.l_len == OFF_MAX)
6336 							normalizedLock.l_len = 0;
6337 
6338 						if (kernel) {
6339 							memcpy((struct flock*)argument,
6340 								&normalizedLock, sizeof(struct flock));
6341 						} else {
6342 							status = user_memcpy((struct flock*)argument,
6343 								&normalizedLock, sizeof(struct flock));
6344 						}
6345 					}
6346 				}
6347 			} else
6348 				status = B_BAD_VALUE;
6349 			break;
6350 
6351 		case F_SETLK:
6352 		case F_SETLKW:
6353 			status = normalize_flock(descriptor.Get(), &flock);
6354 			if (status != B_OK)
6355 				break;
6356 
6357 			if (vnode == NULL) {
6358 				status = B_BAD_VALUE;
6359 			} else if (flock.l_type == F_UNLCK) {
6360 				if (HAS_FS_CALL(vnode, release_lock)) {
6361 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6362 						&flock);
6363 				} else {
6364 					status = release_advisory_lock(vnode, context, NULL,
6365 						&flock);
6366 				}
6367 			} else {
6368 				// the open mode must match the lock type
6369 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6370 						&& flock.l_type == F_WRLCK)
6371 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6372 						&& flock.l_type == F_RDLCK))
6373 					status = B_FILE_ERROR;
6374 				else {
6375 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6376 						status = FS_CALL(vnode, acquire_lock,
6377 							descriptor->cookie, &flock, op == F_SETLKW);
6378 					} else {
6379 						status = acquire_advisory_lock(vnode, context, NULL,
6380 							&flock, op == F_SETLKW);
6381 					}
6382 				}
6383 			}
6384 			break;
6385 
6386 		// ToDo: add support for more ops?
6387 
6388 		default:
6389 			status = B_BAD_VALUE;
6390 	}
6391 
6392 	return status;
6393 }
6394 
6395 
6396 static status_t
6397 common_sync(int fd, bool kernel)
6398 {
6399 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6400 
6401 	struct vnode* vnode;
6402 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6403 	if (!descriptor.IsSet())
6404 		return B_FILE_ERROR;
6405 
6406 	status_t status;
6407 	if (HAS_FS_CALL(vnode, fsync))
6408 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6409 	else
6410 		status = B_UNSUPPORTED;
6411 
6412 	return status;
6413 }
6414 
6415 
6416 static status_t
6417 common_lock_node(int fd, bool kernel)
6418 {
6419 	struct vnode* vnode;
6420 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6421 	if (!descriptor.IsSet())
6422 		return B_FILE_ERROR;
6423 
6424 	status_t status = B_OK;
6425 
6426 	// We need to set the locking atomically - someone
6427 	// else might set one at the same time
6428 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6429 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6430 		status = B_BUSY;
6431 
6432 	return status;
6433 }
6434 
6435 
6436 static status_t
6437 common_unlock_node(int fd, bool kernel)
6438 {
6439 	struct vnode* vnode;
6440 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6441 	if (!descriptor.IsSet())
6442 		return B_FILE_ERROR;
6443 
6444 	status_t status = B_OK;
6445 
6446 	// We need to set the locking atomically - someone
6447 	// else might set one at the same time
6448 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6449 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6450 		status = B_BAD_VALUE;
6451 
6452 	return status;
6453 }
6454 
6455 
6456 static status_t
6457 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6458 {
6459 	if (offset < 0 || length == 0)
6460 		return B_BAD_VALUE;
6461 	if (offset > OFF_MAX - length)
6462 		return B_FILE_TOO_LARGE;
6463 
6464 	struct vnode* vnode;
6465 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6466 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6467 		return B_FILE_ERROR;
6468 
6469 	switch (vnode->Type() & S_IFMT) {
6470 		case S_IFIFO:
6471 		case S_IFSOCK:
6472 			return ESPIPE;
6473 
6474 		case S_IFBLK:
6475 		case S_IFCHR:
6476 		case S_IFDIR:
6477 		case S_IFLNK:
6478 			return B_DEVICE_NOT_FOUND;
6479 
6480 		case S_IFREG:
6481 			break;
6482 	}
6483 
6484 	status_t status = B_OK;
6485 	if (HAS_FS_CALL(vnode, preallocate)) {
6486 		status = FS_CALL(vnode, preallocate, offset, length);
6487 	} else {
6488 		status = HAS_FS_CALL(vnode, write)
6489 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6490 	}
6491 
6492 	return status;
6493 }
6494 
6495 
6496 static status_t
6497 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6498 	bool kernel)
6499 {
6500 	VnodePutter vnode;
6501 	status_t status;
6502 
6503 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6504 	if (status != B_OK)
6505 		return status;
6506 
6507 	if (HAS_FS_CALL(vnode, read_symlink)) {
6508 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6509 	} else
6510 		status = B_BAD_VALUE;
6511 
6512 	return status;
6513 }
6514 
6515 
6516 static status_t
6517 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6518 	bool kernel)
6519 {
6520 	// path validity checks have to be in the calling function!
6521 	char name[B_FILE_NAME_LENGTH];
6522 	status_t status;
6523 
6524 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6525 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6526 
6527 	VnodePutter vnode;
6528 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6529 	if (status != B_OK)
6530 		return status;
6531 
6532 	if (HAS_FS_CALL(vnode, create_symlink))
6533 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6534 	else {
6535 		status = HAS_FS_CALL(vnode, write)
6536 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6537 	}
6538 
6539 	return status;
6540 }
6541 
6542 
6543 static status_t
6544 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6545 	bool traverseLeafLink, bool kernel)
6546 {
6547 	// path validity checks have to be in the calling function!
6548 
6549 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6550 		toPath, kernel));
6551 
6552 	char name[B_FILE_NAME_LENGTH];
6553 	VnodePutter directory;
6554 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6555 		kernel);
6556 	if (status != B_OK)
6557 		return status;
6558 
6559 	VnodePutter vnode;
6560 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6561 		kernel);
6562 	if (status != B_OK)
6563 		return status;
6564 
6565 	if (directory->mount != vnode->mount)
6566 		return B_CROSS_DEVICE_LINK;
6567 
6568 	if (HAS_FS_CALL(directory, link))
6569 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6570 	else
6571 		status = B_READ_ONLY_DEVICE;
6572 
6573 	return status;
6574 }
6575 
6576 
6577 static status_t
6578 common_unlink(int fd, char* path, bool kernel)
6579 {
6580 	char filename[B_FILE_NAME_LENGTH];
6581 	status_t status;
6582 
6583 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6584 		kernel));
6585 
6586 	VnodePutter vnode;
6587 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6588 	if (status < 0)
6589 		return status;
6590 
6591 	if (HAS_FS_CALL(vnode, unlink))
6592 		status = FS_CALL(vnode.Get(), unlink, filename);
6593 	else
6594 		status = B_READ_ONLY_DEVICE;
6595 
6596 	return status;
6597 }
6598 
6599 
6600 static status_t
6601 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6602 {
6603 	status_t status;
6604 
6605 	// TODO: honor effectiveUserGroup argument
6606 
6607 	VnodePutter vnode;
6608 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6609 	if (status != B_OK)
6610 		return status;
6611 
6612 	if (HAS_FS_CALL(vnode, access))
6613 		status = FS_CALL(vnode.Get(), access, mode);
6614 	else
6615 		status = B_OK;
6616 
6617 	return status;
6618 }
6619 
6620 
6621 static status_t
6622 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6623 {
6624 	status_t status;
6625 
6626 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6627 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6628 
6629 	VnodePutter fromVnode;
6630 	char fromName[B_FILE_NAME_LENGTH];
6631 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6632 	if (status != B_OK)
6633 		return status;
6634 
6635 	VnodePutter toVnode;
6636 	char toName[B_FILE_NAME_LENGTH];
6637 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6638 	if (status != B_OK)
6639 		return status;
6640 
6641 	if (fromVnode->device != toVnode->device)
6642 		return B_CROSS_DEVICE_LINK;
6643 
6644 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6645 		return B_OK;
6646 
6647 	if (fromName[0] == '\0' || toName[0] == '\0'
6648 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6649 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6650 		return B_BAD_VALUE;
6651 	}
6652 
6653 	if (HAS_FS_CALL(fromVnode, rename))
6654 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6655 	else
6656 		status = B_READ_ONLY_DEVICE;
6657 
6658 	return status;
6659 }
6660 
6661 
6662 static status_t
6663 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6664 {
6665 	struct vnode* vnode = descriptor->u.vnode;
6666 
6667 	FUNCTION(("common_read_stat: stat %p\n", stat));
6668 
6669 	// TODO: remove this once all file systems properly set them!
6670 	stat->st_crtim.tv_nsec = 0;
6671 	stat->st_ctim.tv_nsec = 0;
6672 	stat->st_mtim.tv_nsec = 0;
6673 	stat->st_atim.tv_nsec = 0;
6674 
6675 	return vfs_stat_vnode(vnode, stat);
6676 }
6677 
6678 
6679 static status_t
6680 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6681 	int statMask)
6682 {
6683 	struct vnode* vnode = descriptor->u.vnode;
6684 
6685 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6686 		vnode, stat, statMask));
6687 
6688 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6689 		&& (statMask & B_STAT_SIZE) != 0) {
6690 		return B_BAD_VALUE;
6691 	}
6692 
6693 	if (!HAS_FS_CALL(vnode, write_stat))
6694 		return B_READ_ONLY_DEVICE;
6695 
6696 	return FS_CALL(vnode, write_stat, stat, statMask);
6697 }
6698 
6699 
6700 static status_t
6701 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6702 	struct stat* stat, bool kernel)
6703 {
6704 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6705 		stat));
6706 
6707 	VnodePutter vnode;
6708 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6709 		NULL, kernel);
6710 	if (status != B_OK)
6711 		return status;
6712 
6713 	status = vfs_stat_vnode(vnode.Get(), stat);
6714 
6715 	return status;
6716 }
6717 
6718 
6719 static status_t
6720 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6721 	const struct stat* stat, int statMask, bool kernel)
6722 {
6723 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6724 		"kernel %d\n", fd, path, stat, statMask, kernel));
6725 
6726 	VnodePutter vnode;
6727 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6728 		NULL, kernel);
6729 	if (status != B_OK)
6730 		return status;
6731 
6732 	if (HAS_FS_CALL(vnode, write_stat))
6733 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6734 	else
6735 		status = B_READ_ONLY_DEVICE;
6736 
6737 	return status;
6738 }
6739 
6740 
6741 static int
6742 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6743 {
6744 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6745 		kernel));
6746 
6747 	VnodePutter vnode;
6748 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6749 		NULL, kernel);
6750 	if (status != B_OK)
6751 		return status;
6752 
6753 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6754 	if (status >= 0)
6755 		vnode.Detach();
6756 
6757 	return status;
6758 }
6759 
6760 
6761 static status_t
6762 attr_dir_close(struct file_descriptor* descriptor)
6763 {
6764 	struct vnode* vnode = descriptor->u.vnode;
6765 
6766 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6767 
6768 	if (HAS_FS_CALL(vnode, close_attr_dir))
6769 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6770 
6771 	return B_OK;
6772 }
6773 
6774 
6775 static void
6776 attr_dir_free_fd(struct file_descriptor* descriptor)
6777 {
6778 	struct vnode* vnode = descriptor->u.vnode;
6779 
6780 	if (vnode != NULL) {
6781 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6782 		put_vnode(vnode);
6783 	}
6784 }
6785 
6786 
6787 static status_t
6788 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6789 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6790 {
6791 	struct vnode* vnode = descriptor->u.vnode;
6792 
6793 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6794 
6795 	if (HAS_FS_CALL(vnode, read_attr_dir))
6796 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6797 			bufferSize, _count);
6798 
6799 	return B_UNSUPPORTED;
6800 }
6801 
6802 
6803 static status_t
6804 attr_dir_rewind(struct file_descriptor* descriptor)
6805 {
6806 	struct vnode* vnode = descriptor->u.vnode;
6807 
6808 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6809 
6810 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6811 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6812 
6813 	return B_UNSUPPORTED;
6814 }
6815 
6816 
6817 static int
6818 attr_create(int fd, char* path, const char* name, uint32 type,
6819 	int openMode, bool kernel)
6820 {
6821 	if (name == NULL || *name == '\0')
6822 		return B_BAD_VALUE;
6823 
6824 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6825 	VnodePutter vnode;
6826 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6827 		kernel);
6828 	if (status != B_OK)
6829 		return status;
6830 
6831 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6832 		return B_LINK_LIMIT;
6833 
6834 	if (!HAS_FS_CALL(vnode, create_attr))
6835 		return B_READ_ONLY_DEVICE;
6836 
6837 	void* cookie;
6838 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6839 	if (status != B_OK)
6840 		return status;
6841 
6842 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6843 	if (fd >= 0) {
6844 		vnode.Detach();
6845 		return fd;
6846 	}
6847 
6848 	status = fd;
6849 
6850 	FS_CALL(vnode.Get(), close_attr, cookie);
6851 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6852 
6853 	FS_CALL(vnode.Get(), remove_attr, name);
6854 
6855 	return status;
6856 }
6857 
6858 
6859 static int
6860 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6861 {
6862 	if (name == NULL || *name == '\0')
6863 		return B_BAD_VALUE;
6864 
6865 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6866 	VnodePutter vnode;
6867 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6868 		kernel);
6869 	if (status != B_OK)
6870 		return status;
6871 
6872 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6873 		return B_LINK_LIMIT;
6874 
6875 	if (!HAS_FS_CALL(vnode, open_attr))
6876 		return B_UNSUPPORTED;
6877 
6878 	void* cookie;
6879 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6880 	if (status != B_OK)
6881 		return status;
6882 
6883 	// now we only need a file descriptor for this attribute and we're done
6884 	fd = get_new_fd(&sAttributeOps, NULL, vnode.Get(), cookie, openMode, kernel);
6885 	if (fd >= 0) {
6886 		vnode.Detach();
6887 		return fd;
6888 	}
6889 
6890 	status = fd;
6891 
6892 	FS_CALL(vnode.Get(), close_attr, cookie);
6893 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6894 
6895 	return status;
6896 }
6897 
6898 
6899 static status_t
6900 attr_close(struct file_descriptor* descriptor)
6901 {
6902 	struct vnode* vnode = descriptor->u.vnode;
6903 
6904 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6905 
6906 	if (HAS_FS_CALL(vnode, close_attr))
6907 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6908 
6909 	return B_OK;
6910 }
6911 
6912 
6913 static void
6914 attr_free_fd(struct file_descriptor* descriptor)
6915 {
6916 	struct vnode* vnode = descriptor->u.vnode;
6917 
6918 	if (vnode != NULL) {
6919 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6920 		put_vnode(vnode);
6921 	}
6922 }
6923 
6924 
6925 static status_t
6926 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6927 	size_t* length)
6928 {
6929 	struct vnode* vnode = descriptor->u.vnode;
6930 
6931 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6932 		pos, length, *length));
6933 
6934 	if (!HAS_FS_CALL(vnode, read_attr))
6935 		return B_UNSUPPORTED;
6936 
6937 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6938 }
6939 
6940 
6941 static status_t
6942 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6943 	size_t* length)
6944 {
6945 	struct vnode* vnode = descriptor->u.vnode;
6946 
6947 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6948 		length));
6949 
6950 	if (!HAS_FS_CALL(vnode, write_attr))
6951 		return B_UNSUPPORTED;
6952 
6953 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6954 }
6955 
6956 
6957 static off_t
6958 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6959 {
6960 	off_t offset;
6961 
6962 	switch (seekType) {
6963 		case SEEK_SET:
6964 			offset = 0;
6965 			break;
6966 		case SEEK_CUR:
6967 			offset = descriptor->pos;
6968 			break;
6969 		case SEEK_END:
6970 		{
6971 			struct vnode* vnode = descriptor->u.vnode;
6972 			if (!HAS_FS_CALL(vnode, read_stat))
6973 				return B_UNSUPPORTED;
6974 
6975 			struct stat stat;
6976 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6977 				&stat);
6978 			if (status != B_OK)
6979 				return status;
6980 
6981 			offset = stat.st_size;
6982 			break;
6983 		}
6984 		default:
6985 			return B_BAD_VALUE;
6986 	}
6987 
6988 	// assumes off_t is 64 bits wide
6989 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6990 		return B_BUFFER_OVERFLOW;
6991 
6992 	pos += offset;
6993 	if (pos < 0)
6994 		return B_BAD_VALUE;
6995 
6996 	return descriptor->pos = pos;
6997 }
6998 
6999 
7000 static status_t
7001 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7002 {
7003 	struct vnode* vnode = descriptor->u.vnode;
7004 
7005 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7006 
7007 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7008 		return B_UNSUPPORTED;
7009 
7010 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7011 }
7012 
7013 
7014 static status_t
7015 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7016 	int statMask)
7017 {
7018 	struct vnode* vnode = descriptor->u.vnode;
7019 
7020 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7021 
7022 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7023 		return B_READ_ONLY_DEVICE;
7024 
7025 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7026 }
7027 
7028 
7029 static status_t
7030 attr_remove(int fd, const char* name, bool kernel)
7031 {
7032 	if (name == NULL || *name == '\0')
7033 		return B_BAD_VALUE;
7034 
7035 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7036 		kernel));
7037 
7038 	struct vnode* vnode;
7039 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
7040 	if (!descriptor.IsSet())
7041 		return B_FILE_ERROR;
7042 
7043 	status_t status;
7044 	if (HAS_FS_CALL(vnode, remove_attr))
7045 		status = FS_CALL(vnode, remove_attr, name);
7046 	else
7047 		status = B_READ_ONLY_DEVICE;
7048 
7049 	return status;
7050 }
7051 
7052 
7053 static status_t
7054 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7055 	bool kernel)
7056 {
7057 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7058 		|| *toName == '\0')
7059 		return B_BAD_VALUE;
7060 
7061 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7062 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7063 
7064 	struct vnode* fromVnode;
7065 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7066 	if (!fromDescriptor.IsSet())
7067 		return B_FILE_ERROR;
7068 
7069 	struct vnode* toVnode;
7070 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7071 	if (!toDescriptor.IsSet())
7072 		return B_FILE_ERROR;
7073 
7074 	// are the files on the same volume?
7075 	if (fromVnode->device != toVnode->device)
7076 		return B_CROSS_DEVICE_LINK;
7077 
7078 	status_t status;
7079 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7080 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7081 	} else
7082 		status = B_READ_ONLY_DEVICE;
7083 
7084 	return status;
7085 }
7086 
7087 
7088 static int
7089 index_dir_open(dev_t mountID, bool kernel)
7090 {
7091 	struct fs_mount* mount;
7092 	void* cookie;
7093 
7094 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7095 		kernel));
7096 
7097 	status_t status = get_mount(mountID, &mount);
7098 	if (status != B_OK)
7099 		return status;
7100 
7101 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7102 		status = B_UNSUPPORTED;
7103 		goto error;
7104 	}
7105 
7106 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7107 	if (status != B_OK)
7108 		goto error;
7109 
7110 	// get fd for the index directory
7111 	int fd;
7112 	fd = get_new_fd(&sIndexDirectoryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7113 	if (fd >= 0)
7114 		return fd;
7115 
7116 	// something went wrong
7117 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7118 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7119 
7120 	status = fd;
7121 
7122 error:
7123 	put_mount(mount);
7124 	return status;
7125 }
7126 
7127 
7128 static status_t
7129 index_dir_close(struct file_descriptor* descriptor)
7130 {
7131 	struct fs_mount* mount = descriptor->u.mount;
7132 
7133 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7134 
7135 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7136 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7137 
7138 	return B_OK;
7139 }
7140 
7141 
7142 static void
7143 index_dir_free_fd(struct file_descriptor* descriptor)
7144 {
7145 	struct fs_mount* mount = descriptor->u.mount;
7146 
7147 	if (mount != NULL) {
7148 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7149 		put_mount(mount);
7150 	}
7151 }
7152 
7153 
7154 static status_t
7155 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7156 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7157 {
7158 	struct fs_mount* mount = descriptor->u.mount;
7159 
7160 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7161 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7162 			bufferSize, _count);
7163 	}
7164 
7165 	return B_UNSUPPORTED;
7166 }
7167 
7168 
7169 static status_t
7170 index_dir_rewind(struct file_descriptor* descriptor)
7171 {
7172 	struct fs_mount* mount = descriptor->u.mount;
7173 
7174 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7175 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7176 
7177 	return B_UNSUPPORTED;
7178 }
7179 
7180 
7181 static status_t
7182 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7183 	bool kernel)
7184 {
7185 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7186 		mountID, name, kernel));
7187 
7188 	struct fs_mount* mount;
7189 	status_t status = get_mount(mountID, &mount);
7190 	if (status != B_OK)
7191 		return status;
7192 
7193 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7194 		status = B_READ_ONLY_DEVICE;
7195 		goto out;
7196 	}
7197 
7198 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7199 
7200 out:
7201 	put_mount(mount);
7202 	return status;
7203 }
7204 
7205 
7206 #if 0
7207 static status_t
7208 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7209 {
7210 	struct vnode* vnode = descriptor->u.vnode;
7211 
7212 	// ToDo: currently unused!
7213 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7214 	if (!HAS_FS_CALL(vnode, read_index_stat))
7215 		return B_UNSUPPORTED;
7216 
7217 	return B_UNSUPPORTED;
7218 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7219 }
7220 
7221 
7222 static void
7223 index_free_fd(struct file_descriptor* descriptor)
7224 {
7225 	struct vnode* vnode = descriptor->u.vnode;
7226 
7227 	if (vnode != NULL) {
7228 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7229 		put_vnode(vnode);
7230 	}
7231 }
7232 #endif
7233 
7234 
7235 static status_t
7236 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7237 	bool kernel)
7238 {
7239 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7240 		mountID, name, kernel));
7241 
7242 	struct fs_mount* mount;
7243 	status_t status = get_mount(mountID, &mount);
7244 	if (status != B_OK)
7245 		return status;
7246 
7247 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7248 		status = B_UNSUPPORTED;
7249 		goto out;
7250 	}
7251 
7252 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7253 
7254 out:
7255 	put_mount(mount);
7256 	return status;
7257 }
7258 
7259 
7260 static status_t
7261 index_remove(dev_t mountID, const char* name, bool kernel)
7262 {
7263 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7264 		mountID, name, kernel));
7265 
7266 	struct fs_mount* mount;
7267 	status_t status = get_mount(mountID, &mount);
7268 	if (status != B_OK)
7269 		return status;
7270 
7271 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7272 		status = B_READ_ONLY_DEVICE;
7273 		goto out;
7274 	}
7275 
7276 	status = FS_MOUNT_CALL(mount, remove_index, name);
7277 
7278 out:
7279 	put_mount(mount);
7280 	return status;
7281 }
7282 
7283 
/*!	TODO: the query FS API is still pretty much the same as in R5.
		It would be nice if file systems got some more kernel support
		for queries.
		For example, query parsing should be moved into the kernel.
*/
7289 static int
7290 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7291 	int32 token, bool kernel)
7292 {
7293 	struct fs_mount* mount;
7294 	void* cookie;
7295 
7296 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7297 		device, query, kernel));
7298 
7299 	status_t status = get_mount(device, &mount);
7300 	if (status != B_OK)
7301 		return status;
7302 
7303 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7304 		status = B_UNSUPPORTED;
7305 		goto error;
7306 	}
7307 
7308 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7309 		&cookie);
7310 	if (status != B_OK)
7311 		goto error;
7312 
7313 	// get fd for the index directory
7314 	int fd;
7315 	fd = get_new_fd(&sQueryOps, mount, NULL, cookie, O_CLOEXEC, kernel);
7316 	if (fd >= 0)
7317 		return fd;
7318 
7319 	status = fd;
7320 
7321 	// something went wrong
7322 	FS_MOUNT_CALL(mount, close_query, cookie);
7323 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7324 
7325 error:
7326 	put_mount(mount);
7327 	return status;
7328 }
7329 
7330 
7331 static status_t
7332 query_close(struct file_descriptor* descriptor)
7333 {
7334 	struct fs_mount* mount = descriptor->u.mount;
7335 
7336 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7337 
7338 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7339 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7340 
7341 	return B_OK;
7342 }
7343 
7344 
7345 static void
7346 query_free_fd(struct file_descriptor* descriptor)
7347 {
7348 	struct fs_mount* mount = descriptor->u.mount;
7349 
7350 	if (mount != NULL) {
7351 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7352 		put_mount(mount);
7353 	}
7354 }
7355 
7356 
7357 static status_t
7358 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7359 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7360 {
7361 	struct fs_mount* mount = descriptor->u.mount;
7362 
7363 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7364 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7365 			bufferSize, _count);
7366 	}
7367 
7368 	return B_UNSUPPORTED;
7369 }
7370 
7371 
7372 static status_t
7373 query_rewind(struct file_descriptor* descriptor)
7374 {
7375 	struct fs_mount* mount = descriptor->u.mount;
7376 
7377 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7378 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7379 
7380 	return B_UNSUPPORTED;
7381 }
7382 
7383 
7384 //	#pragma mark - General File System functions
7385 
7386 
7387 static dev_t
7388 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7389 	const char* args, bool kernel)
7390 {
7391 	struct ::fs_mount* mount;
7392 	status_t status = B_OK;
7393 	fs_volume* volume = NULL;
7394 	int32 layer = 0;
7395 	Vnode* coveredNode = NULL;
7396 
7397 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7398 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7399 
7400 	// The path is always safe, we just have to make sure that fsName is
7401 	// almost valid - we can't make any assumptions about args, though.
7402 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7403 	// We'll get it from the DDM later.
7404 	if (fsName == NULL) {
7405 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7406 			return B_BAD_VALUE;
7407 	} else if (fsName[0] == '\0')
7408 		return B_BAD_VALUE;
7409 
7410 	RecursiveLocker mountOpLocker(sMountOpLock);
7411 
7412 	// Helper to delete a newly created file device on failure.
7413 	// Not exactly beautiful, but helps to keep the code below cleaner.
7414 	struct FileDeviceDeleter {
7415 		FileDeviceDeleter() : id(-1) {}
7416 		~FileDeviceDeleter()
7417 		{
7418 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7419 		}
7420 
7421 		partition_id id;
7422 	} fileDeviceDeleter;
7423 
7424 	// If the file system is not a "virtual" one, the device argument should
7425 	// point to a real file/device (if given at all).
7426 	// get the partition
7427 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7428 	KPartition* partition = NULL;
7429 	KPath normalizedDevice;
7430 	bool newlyCreatedFileDevice = false;
7431 
7432 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7433 		// normalize the device path
7434 		status = normalizedDevice.SetTo(device, true);
7435 		if (status != B_OK)
7436 			return status;
7437 
7438 		// get a corresponding partition from the DDM
7439 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7440 		if (partition == NULL) {
7441 			// Partition not found: This either means, the user supplied
7442 			// an invalid path, or the path refers to an image file. We try
7443 			// to let the DDM create a file device for the path.
7444 			partition_id deviceID = ddm->CreateFileDevice(
7445 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7446 			if (deviceID >= 0) {
7447 				partition = ddm->RegisterPartition(deviceID);
7448 				if (newlyCreatedFileDevice)
7449 					fileDeviceDeleter.id = deviceID;
7450 			}
7451 		}
7452 
7453 		if (!partition) {
7454 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7455 				normalizedDevice.Path()));
7456 			return B_ENTRY_NOT_FOUND;
7457 		}
7458 
7459 		device = normalizedDevice.Path();
7460 			// correct path to file device
7461 	}
7462 	PartitionRegistrar partitionRegistrar(partition, true);
7463 
7464 	// Write lock the partition's device. For the time being, we keep the lock
7465 	// until we're done mounting -- not nice, but ensure, that no-one is
7466 	// interfering.
7467 	// TODO: Just mark the partition busy while mounting!
7468 	KDiskDevice* diskDevice = NULL;
7469 	if (partition) {
7470 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7471 		if (!diskDevice) {
7472 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7473 			return B_ERROR;
7474 		}
7475 	}
7476 
7477 	DeviceWriteLocker writeLocker(diskDevice, true);
7478 		// this takes over the write lock acquired before
7479 
7480 	if (partition != NULL) {
7481 		// make sure, that the partition is not busy
7482 		if (partition->IsBusy()) {
7483 			TRACE(("fs_mount(): Partition is busy.\n"));
7484 			return B_BUSY;
7485 		}
7486 
7487 		// if no FS name had been supplied, we get it from the partition
7488 		if (fsName == NULL) {
7489 			KDiskSystem* diskSystem = partition->DiskSystem();
7490 			if (!diskSystem) {
7491 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7492 					"recognize it.\n"));
7493 				return B_BAD_VALUE;
7494 			}
7495 
7496 			if (!diskSystem->IsFileSystem()) {
7497 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7498 					"partitioning system.\n"));
7499 				return B_BAD_VALUE;
7500 			}
7501 
7502 			// The disk system name will not change, and the KDiskSystem
7503 			// object will not go away while the disk device is locked (and
7504 			// the partition has a reference to it), so this is safe.
7505 			fsName = diskSystem->Name();
7506 		}
7507 	}
7508 
7509 	mount = new(std::nothrow) (struct ::fs_mount);
7510 	if (mount == NULL)
7511 		return B_NO_MEMORY;
7512 
7513 	mount->device_name = strdup(device);
7514 		// "device" can be NULL
7515 
7516 	status = mount->entry_cache.Init();
7517 	if (status != B_OK)
7518 		goto err1;
7519 
7520 	// initialize structure
7521 	mount->id = sNextMountID++;
7522 	mount->partition = NULL;
7523 	mount->root_vnode = NULL;
7524 	mount->covers_vnode = NULL;
7525 	mount->unmounting = false;
7526 	mount->owns_file_device = false;
7527 	mount->volume = NULL;
7528 
7529 	// build up the volume(s)
7530 	while (true) {
7531 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7532 		if (layerFSName == NULL) {
7533 			if (layer == 0) {
7534 				status = B_NO_MEMORY;
7535 				goto err1;
7536 			}
7537 
7538 			break;
7539 		}
7540 		MemoryDeleter layerFSNameDeleter(layerFSName);
7541 
7542 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7543 		if (volume == NULL) {
7544 			status = B_NO_MEMORY;
7545 			goto err1;
7546 		}
7547 
7548 		volume->id = mount->id;
7549 		volume->partition = partition != NULL ? partition->ID() : -1;
7550 		volume->layer = layer++;
7551 		volume->private_volume = NULL;
7552 		volume->ops = NULL;
7553 		volume->sub_volume = NULL;
7554 		volume->super_volume = NULL;
7555 		volume->file_system = NULL;
7556 		volume->file_system_name = NULL;
7557 
7558 		volume->file_system_name = get_file_system_name(layerFSName);
7559 		if (volume->file_system_name == NULL) {
7560 			status = B_NO_MEMORY;
7561 			free(volume);
7562 			goto err1;
7563 		}
7564 
7565 		volume->file_system = get_file_system(layerFSName);
7566 		if (volume->file_system == NULL) {
7567 			status = B_DEVICE_NOT_FOUND;
7568 			free(volume->file_system_name);
7569 			free(volume);
7570 			goto err1;
7571 		}
7572 
7573 		if (mount->volume == NULL)
7574 			mount->volume = volume;
7575 		else {
7576 			volume->super_volume = mount->volume;
7577 			mount->volume->sub_volume = volume;
7578 			mount->volume = volume;
7579 		}
7580 	}
7581 
7582 	// insert mount struct into list before we call FS's mount() function
7583 	// so that vnodes can be created for this mount
7584 	rw_lock_write_lock(&sMountLock);
7585 	sMountsTable->Insert(mount);
7586 	rw_lock_write_unlock(&sMountLock);
7587 
7588 	ino_t rootID;
7589 
7590 	if (!sRoot) {
7591 		// we haven't mounted anything yet
7592 		if (strcmp(path, "/") != 0) {
7593 			status = B_ERROR;
7594 			goto err2;
7595 		}
7596 
7597 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7598 			args, &rootID);
7599 		if (status != B_OK || mount->volume->ops == NULL)
7600 			goto err2;
7601 	} else {
7602 		{
7603 			VnodePutter temp;
7604 			status = path_to_vnode(path, true, temp, NULL, kernel);
7605 			coveredNode = temp.Detach();
7606 		}
7607 		if (status != B_OK)
7608 			goto err2;
7609 
7610 		mount->covers_vnode = coveredNode;
7611 
7612 		// make sure covered_vnode is a directory
7613 		if (!S_ISDIR(coveredNode->Type())) {
7614 			status = B_NOT_A_DIRECTORY;
7615 			goto err3;
7616 		}
7617 
7618 		if (coveredNode->IsCovered()) {
7619 			// this is already a covered vnode
7620 			status = B_BUSY;
7621 			goto err3;
7622 		}
7623 
7624 		// mount it/them
7625 		fs_volume* volume = mount->volume;
7626 		while (volume) {
7627 			status = volume->file_system->mount(volume, device, flags, args,
7628 				&rootID);
7629 			if (status != B_OK || volume->ops == NULL) {
7630 				if (status == B_OK && volume->ops == NULL)
7631 					panic("fs_mount: mount() succeeded but ops is NULL!");
7632 				if (volume->sub_volume)
7633 					goto err4;
7634 				goto err3;
7635 			}
7636 
7637 			volume = volume->super_volume;
7638 		}
7639 
7640 		volume = mount->volume;
7641 		while (volume) {
7642 			if (volume->ops->all_layers_mounted != NULL)
7643 				volume->ops->all_layers_mounted(volume);
7644 			volume = volume->super_volume;
7645 		}
7646 	}
7647 
7648 	// the root node is supposed to be owned by the file system - it must
7649 	// exist at this point
7650 	rw_lock_write_lock(&sVnodeLock);
7651 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7652 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7653 		panic("fs_mount: file system does not own its root node!\n");
7654 		status = B_ERROR;
7655 		rw_lock_write_unlock(&sVnodeLock);
7656 		goto err4;
7657 	}
7658 
7659 	// set up the links between the root vnode and the vnode it covers
7660 	if (coveredNode != NULL) {
7661 		if (coveredNode->IsCovered()) {
7662 			// the vnode is covered now
7663 			status = B_BUSY;
7664 			rw_lock_write_unlock(&sVnodeLock);
7665 			goto err4;
7666 		}
7667 
7668 		mount->root_vnode->covers = coveredNode;
7669 		mount->root_vnode->SetCovering(true);
7670 
7671 		coveredNode->covered_by = mount->root_vnode;
7672 		coveredNode->SetCovered(true);
7673 	}
7674 	rw_lock_write_unlock(&sVnodeLock);
7675 
7676 	if (!sRoot) {
7677 		sRoot = mount->root_vnode;
7678 		mutex_lock(&sIOContextRootLock);
7679 		get_current_io_context(true)->root = sRoot;
7680 		mutex_unlock(&sIOContextRootLock);
7681 		inc_vnode_ref_count(sRoot);
7682 	}
7683 
7684 	// supply the partition (if any) with the mount cookie and mark it mounted
7685 	if (partition) {
7686 		partition->SetMountCookie(mount->volume->private_volume);
7687 		partition->SetVolumeID(mount->id);
7688 
7689 		// keep a partition reference as long as the partition is mounted
7690 		partitionRegistrar.Detach();
7691 		mount->partition = partition;
7692 		mount->owns_file_device = newlyCreatedFileDevice;
7693 		fileDeviceDeleter.id = -1;
7694 	}
7695 
7696 	notify_mount(mount->id,
7697 		coveredNode != NULL ? coveredNode->device : -1,
7698 		coveredNode ? coveredNode->id : -1);
7699 
7700 	return mount->id;
7701 
7702 err4:
7703 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7704 err3:
7705 	if (coveredNode != NULL)
7706 		put_vnode(coveredNode);
7707 err2:
7708 	rw_lock_write_lock(&sMountLock);
7709 	sMountsTable->Remove(mount);
7710 	rw_lock_write_unlock(&sMountLock);
7711 err1:
7712 	delete mount;
7713 
7714 	return status;
7715 }
7716 
7717 
7718 static status_t
7719 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7720 {
7721 	struct fs_mount* mount;
7722 	status_t err;
7723 
7724 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7725 		mountID, kernel));
7726 
7727 	VnodePutter pathVnode;
7728 	if (path != NULL) {
7729 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7730 		if (err != B_OK)
7731 			return B_ENTRY_NOT_FOUND;
7732 	}
7733 
7734 	RecursiveLocker mountOpLocker(sMountOpLock);
7735 	ReadLocker mountLocker(sMountLock);
7736 
7737 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7738 	if (mount == NULL) {
7739 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7740 			pathVnode.Get());
7741 	}
7742 
7743 	mountLocker.Unlock();
7744 
7745 	if (path != NULL) {
7746 		if (mount->root_vnode != pathVnode.Get()) {
7747 			// not mountpoint
7748 			return B_BAD_VALUE;
7749 		}
7750 
7751 		pathVnode.Unset();
7752 	}
7753 
7754 	// if the volume is associated with a partition, lock the device of the
7755 	// partition as long as we are unmounting
7756 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7757 	KPartition* partition = mount->partition;
7758 	KDiskDevice* diskDevice = NULL;
7759 	if (partition != NULL) {
7760 		if (partition->Device() == NULL) {
7761 			dprintf("fs_unmount(): There is no device!\n");
7762 			return B_ERROR;
7763 		}
7764 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7765 		if (!diskDevice) {
7766 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7767 			return B_ERROR;
7768 		}
7769 	}
7770 	DeviceWriteLocker writeLocker(diskDevice, true);
7771 
7772 	// make sure, that the partition is not busy
7773 	if (partition != NULL) {
7774 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7775 			dprintf("fs_unmount(): Partition is busy.\n");
7776 			return B_BUSY;
7777 		}
7778 	}
7779 
7780 	// grab the vnode master mutex to keep someone from creating
7781 	// a vnode while we're figuring out if we can continue
7782 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7783 
7784 	bool disconnectedDescriptors = false;
7785 
7786 	while (true) {
7787 		bool busy = false;
7788 
7789 		// cycle through the list of vnodes associated with this mount and
7790 		// make sure all of them are not busy or have refs on them
7791 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7792 		while (struct vnode* vnode = iterator.Next()) {
7793 			if (vnode->IsBusy()) {
7794 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7795 				busy = true;
7796 				break;
7797 			}
7798 
7799 			// check the vnode's ref count -- subtract additional references for
7800 			// covering
7801 			int32 refCount = vnode->ref_count;
7802 			if (vnode->covers != NULL)
7803 				refCount--;
7804 			if (vnode->covered_by != NULL)
7805 				refCount--;
7806 
7807 			if (refCount != 0) {
7808 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7809 				// there are still vnodes in use on this mount, so we cannot
7810 				// unmount yet
7811 				busy = true;
7812 				break;
7813 			}
7814 		}
7815 
7816 		if (!busy)
7817 			break;
7818 
7819 		if ((flags & B_FORCE_UNMOUNT) == 0)
7820 			return B_BUSY;
7821 
7822 		if (disconnectedDescriptors) {
7823 			// wait a bit until the last access is finished, and then try again
7824 			vnodesWriteLocker.Unlock();
7825 			snooze(100000);
7826 			// TODO: if there is some kind of bug that prevents the ref counts
7827 			// from getting back to zero, this will fall into an endless loop...
7828 			vnodesWriteLocker.Lock();
7829 			continue;
7830 		}
7831 
7832 		// the file system is still busy - but we're forced to unmount it,
7833 		// so let's disconnect all open file descriptors
7834 
7835 		mount->unmounting = true;
7836 			// prevent new vnodes from being created
7837 
7838 		vnodesWriteLocker.Unlock();
7839 
7840 		disconnect_mount_or_vnode_fds(mount, NULL);
7841 		disconnectedDescriptors = true;
7842 
7843 		vnodesWriteLocker.Lock();
7844 	}
7845 
7846 	// We can safely continue. Mark all of the vnodes busy and this mount
7847 	// structure in unmounting state. Also undo the vnode covers/covered_by
7848 	// links.
7849 	mount->unmounting = true;
7850 
7851 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7852 	while (struct vnode* vnode = iterator.Next()) {
7853 		// Remove all covers/covered_by links from other mounts' nodes to this
7854 		// vnode and adjust the node ref count accordingly. We will release the
7855 		// references to the external vnodes below.
7856 		if (Vnode* coveredNode = vnode->covers) {
7857 			if (Vnode* coveringNode = vnode->covered_by) {
7858 				// We have both covered and covering vnodes, so just remove us
7859 				// from the chain.
7860 				coveredNode->covered_by = coveringNode;
7861 				coveringNode->covers = coveredNode;
7862 				vnode->ref_count -= 2;
7863 
7864 				vnode->covered_by = NULL;
7865 				vnode->covers = NULL;
7866 				vnode->SetCovering(false);
7867 				vnode->SetCovered(false);
7868 			} else {
7869 				// We only have a covered vnode. Remove its link to us.
7870 				coveredNode->covered_by = NULL;
7871 				coveredNode->SetCovered(false);
7872 				vnode->ref_count--;
7873 
7874 				// If the other node is an external vnode, we keep its link
7875 				// link around so we can put the reference later on. Otherwise
7876 				// we get rid of it right now.
7877 				if (coveredNode->mount == mount) {
7878 					vnode->covers = NULL;
7879 					coveredNode->ref_count--;
7880 				}
7881 			}
7882 		} else if (Vnode* coveringNode = vnode->covered_by) {
7883 			// We only have a covering vnode. Remove its link to us.
7884 			coveringNode->covers = NULL;
7885 			coveringNode->SetCovering(false);
7886 			vnode->ref_count--;
7887 
7888 			// If the other node is an external vnode, we keep its link
7889 			// link around so we can put the reference later on. Otherwise
7890 			// we get rid of it right now.
7891 			if (coveringNode->mount == mount) {
7892 				vnode->covered_by = NULL;
7893 				coveringNode->ref_count--;
7894 			}
7895 		}
7896 
7897 		vnode->SetBusy(true);
7898 		vnode_to_be_freed(vnode);
7899 	}
7900 
7901 	vnodesWriteLocker.Unlock();
7902 
7903 	// Free all vnodes associated with this mount.
7904 	// They will be removed from the mount list by free_vnode(), so
7905 	// we don't have to do this.
7906 	while (struct vnode* vnode = mount->vnodes.Head()) {
7907 		// Put the references to external covered/covering vnodes we kept above.
7908 		if (Vnode* coveredNode = vnode->covers)
7909 			put_vnode(coveredNode);
7910 		if (Vnode* coveringNode = vnode->covered_by)
7911 			put_vnode(coveringNode);
7912 
7913 		free_vnode(vnode, false);
7914 	}
7915 
7916 	// remove the mount structure from the hash table
7917 	rw_lock_write_lock(&sMountLock);
7918 	sMountsTable->Remove(mount);
7919 	rw_lock_write_unlock(&sMountLock);
7920 
7921 	mountOpLocker.Unlock();
7922 
7923 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7924 	notify_unmount(mount->id);
7925 
7926 	// dereference the partition and mark it unmounted
7927 	if (partition) {
7928 		partition->SetVolumeID(-1);
7929 		partition->SetMountCookie(NULL);
7930 
7931 		if (mount->owns_file_device)
7932 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7933 		partition->Unregister();
7934 	}
7935 
7936 	delete mount;
7937 	return B_OK;
7938 }
7939 
7940 
7941 static status_t
7942 fs_sync(dev_t device)
7943 {
7944 	struct fs_mount* mount;
7945 	status_t status = get_mount(device, &mount);
7946 	if (status != B_OK)
7947 		return status;
7948 
7949 	struct vnode marker;
7950 	memset(&marker, 0, sizeof(marker));
7951 	marker.SetBusy(true);
7952 	marker.SetRemoved(true);
7953 
7954 	// First, synchronize all file caches
7955 
7956 	while (true) {
7957 		WriteLocker locker(sVnodeLock);
7958 			// Note: That's the easy way. Which is probably OK for sync(),
7959 			// since it's a relatively rare call and doesn't need to allow for
7960 			// a lot of concurrency. Using a read lock would be possible, but
7961 			// also more involved, since we had to lock the individual nodes
7962 			// and take care of the locking order, which we might not want to
7963 			// do while holding fs_mount::lock.
7964 
7965 		// synchronize access to vnode list
7966 		mutex_lock(&mount->lock);
7967 
7968 		struct vnode* vnode;
7969 		if (!marker.IsRemoved()) {
7970 			vnode = mount->vnodes.GetNext(&marker);
7971 			mount->vnodes.Remove(&marker);
7972 			marker.SetRemoved(true);
7973 		} else
7974 			vnode = mount->vnodes.First();
7975 
7976 		while (vnode != NULL && (vnode->cache == NULL
7977 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7978 			// TODO: we could track writes (and writable mapped vnodes)
7979 			//	and have a simple flag that we could test for here
7980 			vnode = mount->vnodes.GetNext(vnode);
7981 		}
7982 
7983 		if (vnode != NULL) {
7984 			// insert marker vnode again
7985 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7986 			marker.SetRemoved(false);
7987 		}
7988 
7989 		mutex_unlock(&mount->lock);
7990 
7991 		if (vnode == NULL)
7992 			break;
7993 
7994 		vnode = lookup_vnode(mount->id, vnode->id);
7995 		if (vnode == NULL || vnode->IsBusy())
7996 			continue;
7997 
7998 		if (vnode->ref_count == 0) {
7999 			// this vnode has been unused before
8000 			vnode_used(vnode);
8001 		}
8002 		inc_vnode_ref_count(vnode);
8003 
8004 		locker.Unlock();
8005 
8006 		if (vnode->cache != NULL && !vnode->IsRemoved())
8007 			vnode->cache->WriteModified();
8008 
8009 		put_vnode(vnode);
8010 	}
8011 
8012 	// Let the file systems do their synchronizing work
8013 	if (HAS_FS_MOUNT_CALL(mount, sync))
8014 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8015 
8016 	// Finally, flush the underlying device's write cache (if possible.)
8017 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8018 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8019 
8020 	put_mount(mount);
8021 	return status;
8022 }
8023 
8024 
8025 static status_t
8026 fs_read_info(dev_t device, struct fs_info* info)
8027 {
8028 	struct fs_mount* mount;
8029 	status_t status = get_mount(device, &mount);
8030 	if (status != B_OK)
8031 		return status;
8032 
8033 	memset(info, 0, sizeof(struct fs_info));
8034 
8035 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8036 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8037 
8038 	// fill in info the file system doesn't (have to) know about
8039 	if (status == B_OK) {
8040 		info->dev = mount->id;
8041 		info->root = mount->root_vnode->id;
8042 
8043 		fs_volume* volume = mount->volume;
8044 		while (volume->super_volume != NULL)
8045 			volume = volume->super_volume;
8046 
8047 		strlcpy(info->fsh_name, volume->file_system_name,
8048 			sizeof(info->fsh_name));
8049 		if (mount->device_name != NULL) {
8050 			strlcpy(info->device_name, mount->device_name,
8051 				sizeof(info->device_name));
8052 		}
8053 	}
8054 
8055 	// if the call is not supported by the file system, there are still
8056 	// the parts that we filled out ourselves
8057 
8058 	put_mount(mount);
8059 	return status;
8060 }
8061 
8062 
8063 static status_t
8064 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8065 {
8066 	struct fs_mount* mount;
8067 	status_t status = get_mount(device, &mount);
8068 	if (status != B_OK)
8069 		return status;
8070 
8071 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8072 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8073 	else
8074 		status = B_READ_ONLY_DEVICE;
8075 
8076 	put_mount(mount);
8077 	return status;
8078 }
8079 
8080 
8081 static dev_t
8082 fs_next_device(int32* _cookie)
8083 {
8084 	struct fs_mount* mount = NULL;
8085 	dev_t device = *_cookie;
8086 
8087 	rw_lock_read_lock(&sMountLock);
8088 
8089 	// Since device IDs are assigned sequentially, this algorithm
8090 	// does work good enough. It makes sure that the device list
8091 	// returned is sorted, and that no device is skipped when an
8092 	// already visited device got unmounted.
8093 
8094 	while (device < sNextMountID) {
8095 		mount = find_mount(device++);
8096 		if (mount != NULL && mount->volume->private_volume != NULL)
8097 			break;
8098 	}
8099 
8100 	*_cookie = device;
8101 
8102 	if (mount != NULL)
8103 		device = mount->id;
8104 	else
8105 		device = B_BAD_VALUE;
8106 
8107 	rw_lock_read_unlock(&sMountLock);
8108 
8109 	return device;
8110 }
8111 
8112 
8113 ssize_t
8114 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8115 	void *buffer, size_t readBytes)
8116 {
8117 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8118 	if (attrFD < 0)
8119 		return attrFD;
8120 
8121 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8122 
8123 	_kern_close(attrFD);
8124 
8125 	return bytesRead;
8126 }
8127 
8128 
8129 static status_t
8130 get_cwd(char* buffer, size_t size, bool kernel)
8131 {
8132 	// Get current working directory from io context
8133 	struct io_context* context = get_current_io_context(kernel);
8134 	status_t status;
8135 
8136 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8137 
8138 	mutex_lock(&context->io_mutex);
8139 
8140 	struct vnode* vnode = context->cwd;
8141 	if (vnode)
8142 		inc_vnode_ref_count(vnode);
8143 
8144 	mutex_unlock(&context->io_mutex);
8145 
8146 	if (vnode) {
8147 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8148 		put_vnode(vnode);
8149 	} else
8150 		status = B_ERROR;
8151 
8152 	return status;
8153 }
8154 
8155 
8156 static status_t
8157 set_cwd(int fd, char* path, bool kernel)
8158 {
8159 	struct io_context* context;
8160 	struct vnode* oldDirectory;
8161 
8162 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8163 
8164 	// Get vnode for passed path, and bail if it failed
8165 	VnodePutter vnode;
8166 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8167 	if (status < 0)
8168 		return status;
8169 
8170 	if (!S_ISDIR(vnode->Type())) {
8171 		// nope, can't cwd to here
8172 		return B_NOT_A_DIRECTORY;
8173 	}
8174 
8175 	// We need to have the permission to enter the directory, too
8176 	if (HAS_FS_CALL(vnode, access)) {
8177 		status = FS_CALL(vnode.Get(), access, X_OK);
8178 		if (status != B_OK)
8179 			return status;
8180 	}
8181 
8182 	// Get current io context and lock
8183 	context = get_current_io_context(kernel);
8184 	mutex_lock(&context->io_mutex);
8185 
8186 	// save the old current working directory first
8187 	oldDirectory = context->cwd;
8188 	context->cwd = vnode.Detach();
8189 
8190 	mutex_unlock(&context->io_mutex);
8191 
8192 	if (oldDirectory)
8193 		put_vnode(oldDirectory);
8194 
8195 	return B_NO_ERROR;
8196 }
8197 
8198 
8199 static status_t
8200 user_copy_name(char* to, const char* from, size_t length)
8201 {
8202 	ssize_t len = user_strlcpy(to, from, length);
8203 	if (len < 0)
8204 		return len;
8205 	if (len >= (ssize_t)length)
8206 		return B_NAME_TOO_LONG;
8207 	return B_OK;
8208 }
8209 
8210 
8211 //	#pragma mark - kernel mirrored syscalls
8212 
8213 
8214 dev_t
8215 _kern_mount(const char* path, const char* device, const char* fsName,
8216 	uint32 flags, const char* args, size_t argsLength)
8217 {
8218 	KPath pathBuffer(path);
8219 	if (pathBuffer.InitCheck() != B_OK)
8220 		return B_NO_MEMORY;
8221 
8222 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8223 }
8224 
8225 
8226 status_t
8227 _kern_unmount(const char* path, uint32 flags)
8228 {
8229 	KPath pathBuffer(path);
8230 	if (pathBuffer.InitCheck() != B_OK)
8231 		return B_NO_MEMORY;
8232 
8233 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8234 }
8235 
8236 
8237 status_t
8238 _kern_read_fs_info(dev_t device, struct fs_info* info)
8239 {
8240 	if (info == NULL)
8241 		return B_BAD_VALUE;
8242 
8243 	return fs_read_info(device, info);
8244 }
8245 
8246 
8247 status_t
8248 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8249 {
8250 	if (info == NULL)
8251 		return B_BAD_VALUE;
8252 
8253 	return fs_write_info(device, info, mask);
8254 }
8255 
8256 
8257 status_t
8258 _kern_sync(void)
8259 {
8260 	// Note: _kern_sync() is also called from _user_sync()
8261 	int32 cookie = 0;
8262 	dev_t device;
8263 	while ((device = next_dev(&cookie)) >= 0) {
8264 		status_t status = fs_sync(device);
8265 		if (status != B_OK && status != B_BAD_VALUE) {
8266 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8267 				strerror(status));
8268 		}
8269 	}
8270 
8271 	return B_OK;
8272 }
8273 
8274 
8275 dev_t
8276 _kern_next_device(int32* _cookie)
8277 {
8278 	return fs_next_device(_cookie);
8279 }
8280 
8281 
8282 status_t
8283 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8284 	size_t infoSize)
8285 {
8286 	if (infoSize != sizeof(fd_info))
8287 		return B_BAD_VALUE;
8288 
8289 	// get the team
8290 	Team* team = Team::Get(teamID);
8291 	if (team == NULL)
8292 		return B_BAD_TEAM_ID;
8293 	BReference<Team> teamReference(team, true);
8294 
8295 	// now that we have a team reference, its I/O context won't go away
8296 	io_context* context = team->io_context;
8297 	MutexLocker contextLocker(context->io_mutex);
8298 
8299 	uint32 slot = *_cookie;
8300 
8301 	struct file_descriptor* descriptor;
8302 	while (slot < context->table_size
8303 		&& (descriptor = context->fds[slot]) == NULL) {
8304 		slot++;
8305 	}
8306 
8307 	if (slot >= context->table_size)
8308 		return B_ENTRY_NOT_FOUND;
8309 
8310 	info->number = slot;
8311 	info->open_mode = descriptor->open_mode;
8312 
8313 	struct vnode* vnode = fd_vnode(descriptor);
8314 	if (vnode != NULL) {
8315 		info->device = vnode->device;
8316 		info->node = vnode->id;
8317 	} else if (descriptor->u.mount != NULL) {
8318 		info->device = descriptor->u.mount->id;
8319 		info->node = -1;
8320 	}
8321 
8322 	*_cookie = slot + 1;
8323 	return B_OK;
8324 }
8325 
8326 
8327 int
8328 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8329 	int perms)
8330 {
8331 	if ((openMode & O_CREAT) != 0) {
8332 		return file_create_entry_ref(device, inode, name, openMode, perms,
8333 			true);
8334 	}
8335 
8336 	return file_open_entry_ref(device, inode, name, openMode, true);
8337 }
8338 
8339 
8340 /*!	\brief Opens a node specified by a FD + path pair.
8341 
8342 	At least one of \a fd and \a path must be specified.
8343 	If only \a fd is given, the function opens the node identified by this
8344 	FD. If only a path is given, this path is opened. If both are given and
8345 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8346 	of the directory (!) identified by \a fd.
8347 
8348 	\param fd The FD. May be < 0.
8349 	\param path The absolute or relative path. May be \c NULL.
8350 	\param openMode The open mode.
8351 	\return A FD referring to the newly opened node, or an error code,
8352 			if an error occurs.
8353 */
8354 int
8355 _kern_open(int fd, const char* path, int openMode, int perms)
8356 {
8357 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8358 	if (pathBuffer.InitCheck() != B_OK)
8359 		return B_NO_MEMORY;
8360 
8361 	if ((openMode & O_CREAT) != 0)
8362 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8363 
8364 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8365 }
8366 
8367 
8368 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8369 
8370 	The supplied name may be \c NULL, in which case directory identified
8371 	by \a device and \a inode will be opened. Otherwise \a device and
8372 	\a inode identify the parent directory of the directory to be opened
8373 	and \a name its entry name.
8374 
8375 	\param device If \a name is specified the ID of the device the parent
8376 		   directory of the directory to be opened resides on, otherwise
8377 		   the device of the directory itself.
8378 	\param inode If \a name is specified the node ID of the parent
8379 		   directory of the directory to be opened, otherwise node ID of the
8380 		   directory itself.
8381 	\param name The entry name of the directory to be opened. If \c NULL,
8382 		   the \a device + \a inode pair identify the node to be opened.
8383 	\return The FD of the newly opened directory or an error code, if
8384 			something went wrong.
8385 */
8386 int
8387 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8388 {
8389 	return dir_open_entry_ref(device, inode, name, true);
8390 }
8391 
8392 
8393 /*!	\brief Opens a directory specified by a FD + path pair.
8394 
8395 	At least one of \a fd and \a path must be specified.
8396 	If only \a fd is given, the function opens the directory identified by this
8397 	FD. If only a path is given, this path is opened. If both are given and
8398 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8399 	of the directory (!) identified by \a fd.
8400 
8401 	\param fd The FD. May be < 0.
8402 	\param path The absolute or relative path. May be \c NULL.
8403 	\return A FD referring to the newly opened directory, or an error code,
8404 			if an error occurs.
8405 */
8406 int
8407 _kern_open_dir(int fd, const char* path)
8408 {
8409 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8410 	if (pathBuffer.InitCheck() != B_OK)
8411 		return B_NO_MEMORY;
8412 
8413 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8414 }
8415 
8416 
8417 status_t
8418 _kern_fcntl(int fd, int op, size_t argument)
8419 {
8420 	return common_fcntl(fd, op, argument, true);
8421 }
8422 
8423 
8424 status_t
8425 _kern_fsync(int fd)
8426 {
8427 	return common_sync(fd, true);
8428 }
8429 
8430 
8431 status_t
8432 _kern_lock_node(int fd)
8433 {
8434 	return common_lock_node(fd, true);
8435 }
8436 
8437 
8438 status_t
8439 _kern_unlock_node(int fd)
8440 {
8441 	return common_unlock_node(fd, true);
8442 }
8443 
8444 
8445 status_t
8446 _kern_preallocate(int fd, off_t offset, off_t length)
8447 {
8448 	return common_preallocate(fd, offset, length, true);
8449 }
8450 
8451 
8452 status_t
8453 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8454 	int perms)
8455 {
8456 	return dir_create_entry_ref(device, inode, name, perms, true);
8457 }
8458 
8459 
8460 /*!	\brief Creates a directory specified by a FD + path pair.
8461 
8462 	\a path must always be specified (it contains the name of the new directory
8463 	at least). If only a path is given, this path identifies the location at
8464 	which the directory shall be created. If both \a fd and \a path are given
8465 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8466 	of the directory (!) identified by \a fd.
8467 
8468 	\param fd The FD. May be < 0.
8469 	\param path The absolute or relative path. Must not be \c NULL.
8470 	\param perms The access permissions the new directory shall have.
8471 	\return \c B_OK, if the directory has been created successfully, another
8472 			error code otherwise.
8473 */
8474 status_t
8475 _kern_create_dir(int fd, const char* path, int perms)
8476 {
8477 	KPath pathBuffer(path, KPath::DEFAULT);
8478 	if (pathBuffer.InitCheck() != B_OK)
8479 		return B_NO_MEMORY;
8480 
8481 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8482 }
8483 
8484 
8485 status_t
8486 _kern_remove_dir(int fd, const char* path)
8487 {
8488 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8489 	if (pathBuffer.InitCheck() != B_OK)
8490 		return B_NO_MEMORY;
8491 
8492 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8493 }
8494 
8495 
8496 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8497 
8498 	At least one of \a fd and \a path must be specified.
8499 	If only \a fd is given, the function the symlink to be read is the node
8500 	identified by this FD. If only a path is given, this path identifies the
8501 	symlink to be read. If both are given and the path is absolute, \a fd is
8502 	ignored; a relative path is reckoned off of the directory (!) identified
8503 	by \a fd.
8504 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8505 	will still be updated to reflect the required buffer size.
8506 
8507 	\param fd The FD. May be < 0.
8508 	\param path The absolute or relative path. May be \c NULL.
8509 	\param buffer The buffer into which the contents of the symlink shall be
8510 		   written.
8511 	\param _bufferSize A pointer to the size of the supplied buffer.
8512 	\return The length of the link on success or an appropriate error code
8513 */
8514 status_t
8515 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8516 {
8517 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8518 	if (pathBuffer.InitCheck() != B_OK)
8519 		return B_NO_MEMORY;
8520 
8521 	return common_read_link(fd, pathBuffer.LockBuffer(),
8522 		buffer, _bufferSize, true);
8523 }
8524 
8525 
8526 /*!	\brief Creates a symlink specified by a FD + path pair.
8527 
8528 	\a path must always be specified (it contains the name of the new symlink
8529 	at least). If only a path is given, this path identifies the location at
8530 	which the symlink shall be created. If both \a fd and \a path are given and
8531 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8532 	of the directory (!) identified by \a fd.
8533 
8534 	\param fd The FD. May be < 0.
8535 	\param toPath The absolute or relative path. Must not be \c NULL.
8536 	\param mode The access permissions the new symlink shall have.
8537 	\return \c B_OK, if the symlink has been created successfully, another
8538 			error code otherwise.
8539 */
8540 status_t
8541 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8542 {
8543 	KPath pathBuffer(path);
8544 	if (pathBuffer.InitCheck() != B_OK)
8545 		return B_NO_MEMORY;
8546 
8547 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8548 		toPath, mode, true);
8549 }
8550 
8551 
8552 status_t
8553 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8554 	bool traverseLeafLink)
8555 {
8556 	KPath pathBuffer(path);
8557 	KPath toPathBuffer(toPath);
8558 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8559 		return B_NO_MEMORY;
8560 
8561 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8562 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8563 }
8564 
8565 
8566 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8567 
8568 	\a path must always be specified (it contains at least the name of the entry
8569 	to be deleted). If only a path is given, this path identifies the entry
8570 	directly. If both \a fd and \a path are given and the path is absolute,
8571 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8572 	identified by \a fd.
8573 
8574 	\param fd The FD. May be < 0.
8575 	\param path The absolute or relative path. Must not be \c NULL.
8576 	\return \c B_OK, if the entry has been removed successfully, another
8577 			error code otherwise.
8578 */
8579 status_t
8580 _kern_unlink(int fd, const char* path)
8581 {
8582 	KPath pathBuffer(path);
8583 	if (pathBuffer.InitCheck() != B_OK)
8584 		return B_NO_MEMORY;
8585 
8586 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8587 }
8588 
8589 
8590 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8591 		   by another FD + path pair.
8592 
8593 	\a oldPath and \a newPath must always be specified (they contain at least
8594 	the name of the entry). If only a path is given, this path identifies the
8595 	entry directly. If both a FD and a path are given and the path is absolute,
8596 	the FD is ignored; a relative path is reckoned off of the directory (!)
8597 	identified by the respective FD.
8598 
8599 	\param oldFD The FD of the old location. May be < 0.
8600 	\param oldPath The absolute or relative path of the old location. Must not
8601 		   be \c NULL.
8602 	\param newFD The FD of the new location. May be < 0.
8603 	\param newPath The absolute or relative path of the new location. Must not
8604 		   be \c NULL.
8605 	\return \c B_OK, if the entry has been moved successfully, another
8606 			error code otherwise.
8607 */
8608 status_t
8609 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8610 {
8611 	KPath oldPathBuffer(oldPath);
8612 	KPath newPathBuffer(newPath);
8613 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8614 		return B_NO_MEMORY;
8615 
8616 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8617 		newFD, newPathBuffer.LockBuffer(), true);
8618 }
8619 
8620 
8621 status_t
8622 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8623 {
8624 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8625 	if (pathBuffer.InitCheck() != B_OK)
8626 		return B_NO_MEMORY;
8627 
8628 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8629 		true);
8630 }
8631 
8632 
8633 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8634 
8635 	If only \a fd is given, the stat operation associated with the type
8636 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8637 	given, this path identifies the entry for whose node to retrieve the
8638 	stat data. If both \a fd and \a path are given and the path is absolute,
8639 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8640 	identified by \a fd and specifies the entry whose stat data shall be
8641 	retrieved.
8642 
8643 	\param fd The FD. May be < 0.
8644 	\param path The absolute or relative path. Must not be \c NULL.
8645 	\param traverseLeafLink If \a path is given, \c true specifies that the
8646 		   function shall not stick to symlinks, but traverse them.
8647 	\param stat The buffer the stat data shall be written into.
8648 	\param statSize The size of the supplied stat buffer.
8649 	\return \c B_OK, if the the stat data have been read successfully, another
8650 			error code otherwise.
8651 */
8652 status_t
8653 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8654 	struct stat* stat, size_t statSize)
8655 {
8656 	struct stat completeStat;
8657 	struct stat* originalStat = NULL;
8658 	status_t status;
8659 
8660 	if (statSize > sizeof(struct stat))
8661 		return B_BAD_VALUE;
8662 
8663 	// this supports different stat extensions
8664 	if (statSize < sizeof(struct stat)) {
8665 		originalStat = stat;
8666 		stat = &completeStat;
8667 	}
8668 
8669 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8670 
8671 	if (status == B_OK && originalStat != NULL)
8672 		memcpy(originalStat, stat, statSize);
8673 
8674 	return status;
8675 }
8676 
8677 
8678 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8679 
8680 	If only \a fd is given, the stat operation associated with the type
8681 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8682 	given, this path identifies the entry for whose node to write the
8683 	stat data. If both \a fd and \a path are given and the path is absolute,
8684 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8685 	identified by \a fd and specifies the entry whose stat data shall be
8686 	written.
8687 
8688 	\param fd The FD. May be < 0.
8689 	\param path The absolute or relative path. May be \c NULL.
8690 	\param traverseLeafLink If \a path is given, \c true specifies that the
8691 		   function shall not stick to symlinks, but traverse them.
8692 	\param stat The buffer containing the stat data to be written.
8693 	\param statSize The size of the supplied stat buffer.
8694 	\param statMask A mask specifying which parts of the stat data shall be
8695 		   written.
8696 	\return \c B_OK, if the the stat data have been written successfully,
8697 			another error code otherwise.
8698 */
8699 status_t
8700 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8701 	const struct stat* stat, size_t statSize, int statMask)
8702 {
8703 	struct stat completeStat;
8704 
8705 	if (statSize > sizeof(struct stat))
8706 		return B_BAD_VALUE;
8707 
8708 	// this supports different stat extensions
8709 	if (statSize < sizeof(struct stat)) {
8710 		memset((uint8*)&completeStat + statSize, 0,
8711 			sizeof(struct stat) - statSize);
8712 		memcpy(&completeStat, stat, statSize);
8713 		stat = &completeStat;
8714 	}
8715 
8716 	status_t status;
8717 
8718 	if (path != NULL) {
8719 		// path given: write the stat of the node referred to by (fd, path)
8720 		KPath pathBuffer(path);
8721 		if (pathBuffer.InitCheck() != B_OK)
8722 			return B_NO_MEMORY;
8723 
8724 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8725 			traverseLeafLink, stat, statMask, true);
8726 	} else {
8727 		// no path given: get the FD and use the FD operation
8728 		FileDescriptorPutter descriptor
8729 			(get_fd(get_current_io_context(true), fd));
8730 		if (!descriptor.IsSet())
8731 			return B_FILE_ERROR;
8732 
8733 		if (descriptor->ops->fd_write_stat)
8734 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8735 		else
8736 			status = B_UNSUPPORTED;
8737 	}
8738 
8739 	return status;
8740 }
8741 
8742 
8743 int
8744 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8745 {
8746 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8747 	if (pathBuffer.InitCheck() != B_OK)
8748 		return B_NO_MEMORY;
8749 
8750 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8751 }
8752 
8753 
8754 int
8755 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8756 	int openMode)
8757 {
8758 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8759 	if (pathBuffer.InitCheck() != B_OK)
8760 		return B_NO_MEMORY;
8761 
8762 	if ((openMode & O_CREAT) != 0) {
8763 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8764 			true);
8765 	}
8766 
8767 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8768 }
8769 
8770 
8771 status_t
8772 _kern_remove_attr(int fd, const char* name)
8773 {
8774 	return attr_remove(fd, name, true);
8775 }
8776 
8777 
8778 status_t
8779 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8780 	const char* toName)
8781 {
8782 	return attr_rename(fromFile, fromName, toFile, toName, true);
8783 }
8784 
8785 
8786 int
8787 _kern_open_index_dir(dev_t device)
8788 {
8789 	return index_dir_open(device, true);
8790 }
8791 
8792 
8793 status_t
8794 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8795 {
8796 	return index_create(device, name, type, flags, true);
8797 }
8798 
8799 
8800 status_t
8801 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8802 {
8803 	return index_name_read_stat(device, name, stat, true);
8804 }
8805 
8806 
8807 status_t
8808 _kern_remove_index(dev_t device, const char* name)
8809 {
8810 	return index_remove(device, name, true);
8811 }
8812 
8813 
8814 status_t
8815 _kern_getcwd(char* buffer, size_t size)
8816 {
8817 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8818 
8819 	// Call vfs to get current working directory
8820 	return get_cwd(buffer, size, true);
8821 }
8822 
8823 
8824 status_t
8825 _kern_setcwd(int fd, const char* path)
8826 {
8827 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8828 	if (pathBuffer.InitCheck() != B_OK)
8829 		return B_NO_MEMORY;
8830 
8831 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8832 }
8833 
8834 
8835 //	#pragma mark - userland syscalls
8836 
8837 
8838 dev_t
8839 _user_mount(const char* userPath, const char* userDevice,
8840 	const char* userFileSystem, uint32 flags, const char* userArgs,
8841 	size_t argsLength)
8842 {
8843 	char fileSystem[B_FILE_NAME_LENGTH];
8844 	KPath path, device;
8845 	char* args = NULL;
8846 	status_t status;
8847 
8848 	if (!IS_USER_ADDRESS(userPath))
8849 		return B_BAD_ADDRESS;
8850 
8851 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8852 		return B_NO_MEMORY;
8853 
8854 	status = user_copy_name(path.LockBuffer(), userPath,
8855 		B_PATH_NAME_LENGTH);
8856 	if (status != B_OK)
8857 		return status;
8858 	path.UnlockBuffer();
8859 
8860 	if (userFileSystem != NULL) {
8861 		if (!IS_USER_ADDRESS(userFileSystem))
8862 			return B_BAD_ADDRESS;
8863 
8864 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8865 		if (status != B_OK)
8866 			return status;
8867 	}
8868 
8869 	if (userDevice != NULL) {
8870 		if (!IS_USER_ADDRESS(userDevice))
8871 			return B_BAD_ADDRESS;
8872 
8873 		status = user_copy_name(device.LockBuffer(), userDevice,
8874 			B_PATH_NAME_LENGTH);
8875 		if (status != B_OK)
8876 			return status;
8877 		device.UnlockBuffer();
8878 	}
8879 
8880 	if (userArgs != NULL && argsLength > 0) {
8881 		if (!IS_USER_ADDRESS(userArgs))
8882 			return B_BAD_ADDRESS;
8883 
8884 		// this is a safety restriction
8885 		if (argsLength >= 65536)
8886 			return B_NAME_TOO_LONG;
8887 
8888 		args = (char*)malloc(argsLength + 1);
8889 		if (args == NULL)
8890 			return B_NO_MEMORY;
8891 
8892 		status = user_copy_name(args, userArgs, argsLength + 1);
8893 		if (status != B_OK) {
8894 			free(args);
8895 			return status;
8896 		}
8897 	}
8898 
8899 	status = fs_mount(path.LockBuffer(),
8900 		userDevice != NULL ? device.Path() : NULL,
8901 		userFileSystem ? fileSystem : NULL, flags, args, false);
8902 
8903 	free(args);
8904 	return status;
8905 }
8906 
8907 
8908 status_t
8909 _user_unmount(const char* userPath, uint32 flags)
8910 {
8911 	if (!IS_USER_ADDRESS(userPath))
8912 		return B_BAD_ADDRESS;
8913 
8914 	KPath pathBuffer;
8915 	if (pathBuffer.InitCheck() != B_OK)
8916 		return B_NO_MEMORY;
8917 
8918 	char* path = pathBuffer.LockBuffer();
8919 
8920 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8921 	if (status != B_OK)
8922 		return status;
8923 
8924 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8925 }
8926 
8927 
8928 status_t
8929 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8930 {
8931 	struct fs_info info;
8932 	status_t status;
8933 
8934 	if (userInfo == NULL)
8935 		return B_BAD_VALUE;
8936 
8937 	if (!IS_USER_ADDRESS(userInfo))
8938 		return B_BAD_ADDRESS;
8939 
8940 	status = fs_read_info(device, &info);
8941 	if (status != B_OK)
8942 		return status;
8943 
8944 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8945 		return B_BAD_ADDRESS;
8946 
8947 	return B_OK;
8948 }
8949 
8950 
8951 status_t
8952 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8953 {
8954 	struct fs_info info;
8955 
8956 	if (userInfo == NULL)
8957 		return B_BAD_VALUE;
8958 
8959 	if (!IS_USER_ADDRESS(userInfo)
8960 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8961 		return B_BAD_ADDRESS;
8962 
8963 	return fs_write_info(device, &info, mask);
8964 }
8965 
8966 
8967 dev_t
8968 _user_next_device(int32* _userCookie)
8969 {
8970 	int32 cookie;
8971 	dev_t device;
8972 
8973 	if (!IS_USER_ADDRESS(_userCookie)
8974 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8975 		return B_BAD_ADDRESS;
8976 
8977 	device = fs_next_device(&cookie);
8978 
8979 	if (device >= B_OK) {
8980 		// update user cookie
8981 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8982 			return B_BAD_ADDRESS;
8983 	}
8984 
8985 	return device;
8986 }
8987 
8988 
8989 status_t
8990 _user_sync(void)
8991 {
8992 	return _kern_sync();
8993 }
8994 
8995 
8996 status_t
8997 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8998 	size_t infoSize)
8999 {
9000 	struct fd_info info;
9001 	uint32 cookie;
9002 
9003 	// only root can do this
9004 	if (geteuid() != 0)
9005 		return B_NOT_ALLOWED;
9006 
9007 	if (infoSize != sizeof(fd_info))
9008 		return B_BAD_VALUE;
9009 
9010 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9011 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9012 		return B_BAD_ADDRESS;
9013 
9014 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9015 	if (status != B_OK)
9016 		return status;
9017 
9018 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9019 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9020 		return B_BAD_ADDRESS;
9021 
9022 	return status;
9023 }
9024 
9025 
9026 status_t
9027 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9028 	char* userPath, size_t pathLength)
9029 {
9030 	if (!IS_USER_ADDRESS(userPath))
9031 		return B_BAD_ADDRESS;
9032 
9033 	KPath path;
9034 	if (path.InitCheck() != B_OK)
9035 		return B_NO_MEMORY;
9036 
9037 	// copy the leaf name onto the stack
9038 	char stackLeaf[B_FILE_NAME_LENGTH];
9039 	if (leaf != NULL) {
9040 		if (!IS_USER_ADDRESS(leaf))
9041 			return B_BAD_ADDRESS;
9042 
9043 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9044 		if (status != B_OK)
9045 			return status;
9046 
9047 		leaf = stackLeaf;
9048 	}
9049 
9050 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9051 		false, path.LockBuffer(), path.BufferSize());
9052 	if (status != B_OK)
9053 		return status;
9054 
9055 	path.UnlockBuffer();
9056 
9057 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9058 	if (length < 0)
9059 		return length;
9060 	if (length >= (int)pathLength)
9061 		return B_BUFFER_OVERFLOW;
9062 
9063 	return B_OK;
9064 }
9065 
9066 
9067 status_t
9068 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9069 {
9070 	if (userPath == NULL || buffer == NULL)
9071 		return B_BAD_VALUE;
9072 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9073 		return B_BAD_ADDRESS;
9074 
9075 	// copy path from userland
9076 	KPath pathBuffer;
9077 	if (pathBuffer.InitCheck() != B_OK)
9078 		return B_NO_MEMORY;
9079 	char* path = pathBuffer.LockBuffer();
9080 
9081 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9082 	if (status != B_OK)
9083 		return status;
9084 
9085 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9086 		false);
9087 	if (error != B_OK)
9088 		return error;
9089 
9090 	// copy back to userland
9091 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9092 	if (len < 0)
9093 		return len;
9094 	if (len >= B_PATH_NAME_LENGTH)
9095 		return B_BUFFER_OVERFLOW;
9096 
9097 	return B_OK;
9098 }
9099 
9100 
9101 int
9102 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9103 	int openMode, int perms)
9104 {
9105 	char name[B_FILE_NAME_LENGTH];
9106 
9107 	if (userName == NULL || device < 0 || inode < 0)
9108 		return B_BAD_VALUE;
9109 	if (!IS_USER_ADDRESS(userName))
9110 		return B_BAD_ADDRESS;
9111 	status_t status = user_copy_name(name, userName, sizeof(name));
9112 	if (status != B_OK)
9113 		return status;
9114 
9115 	if ((openMode & O_CREAT) != 0) {
9116 		return file_create_entry_ref(device, inode, name, openMode, perms,
9117 			false);
9118 	}
9119 
9120 	return file_open_entry_ref(device, inode, name, openMode, false);
9121 }
9122 
9123 
9124 int
9125 _user_open(int fd, const char* userPath, int openMode, int perms)
9126 {
9127 	KPath path;
9128 	if (path.InitCheck() != B_OK)
9129 		return B_NO_MEMORY;
9130 
9131 	char* buffer = path.LockBuffer();
9132 
9133 	if (!IS_USER_ADDRESS(userPath))
9134 		return B_BAD_ADDRESS;
9135 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9136 	if (status != B_OK)
9137 		return status;
9138 
9139 	if ((openMode & O_CREAT) != 0)
9140 		return file_create(fd, buffer, openMode, perms, false);
9141 
9142 	return file_open(fd, buffer, openMode, false);
9143 }
9144 
9145 
9146 int
9147 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9148 {
9149 	if (userName != NULL) {
9150 		char name[B_FILE_NAME_LENGTH];
9151 
9152 		if (!IS_USER_ADDRESS(userName))
9153 			return B_BAD_ADDRESS;
9154 		status_t status = user_copy_name(name, userName, sizeof(name));
9155 		if (status != B_OK)
9156 			return status;
9157 
9158 		return dir_open_entry_ref(device, inode, name, false);
9159 	}
9160 	return dir_open_entry_ref(device, inode, NULL, false);
9161 }
9162 
9163 
9164 int
9165 _user_open_dir(int fd, const char* userPath)
9166 {
9167 	if (userPath == NULL)
9168 		return dir_open(fd, NULL, false);
9169 
9170 	KPath path;
9171 	if (path.InitCheck() != B_OK)
9172 		return B_NO_MEMORY;
9173 
9174 	char* buffer = path.LockBuffer();
9175 
9176 	if (!IS_USER_ADDRESS(userPath))
9177 		return B_BAD_ADDRESS;
9178 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9179 	if (status != B_OK)
9180 		return status;
9181 
9182 	return dir_open(fd, buffer, false);
9183 }
9184 
9185 
9186 /*!	\brief Opens a directory's parent directory and returns the entry name
9187 		   of the former.
9188 
9189 	Aside from that it returns the directory's entry name, this method is
9190 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9191 	equivalent, if \a userName is \c NULL.
9192 
9193 	If a name buffer is supplied and the name does not fit the buffer, the
9194 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9195 
9196 	\param fd A FD referring to a directory.
9197 	\param userName Buffer the directory's entry name shall be written into.
9198 		   May be \c NULL.
9199 	\param nameLength Size of the name buffer.
9200 	\return The file descriptor of the opened parent directory, if everything
9201 			went fine, an error code otherwise.
9202 */
9203 int
9204 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9205 {
9206 	bool kernel = false;
9207 
9208 	if (userName && !IS_USER_ADDRESS(userName))
9209 		return B_BAD_ADDRESS;
9210 
9211 	// open the parent dir
9212 	int parentFD = dir_open(fd, (char*)"..", kernel);
9213 	if (parentFD < 0)
9214 		return parentFD;
9215 	FDCloser fdCloser(parentFD, kernel);
9216 
9217 	if (userName) {
9218 		// get the vnodes
9219 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9220 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9221 		VnodePutter parentVNodePutter(parentVNode);
9222 		VnodePutter dirVNodePutter(dirVNode);
9223 		if (!parentVNode || !dirVNode)
9224 			return B_FILE_ERROR;
9225 
9226 		// get the vnode name
9227 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9228 		struct dirent* buffer = (struct dirent*)_buffer;
9229 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9230 			sizeof(_buffer), get_current_io_context(false));
9231 		if (status != B_OK)
9232 			return status;
9233 
9234 		// copy the name to the userland buffer
9235 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9236 		if (len < 0)
9237 			return len;
9238 		if (len >= (int)nameLength)
9239 			return B_BUFFER_OVERFLOW;
9240 	}
9241 
9242 	return fdCloser.Detach();
9243 }
9244 
9245 
9246 status_t
9247 _user_fcntl(int fd, int op, size_t argument)
9248 {
9249 	status_t status = common_fcntl(fd, op, argument, false);
9250 	if (op == F_SETLKW)
9251 		syscall_restart_handle_post(status);
9252 
9253 	return status;
9254 }
9255 
9256 
9257 status_t
9258 _user_fsync(int fd)
9259 {
9260 	return common_sync(fd, false);
9261 }
9262 
9263 
9264 status_t
9265 _user_flock(int fd, int operation)
9266 {
9267 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9268 
9269 	// Check if the operation is valid
9270 	switch (operation & ~LOCK_NB) {
9271 		case LOCK_UN:
9272 		case LOCK_SH:
9273 		case LOCK_EX:
9274 			break;
9275 
9276 		default:
9277 			return B_BAD_VALUE;
9278 	}
9279 
9280 	struct vnode* vnode;
9281 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9282 	if (!descriptor.IsSet())
9283 		return B_FILE_ERROR;
9284 
9285 	if (descriptor->ops != &sFileOps)
9286 		return B_BAD_VALUE;
9287 
9288 	struct flock flock;
9289 	flock.l_start = 0;
9290 	flock.l_len = OFF_MAX;
9291 	flock.l_whence = 0;
9292 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9293 
9294 	status_t status;
9295 	if ((operation & LOCK_UN) != 0) {
9296 		if (HAS_FS_CALL(vnode, release_lock))
9297 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9298 		else
9299 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9300 	} else {
9301 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9302 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9303 				(operation & LOCK_NB) == 0);
9304 		} else {
9305 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9306 				(operation & LOCK_NB) == 0);
9307 		}
9308 	}
9309 
9310 	syscall_restart_handle_post(status);
9311 
9312 	return status;
9313 }
9314 
9315 
9316 status_t
9317 _user_lock_node(int fd)
9318 {
9319 	return common_lock_node(fd, false);
9320 }
9321 
9322 
9323 status_t
9324 _user_unlock_node(int fd)
9325 {
9326 	return common_unlock_node(fd, false);
9327 }
9328 
9329 
9330 status_t
9331 _user_preallocate(int fd, off_t offset, off_t length)
9332 {
9333 	return common_preallocate(fd, offset, length, false);
9334 }
9335 
9336 
9337 status_t
9338 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9339 	int perms)
9340 {
9341 	char name[B_FILE_NAME_LENGTH];
9342 	status_t status;
9343 
9344 	if (!IS_USER_ADDRESS(userName))
9345 		return B_BAD_ADDRESS;
9346 
9347 	status = user_copy_name(name, userName, sizeof(name));
9348 	if (status != B_OK)
9349 		return status;
9350 
9351 	return dir_create_entry_ref(device, inode, name, perms, false);
9352 }
9353 
9354 
9355 status_t
9356 _user_create_dir(int fd, const char* userPath, int perms)
9357 {
9358 	KPath pathBuffer;
9359 	if (pathBuffer.InitCheck() != B_OK)
9360 		return B_NO_MEMORY;
9361 
9362 	char* path = pathBuffer.LockBuffer();
9363 
9364 	if (!IS_USER_ADDRESS(userPath))
9365 		return B_BAD_ADDRESS;
9366 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9367 	if (status != B_OK)
9368 		return status;
9369 
9370 	return dir_create(fd, path, perms, false);
9371 }
9372 
9373 
9374 status_t
9375 _user_remove_dir(int fd, const char* userPath)
9376 {
9377 	KPath pathBuffer;
9378 	if (pathBuffer.InitCheck() != B_OK)
9379 		return B_NO_MEMORY;
9380 
9381 	char* path = pathBuffer.LockBuffer();
9382 
9383 	if (userPath != NULL) {
9384 		if (!IS_USER_ADDRESS(userPath))
9385 			return B_BAD_ADDRESS;
9386 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9387 		if (status != B_OK)
9388 			return status;
9389 	}
9390 
9391 	return dir_remove(fd, userPath ? path : NULL, false);
9392 }
9393 
9394 
9395 status_t
9396 _user_read_link(int fd, const char* userPath, char* userBuffer,
9397 	size_t* userBufferSize)
9398 {
9399 	KPath pathBuffer, linkBuffer;
9400 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9401 		return B_NO_MEMORY;
9402 
9403 	size_t bufferSize;
9404 
9405 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9406 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9407 		return B_BAD_ADDRESS;
9408 
9409 	char* path = pathBuffer.LockBuffer();
9410 	char* buffer = linkBuffer.LockBuffer();
9411 
9412 	if (userPath) {
9413 		if (!IS_USER_ADDRESS(userPath))
9414 			return B_BAD_ADDRESS;
9415 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9416 		if (status != B_OK)
9417 			return status;
9418 
9419 		if (bufferSize > B_PATH_NAME_LENGTH)
9420 			bufferSize = B_PATH_NAME_LENGTH;
9421 	}
9422 
9423 	size_t newBufferSize = bufferSize;
9424 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9425 		&newBufferSize, false);
9426 
9427 	// we also update the bufferSize in case of errors
9428 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9429 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9430 		return B_BAD_ADDRESS;
9431 
9432 	if (status != B_OK)
9433 		return status;
9434 
9435 	bufferSize = min_c(newBufferSize, bufferSize);
9436 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9437 		return B_BAD_ADDRESS;
9438 
9439 	return B_OK;
9440 }
9441 
9442 
9443 status_t
9444 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9445 	int mode)
9446 {
9447 	KPath pathBuffer;
9448 	KPath toPathBuffer;
9449 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9450 		return B_NO_MEMORY;
9451 
9452 	char* path = pathBuffer.LockBuffer();
9453 	char* toPath = toPathBuffer.LockBuffer();
9454 
9455 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9456 		return B_BAD_ADDRESS;
9457 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9458 	if (status != B_OK)
9459 		return status;
9460 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9461 	if (status != B_OK)
9462 		return status;
9463 
9464 	return common_create_symlink(fd, path, toPath, mode, false);
9465 }
9466 
9467 
9468 status_t
9469 _user_create_link(int pathFD, const char* userPath, int toFD,
9470 	const char* userToPath, bool traverseLeafLink)
9471 {
9472 	KPath pathBuffer;
9473 	KPath toPathBuffer;
9474 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9475 		return B_NO_MEMORY;
9476 
9477 	char* path = pathBuffer.LockBuffer();
9478 	char* toPath = toPathBuffer.LockBuffer();
9479 
9480 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9481 		return B_BAD_ADDRESS;
9482 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9483 	if (status != B_OK)
9484 		return status;
9485 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9486 	if (status != B_OK)
9487 		return status;
9488 
9489 	status = check_path(toPath);
9490 	if (status != B_OK)
9491 		return status;
9492 
9493 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9494 		false);
9495 }
9496 
9497 
9498 status_t
9499 _user_unlink(int fd, const char* userPath)
9500 {
9501 	KPath pathBuffer;
9502 	if (pathBuffer.InitCheck() != B_OK)
9503 		return B_NO_MEMORY;
9504 
9505 	char* path = pathBuffer.LockBuffer();
9506 
9507 	if (!IS_USER_ADDRESS(userPath))
9508 		return B_BAD_ADDRESS;
9509 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9510 	if (status != B_OK)
9511 		return status;
9512 
9513 	return common_unlink(fd, path, false);
9514 }
9515 
9516 
9517 status_t
9518 _user_rename(int oldFD, const char* userOldPath, int newFD,
9519 	const char* userNewPath)
9520 {
9521 	KPath oldPathBuffer;
9522 	KPath newPathBuffer;
9523 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9524 		return B_NO_MEMORY;
9525 
9526 	char* oldPath = oldPathBuffer.LockBuffer();
9527 	char* newPath = newPathBuffer.LockBuffer();
9528 
9529 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9530 		return B_BAD_ADDRESS;
9531 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9532 	if (status != B_OK)
9533 		return status;
9534 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9535 	if (status != B_OK)
9536 		return status;
9537 
9538 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9539 }
9540 
9541 
9542 status_t
9543 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9544 {
9545 	KPath pathBuffer;
9546 	if (pathBuffer.InitCheck() != B_OK)
9547 		return B_NO_MEMORY;
9548 
9549 	char* path = pathBuffer.LockBuffer();
9550 
9551 	if (!IS_USER_ADDRESS(userPath))
9552 		return B_BAD_ADDRESS;
9553 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9554 	if (status != B_OK)
9555 		return status;
9556 
9557 	// split into directory vnode and filename path
9558 	char filename[B_FILE_NAME_LENGTH];
9559 	VnodePutter dir;
9560 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9561 	if (status != B_OK)
9562 		return status;
9563 
9564 	// the underlying FS needs to support creating FIFOs
9565 	if (!HAS_FS_CALL(dir, create_special_node))
9566 		return B_UNSUPPORTED;
9567 
9568 	// create the entry	-- the FIFO sub node is set up automatically
9569 	fs_vnode superVnode;
9570 	ino_t nodeID;
9571 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9572 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9573 
9574 	// create_special_node() acquired a reference for us that we don't need.
9575 	if (status == B_OK)
9576 		put_vnode(dir->mount->volume, nodeID);
9577 
9578 	return status;
9579 }
9580 
9581 
9582 status_t
9583 _user_create_pipe(int* userFDs)
9584 {
9585 	// rootfs should support creating FIFOs, but let's be sure
9586 	if (!HAS_FS_CALL(sRoot, create_special_node))
9587 		return B_UNSUPPORTED;
9588 
9589 	// create the node	-- the FIFO sub node is set up automatically
9590 	fs_vnode superVnode;
9591 	ino_t nodeID;
9592 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9593 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9594 	if (status != B_OK)
9595 		return status;
9596 
9597 	// We've got one reference to the node and need another one.
9598 	struct vnode* vnode;
9599 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9600 	if (status != B_OK) {
9601 		// that should not happen
9602 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9603 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9604 		return status;
9605 	}
9606 
9607 	// Everything looks good so far. Open two FDs for reading respectively
9608 	// writing.
9609 	int fds[2];
9610 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9611 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9612 
9613 	FDCloser closer0(fds[0], false);
9614 	FDCloser closer1(fds[1], false);
9615 
9616 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9617 
9618 	// copy FDs to userland
9619 	if (status == B_OK) {
9620 		if (!IS_USER_ADDRESS(userFDs)
9621 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9622 			status = B_BAD_ADDRESS;
9623 		}
9624 	}
9625 
9626 	// keep FDs, if everything went fine
9627 	if (status == B_OK) {
9628 		closer0.Detach();
9629 		closer1.Detach();
9630 	}
9631 
9632 	return status;
9633 }
9634 
9635 
9636 status_t
9637 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9638 {
9639 	KPath pathBuffer;
9640 	if (pathBuffer.InitCheck() != B_OK)
9641 		return B_NO_MEMORY;
9642 
9643 	char* path = pathBuffer.LockBuffer();
9644 
9645 	if (!IS_USER_ADDRESS(userPath))
9646 		return B_BAD_ADDRESS;
9647 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9648 	if (status != B_OK)
9649 		return status;
9650 
9651 	return common_access(fd, path, mode, effectiveUserGroup, false);
9652 }
9653 
9654 
9655 status_t
9656 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9657 	struct stat* userStat, size_t statSize)
9658 {
9659 	struct stat stat = {0};
9660 	status_t status;
9661 
9662 	if (statSize > sizeof(struct stat))
9663 		return B_BAD_VALUE;
9664 
9665 	if (!IS_USER_ADDRESS(userStat))
9666 		return B_BAD_ADDRESS;
9667 
9668 	if (userPath != NULL) {
9669 		// path given: get the stat of the node referred to by (fd, path)
9670 		if (!IS_USER_ADDRESS(userPath))
9671 			return B_BAD_ADDRESS;
9672 
9673 		KPath pathBuffer;
9674 		if (pathBuffer.InitCheck() != B_OK)
9675 			return B_NO_MEMORY;
9676 
9677 		char* path = pathBuffer.LockBuffer();
9678 
9679 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9680 		if (status != B_OK)
9681 			return status;
9682 
9683 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9684 	} else {
9685 		// no path given: get the FD and use the FD operation
9686 		FileDescriptorPutter descriptor
9687 			(get_fd(get_current_io_context(false), fd));
9688 		if (!descriptor.IsSet())
9689 			return B_FILE_ERROR;
9690 
9691 		if (descriptor->ops->fd_read_stat)
9692 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9693 		else
9694 			status = B_UNSUPPORTED;
9695 	}
9696 
9697 	if (status != B_OK)
9698 		return status;
9699 
9700 	return user_memcpy(userStat, &stat, statSize);
9701 }
9702 
9703 
9704 status_t
9705 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9706 	const struct stat* userStat, size_t statSize, int statMask)
9707 {
9708 	if (statSize > sizeof(struct stat))
9709 		return B_BAD_VALUE;
9710 
9711 	struct stat stat;
9712 
9713 	if (!IS_USER_ADDRESS(userStat)
9714 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9715 		return B_BAD_ADDRESS;
9716 
9717 	// clear additional stat fields
9718 	if (statSize < sizeof(struct stat))
9719 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9720 
9721 	status_t status;
9722 
9723 	if (userPath != NULL) {
9724 		// path given: write the stat of the node referred to by (fd, path)
9725 		if (!IS_USER_ADDRESS(userPath))
9726 			return B_BAD_ADDRESS;
9727 
9728 		KPath pathBuffer;
9729 		if (pathBuffer.InitCheck() != B_OK)
9730 			return B_NO_MEMORY;
9731 
9732 		char* path = pathBuffer.LockBuffer();
9733 
9734 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9735 		if (status != B_OK)
9736 			return status;
9737 
9738 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9739 			statMask, false);
9740 	} else {
9741 		// no path given: get the FD and use the FD operation
9742 		FileDescriptorPutter descriptor
9743 			(get_fd(get_current_io_context(false), fd));
9744 		if (!descriptor.IsSet())
9745 			return B_FILE_ERROR;
9746 
9747 		if (descriptor->ops->fd_write_stat) {
9748 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9749 				statMask);
9750 		} else
9751 			status = B_UNSUPPORTED;
9752 	}
9753 
9754 	return status;
9755 }
9756 
9757 
9758 int
9759 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9760 {
9761 	KPath pathBuffer;
9762 	if (pathBuffer.InitCheck() != B_OK)
9763 		return B_NO_MEMORY;
9764 
9765 	char* path = pathBuffer.LockBuffer();
9766 
9767 	if (userPath != NULL) {
9768 		if (!IS_USER_ADDRESS(userPath))
9769 			return B_BAD_ADDRESS;
9770 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9771 		if (status != B_OK)
9772 			return status;
9773 	}
9774 
9775 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9776 }
9777 
9778 
9779 ssize_t
9780 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9781 	size_t readBytes)
9782 {
9783 	char attribute[B_FILE_NAME_LENGTH];
9784 
9785 	if (userAttribute == NULL)
9786 		return B_BAD_VALUE;
9787 	if (!IS_USER_ADDRESS(userAttribute))
9788 		return B_BAD_ADDRESS;
9789 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9790 	if (status != B_OK)
9791 		return status;
9792 
9793 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9794 	if (attr < 0)
9795 		return attr;
9796 
9797 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9798 	_user_close(attr);
9799 
9800 	return bytes;
9801 }
9802 
9803 
9804 ssize_t
9805 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9806 	const void* buffer, size_t writeBytes)
9807 {
9808 	char attribute[B_FILE_NAME_LENGTH];
9809 
9810 	if (userAttribute == NULL)
9811 		return B_BAD_VALUE;
9812 	if (!IS_USER_ADDRESS(userAttribute))
9813 		return B_BAD_ADDRESS;
9814 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9815 	if (status != B_OK)
9816 		return status;
9817 
9818 	// Try to support the BeOS typical truncation as well as the position
9819 	// argument
9820 	int attr = attr_create(fd, NULL, attribute, type,
9821 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9822 	if (attr < 0)
9823 		return attr;
9824 
9825 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9826 	_user_close(attr);
9827 
9828 	return bytes;
9829 }
9830 
9831 
9832 status_t
9833 _user_stat_attr(int fd, const char* userAttribute,
9834 	struct attr_info* userAttrInfo)
9835 {
9836 	char attribute[B_FILE_NAME_LENGTH];
9837 
9838 	if (userAttribute == NULL || userAttrInfo == NULL)
9839 		return B_BAD_VALUE;
9840 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9841 		return B_BAD_ADDRESS;
9842 	status_t status = user_copy_name(attribute, userAttribute,
9843 		sizeof(attribute));
9844 	if (status != B_OK)
9845 		return status;
9846 
9847 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9848 	if (attr < 0)
9849 		return attr;
9850 
9851 	struct file_descriptor* descriptor
9852 		= get_fd(get_current_io_context(false), attr);
9853 	if (descriptor == NULL) {
9854 		_user_close(attr);
9855 		return B_FILE_ERROR;
9856 	}
9857 
9858 	struct stat stat;
9859 	if (descriptor->ops->fd_read_stat)
9860 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9861 	else
9862 		status = B_UNSUPPORTED;
9863 
9864 	put_fd(descriptor);
9865 	_user_close(attr);
9866 
9867 	if (status == B_OK) {
9868 		attr_info info;
9869 		info.type = stat.st_type;
9870 		info.size = stat.st_size;
9871 
9872 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9873 			return B_BAD_ADDRESS;
9874 	}
9875 
9876 	return status;
9877 }
9878 
9879 
9880 int
9881 _user_open_attr(int fd, const char* userPath, const char* userName,
9882 	uint32 type, int openMode)
9883 {
9884 	char name[B_FILE_NAME_LENGTH];
9885 
9886 	if (!IS_USER_ADDRESS(userName))
9887 		return B_BAD_ADDRESS;
9888 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9889 	if (status != B_OK)
9890 		return status;
9891 
9892 	KPath pathBuffer;
9893 	if (pathBuffer.InitCheck() != B_OK)
9894 		return B_NO_MEMORY;
9895 
9896 	char* path = pathBuffer.LockBuffer();
9897 
9898 	if (userPath != NULL) {
9899 		if (!IS_USER_ADDRESS(userPath))
9900 			return B_BAD_ADDRESS;
9901 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9902 		if (status != B_OK)
9903 			return status;
9904 	}
9905 
9906 	if ((openMode & O_CREAT) != 0) {
9907 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9908 			false);
9909 	}
9910 
9911 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9912 }
9913 
9914 
9915 status_t
9916 _user_remove_attr(int fd, const char* userName)
9917 {
9918 	char name[B_FILE_NAME_LENGTH];
9919 
9920 	if (!IS_USER_ADDRESS(userName))
9921 		return B_BAD_ADDRESS;
9922 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9923 	if (status != B_OK)
9924 		return status;
9925 
9926 	return attr_remove(fd, name, false);
9927 }
9928 
9929 
9930 status_t
9931 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9932 	const char* userToName)
9933 {
9934 	if (!IS_USER_ADDRESS(userFromName)
9935 		|| !IS_USER_ADDRESS(userToName))
9936 		return B_BAD_ADDRESS;
9937 
9938 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9939 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9940 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9941 		return B_NO_MEMORY;
9942 
9943 	char* fromName = fromNameBuffer.LockBuffer();
9944 	char* toName = toNameBuffer.LockBuffer();
9945 
9946 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9947 	if (status != B_OK)
9948 		return status;
9949 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9950 	if (status != B_OK)
9951 		return status;
9952 
9953 	return attr_rename(fromFile, fromName, toFile, toName, false);
9954 }
9955 
9956 
9957 int
9958 _user_open_index_dir(dev_t device)
9959 {
9960 	return index_dir_open(device, false);
9961 }
9962 
9963 
9964 status_t
9965 _user_create_index(dev_t device, const char* userName, uint32 type,
9966 	uint32 flags)
9967 {
9968 	char name[B_FILE_NAME_LENGTH];
9969 
9970 	if (!IS_USER_ADDRESS(userName))
9971 		return B_BAD_ADDRESS;
9972 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9973 	if (status != B_OK)
9974 		return status;
9975 
9976 	return index_create(device, name, type, flags, false);
9977 }
9978 
9979 
9980 status_t
9981 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9982 {
9983 	char name[B_FILE_NAME_LENGTH];
9984 	struct stat stat = {0};
9985 	status_t status;
9986 
9987 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9988 		return B_BAD_ADDRESS;
9989 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9990 	if (status != B_OK)
9991 		return status;
9992 
9993 	status = index_name_read_stat(device, name, &stat, false);
9994 	if (status == B_OK) {
9995 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9996 			return B_BAD_ADDRESS;
9997 	}
9998 
9999 	return status;
10000 }
10001 
10002 
10003 status_t
10004 _user_remove_index(dev_t device, const char* userName)
10005 {
10006 	char name[B_FILE_NAME_LENGTH];
10007 
10008 	if (!IS_USER_ADDRESS(userName))
10009 		return B_BAD_ADDRESS;
10010 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10011 	if (status != B_OK)
10012 		return status;
10013 
10014 	return index_remove(device, name, false);
10015 }
10016 
10017 
10018 status_t
10019 _user_getcwd(char* userBuffer, size_t size)
10020 {
10021 	if (size == 0)
10022 		return B_BAD_VALUE;
10023 	if (!IS_USER_ADDRESS(userBuffer))
10024 		return B_BAD_ADDRESS;
10025 
10026 	if (size > kMaxPathLength)
10027 		size = kMaxPathLength;
10028 
10029 	KPath pathBuffer(size);
10030 	if (pathBuffer.InitCheck() != B_OK)
10031 		return B_NO_MEMORY;
10032 
10033 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
10034 
10035 	char* path = pathBuffer.LockBuffer();
10036 
10037 	status_t status = get_cwd(path, size, false);
10038 	if (status != B_OK)
10039 		return status;
10040 
10041 	// Copy back the result
10042 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10043 		return B_BAD_ADDRESS;
10044 
10045 	return status;
10046 }
10047 
10048 
10049 status_t
10050 _user_setcwd(int fd, const char* userPath)
10051 {
10052 	TRACE(("user_setcwd: path = %p\n", userPath));
10053 
10054 	KPath pathBuffer;
10055 	if (pathBuffer.InitCheck() != B_OK)
10056 		return B_NO_MEMORY;
10057 
10058 	char* path = pathBuffer.LockBuffer();
10059 
10060 	if (userPath != NULL) {
10061 		if (!IS_USER_ADDRESS(userPath))
10062 			return B_BAD_ADDRESS;
10063 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10064 		if (status != B_OK)
10065 			return status;
10066 	}
10067 
10068 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10069 }
10070 
10071 
10072 status_t
10073 _user_change_root(const char* userPath)
10074 {
10075 	// only root is allowed to chroot()
10076 	if (geteuid() != 0)
10077 		return B_NOT_ALLOWED;
10078 
10079 	// alloc path buffer
10080 	KPath pathBuffer;
10081 	if (pathBuffer.InitCheck() != B_OK)
10082 		return B_NO_MEMORY;
10083 
10084 	// copy userland path to kernel
10085 	char* path = pathBuffer.LockBuffer();
10086 	if (userPath != NULL) {
10087 		if (!IS_USER_ADDRESS(userPath))
10088 			return B_BAD_ADDRESS;
10089 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10090 		if (status != B_OK)
10091 			return status;
10092 	}
10093 
10094 	// get the vnode
10095 	VnodePutter vnode;
10096 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10097 	if (status != B_OK)
10098 		return status;
10099 
10100 	// set the new root
10101 	struct io_context* context = get_current_io_context(false);
10102 	mutex_lock(&sIOContextRootLock);
10103 	struct vnode* oldRoot = context->root;
10104 	context->root = vnode.Detach();
10105 	mutex_unlock(&sIOContextRootLock);
10106 
10107 	put_vnode(oldRoot);
10108 
10109 	return B_OK;
10110 }
10111 
10112 
10113 int
10114 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10115 	uint32 flags, port_id port, int32 token)
10116 {
10117 	if (device < 0 || userQuery == NULL || queryLength == 0)
10118 		return B_BAD_VALUE;
10119 
10120 	if (!IS_USER_ADDRESS(userQuery))
10121 		return B_BAD_ADDRESS;
10122 
10123 	// this is a safety restriction
10124 	if (queryLength >= 65536)
10125 		return B_NAME_TOO_LONG;
10126 
10127 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10128 	if (!query.IsValid())
10129 		return B_NO_MEMORY;
10130 
10131 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10132 		return B_BAD_ADDRESS;
10133 
10134 	return query_open(device, query, flags, port, token, false);
10135 }
10136 
10137 
10138 #include "vfs_request_io.cpp"
10139