xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 50b3e74489a1a46fec88df793e4f6780e4de933c)
/*
 * Copyright 2005-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <khash.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
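
// For illustration: a call through these macros, e.g.
//
//	status_t status = FS_CALL(vnode, read_stat, &stat);
//
// expands in the non-KDEBUG case to
//
//	vnode->ops->read_stat(vnode->mount->volume, vnode, &stat);
//
// i.e. the volume and the node are always passed as the first arguments of
// the file system hook.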


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()); this does not
	// depend on PATH_MAX.


struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and covers_vnode fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount has been mounted and it is ensured that it won't be
	unmounted (e.g. by holding a reference to a vnode of that mount), read
	access to those fields is always safe, even without additional locking.
	Moreover, while mounted the mount holds a reference to the covers_vnode,
	thus making the access path vnode->mount->covers_vnode->mount->... safe
	if a reference to vnode is held (note that for the root mount
	covers_vnode is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		recursive_lock_init(&rlock, "mount rlock");
	}

	~fs_mount()
	{
		recursive_lock_destroy(&rlock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	recursive_lock	rlock;	// guards the vnodes list
		// TODO: Make this a mutex! It is never used recursively.
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking it ensures that no FS is mounted or unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are
	  immutable after initialization will not be modified,
	- vnode::covered_by of any vnode in sVnodeTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count,
	as well as the busy, removed, and unused flags, and the vnode's type can
	also be written when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by requires write-locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not have this lock held when calling create_sem(), as this
	might call vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


#define VNODE_HASH_TABLE_SIZE 1024
static hash_table* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


// VNodePutter
class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};
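
// Usage sketch (illustrative only): VNodePutter is an RAII helper; a
// reference obtained via get_vnode() is released automatically when the
// putter goes out of scope, unless Detach() is called first:
//
//	struct vnode* vnode;
//	if (get_vnode(mountID, vnodeID, &vnode, true, false) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... use vnode; put_vnode() runs when putter is destroyed ...
//	}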


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
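
// Usage sketch (illustrative only; something_fails() merely stands for any
// further setup step): FDCloser closes a freshly opened descriptor on every
// error path, while Detach() hands ownership to the caller on success:
//
//	int fd = _kern_open(-1, path, openMode, permissions);
//	FDCloser fdCloser(fd, true);
//	if (fd < 0 || something_fails())
//		return B_ERROR;	// fd, if valid, is closed automatically
//	return fdCloser.Detach();	// success -- keep the fd open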


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
			false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
			fCookie, fPos, fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
			}
		}

		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
			fFlags, fStatus, fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	iovec*			fVecs;
	uint32			fCount;
	uint32			fFlags;
	size_t			fBytesRequested;
	status_t		fStatus;
	size_t			fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


static int
mount_compare(void* _m, const void* _key)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount->id == *id)
		return 0;

	return -1;
}


static uint32
mount_hash(void* _m, const void* _key, uint32 range)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount)
		return mount->id % range;

	return (uint32)*id % range;
}


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(mount->root_vnode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
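
// Examples (for illustration): get_file_system_name("bfs") simply returns
// strdup("bfs"), while get_file_system_name("file_systems/bfs/v1") strips
// the "file_systems/" prefix as well as the trailing "/v1" and also yields
// "bfs".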


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
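
// Example (for illustration): given a layered specification such as
// "bfs:write_overlay", layer 0 yields "bfs", layer 1 yields
// "write_overlay", and any higher layer yields NULL.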


static int
vnode_compare(void* _vnode, const void* _key)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

	if (vnode->device == key->device && vnode->id == key->vnode)
		return 0;

	return -1;
}


static uint32
vnode_hash(void* _vnode, const void* _key, uint32 range)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	if (vnode != NULL)
		return VHASH(vnode->device, vnode->id) % range;

	return VHASH(key->device, key->vnode) % range;

#undef VHASH
}
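
// For illustration: VHASH folds the 64 bit vnode ID into 32 bits by adding
// its upper and lower halves and XORs in the mount ID; e.g. device 5 and
// vnode ID 0x100000002 hash to ((0x1 + 0x2) ^ 0x5) % range, i.e. 6 % range.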


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode*)hash_lookup(sVnodeTable, &key);
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function
	write locks \c sVnodeLock and keeps it locked for the caller when
	returning. On error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}
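
// Usage sketch (illustrative only): on success the caller owns the write
// lock on sVnodeLock and must release it itself:
//
//	bool nodeCreated;
//	struct vnode* vnode;
//	if (create_new_vnode_and_lock(mountID, vnodeID, vnode,
//			nodeCreated) == B_OK) {
//		// ... the (busy) vnode can now be initialized ...
//		rw_lock_write_unlock(&sVnodeLock);
//	}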


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the
	// vnode itself (cf. vfs_get_vnode_cache()) and one belongs to the node's
	// file cache. Every reference to a cache but the last also includes a
	// reference to the vnode. The file cache, however, released its reference
	// (cf. file_cache_create()), so that this vnode's ref count has the
	// chance to drop to 0 at all. Deleting the file cache now will cause the
	// next to last cache reference to be released, which will also release a
	// (no longer existing) vnode reference. To avoid problems, we set the
	// vnode's ref count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	hash_remove(sVnodeTable, vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref
	count 0 -> 1 transition. That is: 1. this function must not be invoked
	when the node is busy in the first place, and 2. vnode_used() must be
	called for the node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait If \c true, the function waits (for a limited time) for a
		   busy vnode to become unbusy; otherwise it fails with \c B_BUSY
		   right away.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
		_vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = 2000;
		// try for 10 secs
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
				vnodeID);
			return B_BUSY;
		}
		snooze(5000); // 5 ms
		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			hash_remove(sVnodeTable, vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to
		// the tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with it.
	Note, you must not have the vnode's lock held when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
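
// Typical pairing (illustrative): the object is returned with its lock
// semaphore acquired, so every successful call must be matched by
// put_advisory_locking():
//
//	struct advisory_locking* locking = get_advisory_locking(vnode);
//	if (locking != NULL) {
//		// ... inspect or manipulate locking->locks ...
//		put_advisory_locking(locking);
//	}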


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success; even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*!	Retrieves the first lock that has been set by the current team.
*/
static status_t
get_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team == team) {
			flock->l_start = lock->start;
			flock->l_len = lock->end - lock->start + 1;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}


/*!	Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
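
// Worked example (for illustration): a lock covering bytes [100, 199]
// (start = 100, end = 199) and a normalized flock with l_start = 150,
// l_len = 100, covering [150, 249]: since 100 <= 249 and 199 >= 150, the
// regions intersect and the function returns true.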


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// locks are malloc()ed in acquire_advisory_lock() and
					// free()d below, so allocate consistently here
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// take over the original end before it is truncated below
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the mean time
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete_sem(locking->lock);
				delete_sem(locking->wait_sem);
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs do).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;
	sem_id waitForLock;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
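
// Worked example (for illustration): a request with l_whence = SEEK_CUR,
// l_start = 100, and l_len = -50 on a descriptor positioned at offset 400
// is first shifted to l_start = 500; the negative length then reverses the
// region, leaving l_start = 450 and l_len = 50, i.e. bytes [450, 499].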


static void
replace_vnode_if_disconnected(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
	struct vnode* fallBack, bool lockRootLock)
{
	if (lockRootLock)
		mutex_lock(&sIOContextRootLock);

	struct vnode* obsoleteVnode = NULL;

	if (vnode != NULL && vnode->mount == mount
		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
		obsoleteVnode = vnode;

		if (vnode == mount->root_vnode) {
			// redirect the vnode to the covered vnode
			vnode = mount->covers_vnode;
		} else
			vnode = fallBack;

		if (vnode != NULL)
			inc_vnode_ref_count(vnode);
	}

	if (lockRootLock)
		mutex_unlock(&sIOContextRootLock);

	if (obsoleteVnode != NULL)
		put_vnode(obsoleteVnode);
}


/*!	Disconnects all file descriptors that are associated with the
	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
	\a mount object.

	Note: After you've called this function there might still be ongoing
	accesses -- accesses that had already started won't be interrupted.
	However, any subsequent access will fail.

	This is not a cheap function and should be used with care and rarely.
	TODO: there is currently no means to stop a blocking read/write!
*/
void
disconnect_mount_or_vnode_fds(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	int32 nextTeamID = 0;

	while (true) {
		struct io_context* context = NULL;
		bool contextLocked = false;
		struct team* team = NULL;
		team_id lastTeamID;

		cpu_status state = disable_interrupts();
		SpinLocker teamsLock(gTeamSpinlock);

		lastTeamID = peek_next_thread_id();
		if (nextTeamID < lastTeamID) {
			// get next valid team
			while (nextTeamID < lastTeamID
				&& !(team = team_get_team_struct_locked(nextTeamID))) {
				nextTeamID++;
			}

			if (team) {
				context = (io_context*)team->io_context;

				// Some acrobatics to lock the context in a safe way
				// (cf. _kern_get_next_fd_info() for details).
				GRAB_THREAD_LOCK();
				teamsLock.Unlock();
				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
					== B_OK;
				RELEASE_THREAD_LOCK();

				nextTeamID++;
			}
		}

		teamsLock.Unlock();
		restore_interrupts(state);

		if (context == NULL)
			break;

		// we now have a context - since we couldn't lock it while having
		// safe access to the team structure, we now need to lock the mutex
		// manually

		if (!contextLocked) {
			// team seems to be gone, go over to the next team
			continue;
		}

		// the team cannot be deleted completely while we're owning its
		// io_context mutex, so we can safely play with it now

		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
			sRoot, true);
		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
			sRoot, false);

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor* descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode* vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if ((vnode != NULL && vnode->mount == mount)
					|| (vnode == NULL && descriptor->u.mount == mount))
					disconnect_fd(descriptor);

				put_fd(descriptor);
			}
		}

		mutex_unlock(&context->io_mutex);
	}
}


/*!	\brief Gets the root node of the current IO context.
	If \a kernel is \c true, the kernel IO context will be used.
	The caller obtains a reference to the returned node.
*/
struct vnode*
get_root_vnode(bool kernel)
{
	if (!kernel) {
		// Get the root of the current team's IO context
		struct io_context* context = get_current_io_context(kernel);

		mutex_lock(&sIOContextRootLock);

		struct vnode* root = context->root;
		if (root != NULL)
			inc_vnode_ref_count(root);

		mutex_unlock(&sIOContextRootLock);

		if (root != NULL)
			return root;

		// That should never happen.
		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
			"root\n", team_get_current_team_id());
	}

	inc_vnode_ref_count(sRoot);
	return sRoot;
}
1886 
1887 
1888 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1889 		   by.
1890 
	Given an arbitrary vnode, the function checks whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
	volume root node and returns it.
1894 
1895 	\param vnode The vnode in question.
	\return The volume root vnode the given vnode is covered by, if it is
			indeed a mount point, or \c NULL otherwise.
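
	For illustration: assuming a volume is mounted at "/boot", passing the
	vnode of the "/boot" directory of the parent file system returns the
	root vnode of the mounted volume (with an extra reference).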
1898 */
1899 static struct vnode*
1900 resolve_mount_point_to_volume_root(struct vnode* vnode)
1901 {
1902 	if (!vnode)
1903 		return NULL;
1904 
1905 	struct vnode* volumeRoot = NULL;
1906 
1907 	rw_lock_read_lock(&sVnodeLock);
1908 
1909 	if (vnode->covered_by) {
1910 		volumeRoot = vnode->covered_by;
1911 		inc_vnode_ref_count(volumeRoot);
1912 	}
1913 
1914 	rw_lock_read_unlock(&sVnodeLock);
1915 
1916 	return volumeRoot;
1917 }
1918 
1919 
1920 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1921 		   by.
1922 
	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
	it simply returns the supplied mount and node ID.
1927 
1928 	In case of error (e.g. the supplied node could not be found) the variables
1929 	for storing the resolved mount and node ID remain untouched and an error
1930 	code is returned.
1931 
1932 	\param mountID The mount ID of the vnode in question.
1933 	\param nodeID The node ID of the vnode in question.
1934 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1935 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1936 	\return
1937 	- \c B_OK, if everything went fine,
1938 	- another error code, if something went wrong.
1939 */
1940 status_t
1941 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1942 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1943 {
1944 	// get the node
1945 	struct vnode* node;
1946 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1947 	if (error != B_OK)
1948 		return error;
1949 
1950 	// resolve the node
1951 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1952 	if (resolvedNode) {
1953 		put_vnode(node);
1954 		node = resolvedNode;
1955 	}
1956 
1957 	// set the return values
1958 	*resolvedMountID = node->device;
1959 	*resolvedNodeID = node->id;
1960 
1961 	put_vnode(node);
1962 
1963 	return B_OK;
1964 }
1965 
1966 
1967 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1968 
	Given an arbitrary vnode, the function checks whether the node is the
1970 	root of a volume. If it is (and if it is not "/"), the function obtains
1971 	a reference to the underlying mount point node and returns it.
1972 
1973 	\param vnode The vnode in question (caller must have a reference).
1974 	\return The mount point vnode the vnode covers, if it is indeed a volume
1975 			root and not "/", or \c NULL otherwise.
1976 */
1977 static struct vnode*
1978 resolve_volume_root_to_mount_point(struct vnode* vnode)
1979 {
1980 	if (!vnode)
1981 		return NULL;
1982 
1983 	struct vnode* mountPoint = NULL;
1984 
1985 	struct fs_mount* mount = vnode->mount;
1986 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1987 		mountPoint = mount->covers_vnode;
1988 		inc_vnode_ref_count(mountPoint);
1989 	}
1990 
1991 	return mountPoint;
1992 }
1993 
1994 
1995 /*!	\brief Gets the directory path and leaf name for a given path.
1996 
1997 	The supplied \a path is transformed to refer to the directory part of
1998 	the entry identified by the original path, and into the buffer \a filename
1999 	the leaf name of the original entry is written.
2000 	Neither the returned path nor the leaf name can be expected to be
2001 	canonical.
2002 
2003 	\param path The path to be analyzed. Must be able to store at least one
2004 		   additional character.
2005 	\param filename The buffer into which the leaf name will be written.
2006 		   Must be of size B_FILE_NAME_LENGTH at least.
2007 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2008 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2009 		   if the given path name is empty.
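
	A sketch of the in/out behavior (purely illustrative):
		char path[B_PATH_NAME_LENGTH] = "/boot/home/Desktop";
		char leaf[B_FILE_NAME_LENGTH];
		get_dir_path_and_leaf(path, leaf);
			// path is now "/boot/home/.", leaf is "Desktop"
	A single-segment path such as "foo" becomes "." with leaf "foo".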
2010 */
2011 static status_t
2012 get_dir_path_and_leaf(char* path, char* filename)
2013 {
2014 	if (*path == '\0')
2015 		return B_ENTRY_NOT_FOUND;
2016 
2017 	char* last = strrchr(path, '/');
2018 		// '/' are not allowed in file names!
2019 
2020 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2021 
2022 	if (last == NULL) {
2023 		// this path is single segment with no '/' in it
2024 		// ex. "foo"
2025 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2026 			return B_NAME_TOO_LONG;
2027 
2028 		strcpy(path, ".");
2029 	} else {
2030 		last++;
2031 		if (last[0] == '\0') {
2032 			// special case: the path ends in one or more '/' - remove them
2033 			while (*--last == '/' && last != path);
2034 			last[1] = '\0';
2035 
2036 			if (last == path && last[0] == '/') {
2037 				// This path points to the root of the file system
2038 				strcpy(filename, ".");
2039 				return B_OK;
2040 			}
2041 			for (; last != path && *(last - 1) != '/'; last--);
2042 				// rewind to the start of the leaf before the '/'
2043 		}
2044 
2045 		// normal leaf: replace the leaf portion of the path with a '.'
2046 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2047 			return B_NAME_TOO_LONG;
2048 
2049 		last[0] = '.';
2050 		last[1] = '\0';
2051 	}
2052 	return B_OK;
2053 }
2054 
2055 
2056 static status_t
2057 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2058 	bool traverse, bool kernel, struct vnode** _vnode)
2059 {
2060 	char clonedName[B_FILE_NAME_LENGTH + 1];
2061 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2062 		return B_NAME_TOO_LONG;
2063 
2064 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2065 	struct vnode* directory;
2066 
2067 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2068 	if (status < 0)
2069 		return status;
2070 
2071 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2072 		_vnode, NULL);
2073 }
2074 
2075 
2076 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2077 	and returns the respective vnode.
2078 	On success a reference to the vnode is acquired for the caller.
2079 */
2080 static status_t
2081 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2082 {
2083 	ino_t id;
2084 
2085 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2086 		return get_vnode(dir->device, id, _vnode, true, false);
2087 
2088 	status_t status = FS_CALL(dir, lookup, name, &id);
2089 	if (status != B_OK)
2090 		return status;
2091 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2094 	rw_lock_read_lock(&sVnodeLock);
2095 	*_vnode = lookup_vnode(dir->device, id);
2096 	rw_lock_read_unlock(&sVnodeLock);
2097 
2098 	if (*_vnode == NULL) {
2099 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2100 			"0x%Lx)\n", dir->device, id);
2101 		return B_ENTRY_NOT_FOUND;
2102 	}
2103 
2104 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2105 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2106 //		(*_vnode)->mount->id, (*_vnode)->id);
2107 
2108 	return B_OK;
2109 }
2110 
2111 
2112 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2113 	\a path must not be NULL.
	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this function decrements the ref count of the starting \a vnode,
	whether it succeeds or not!
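
	For example, starting at the vnode of "/boot/home" with the path
	"Desktop/../mail", the loop looks up "Desktop", follows the ".." back to
	"/boot/home", and finally returns the vnode of "mail".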
2119 */
2120 static status_t
2121 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2122 	int count, struct io_context* ioContext, struct vnode** _vnode,
2123 	ino_t* _parentID)
2124 {
2125 	status_t status = B_OK;
2126 	ino_t lastParentID = vnode->id;
2127 
2128 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2129 
2130 	if (path == NULL) {
2131 		put_vnode(vnode);
2132 		return B_BAD_VALUE;
2133 	}
2134 
2135 	if (*path == '\0') {
2136 		put_vnode(vnode);
2137 		return B_ENTRY_NOT_FOUND;
2138 	}
2139 
2140 	while (true) {
2141 		struct vnode* nextVnode;
2142 		char* nextPath;
2143 
2144 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2145 			path));
2146 
2147 		// done?
2148 		if (path[0] == '\0')
2149 			break;
2150 
2151 		// walk to find the next path component ("path" will point to a single
2152 		// path component), and filter out multiple slashes
2153 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2154 				nextPath++);
2155 
2156 		if (*nextPath == '/') {
2157 			*nextPath = '\0';
2158 			do
2159 				nextPath++;
2160 			while (*nextPath == '/');
2161 		}
2162 
2163 		// See if the '..' is at the root of a mount and move to the covered
2164 		// vnode so we pass the '..' path to the underlying filesystem.
2165 		// Also prevent breaking the root of the IO context.
2166 		if (strcmp("..", path) == 0) {
2167 			if (vnode == ioContext->root) {
2168 				// Attempted prison break! Keep it contained.
2169 				path = nextPath;
2170 				continue;
2171 			} else if (vnode->mount->root_vnode == vnode
2172 				&& vnode->mount->covers_vnode) {
2173 				nextVnode = vnode->mount->covers_vnode;
2174 				inc_vnode_ref_count(nextVnode);
2175 				put_vnode(vnode);
2176 				vnode = nextVnode;
2177 			}
2178 		}
2179 
2180 		// check if vnode is really a directory
2181 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2182 			status = B_NOT_A_DIRECTORY;
2183 
2184 		// Check if we have the right to search the current directory vnode.
2185 		// If a file system doesn't have the access() function, we assume that
2186 		// searching a directory is always allowed
2187 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2188 			status = FS_CALL(vnode, access, X_OK);
2189 
2190 		// Tell the filesystem to get the vnode of this path component (if we
2191 		// got the permission from the call above)
2192 		if (status == B_OK)
2193 			status = lookup_dir_entry(vnode, path, &nextVnode);
2194 
2195 		if (status != B_OK) {
2196 			put_vnode(vnode);
2197 			return status;
2198 		}
2199 
2200 		// If the new node is a symbolic link, resolve it (if we've been told
2201 		// to do it)
2202 		if (S_ISLNK(nextVnode->Type())
2203 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2204 			size_t bufferSize;
2205 			char* buffer;
2206 
2207 			TRACE(("traverse link\n"));
2208 
2209 			// it's not exactly nice style using goto in this way, but hey,
2210 			// it works :-/
2211 			if (count + 1 > B_MAX_SYMLINKS) {
2212 				status = B_LINK_LIMIT;
2213 				goto resolve_link_error;
2214 			}
2215 
2216 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2217 			if (buffer == NULL) {
2218 				status = B_NO_MEMORY;
2219 				goto resolve_link_error;
2220 			}
2221 
2222 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2223 				bufferSize--;
2224 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2225 				// null-terminate
2226 				if (status >= 0)
2227 					buffer[bufferSize] = '\0';
2228 			} else
2229 				status = B_BAD_VALUE;
2230 
2231 			if (status != B_OK) {
2232 				free(buffer);
2233 
2234 		resolve_link_error:
2235 				put_vnode(vnode);
2236 				put_vnode(nextVnode);
2237 
2238 				return status;
2239 			}
2240 			put_vnode(nextVnode);
2241 
2242 			// Check if we start from the root directory or the current
2243 			// directory ("vnode" still points to that one).
2244 			// Cut off all leading slashes if it's the root directory
2245 			path = buffer;
2246 			bool absoluteSymlink = false;
2247 			if (path[0] == '/') {
2248 				// we don't need the old directory anymore
2249 				put_vnode(vnode);
2250 
2251 				while (*++path == '/')
2252 					;
2253 
2254 				mutex_lock(&sIOContextRootLock);
2255 				vnode = ioContext->root;
2256 				inc_vnode_ref_count(vnode);
2257 				mutex_unlock(&sIOContextRootLock);
2258 
2259 				absoluteSymlink = true;
2260 			}
2261 
2262 			inc_vnode_ref_count(vnode);
2263 				// balance the next recursion - we will decrement the
2264 				// ref_count of the vnode, no matter if we succeeded or not
2265 
2266 			if (absoluteSymlink && *path == '\0') {
2267 				// symlink was just "/"
2268 				nextVnode = vnode;
2269 			} else {
2270 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2271 					ioContext, &nextVnode, &lastParentID);
2272 			}
2273 
2274 			free(buffer);
2275 
2276 			if (status != B_OK) {
2277 				put_vnode(vnode);
2278 				return status;
2279 			}
2280 		} else
2281 			lastParentID = vnode->id;
2282 
2283 		// decrease the ref count on the old dir we just looked up into
2284 		put_vnode(vnode);
2285 
2286 		path = nextPath;
2287 		vnode = nextVnode;
2288 
2289 		// see if we hit a mount point
2290 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2291 		if (mountPoint) {
2292 			put_vnode(vnode);
2293 			vnode = mountPoint;
2294 		}
2295 	}
2296 
2297 	*_vnode = vnode;
2298 	if (_parentID)
2299 		*_parentID = lastParentID;
2300 
2301 	return B_OK;
2302 }
2303 
2304 
2305 static status_t
2306 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2307 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2308 {
2309 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2310 		get_current_io_context(kernel), _vnode, _parentID);
2311 }
2312 
2313 
2314 static status_t
2315 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2316 	ino_t* _parentID, bool kernel)
2317 {
2318 	struct vnode* start = NULL;
2319 
2320 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2321 
2322 	if (!path)
2323 		return B_BAD_VALUE;
2324 
2325 	if (*path == '\0')
2326 		return B_ENTRY_NOT_FOUND;
2327 
2328 	// figure out if we need to start at root or at cwd
2329 	if (*path == '/') {
2330 		if (sRoot == NULL) {
2331 			// we're a bit early, aren't we?
2332 			return B_ERROR;
2333 		}
2334 
2335 		while (*++path == '/')
2336 			;
2337 		start = get_root_vnode(kernel);
2338 
2339 		if (*path == '\0') {
2340 			*_vnode = start;
2341 			return B_OK;
2342 		}
2343 
2344 	} else {
2345 		struct io_context* context = get_current_io_context(kernel);
2346 
2347 		mutex_lock(&context->io_mutex);
2348 		start = context->cwd;
2349 		if (start != NULL)
2350 			inc_vnode_ref_count(start);
2351 		mutex_unlock(&context->io_mutex);
2352 
2353 		if (start == NULL)
2354 			return B_ERROR;
2355 	}
2356 
2357 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2358 		_parentID);
2359 }
2360 
2361 
/*! Returns the vnode for the next-to-last segment of the path, and returns
	the last portion in \a filename.
2364 	The path buffer must be able to store at least one additional character.
2365 */
2366 static status_t
2367 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2368 	bool kernel)
2369 {
2370 	status_t status = get_dir_path_and_leaf(path, filename);
2371 	if (status != B_OK)
2372 		return status;
2373 
2374 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2375 }
2376 
2377 
2378 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2379 		   to by a FD + path pair.
2380 
	\a path must be given in either case. \a fd might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2386 
2387 	The caller has the responsibility to call put_vnode() on the returned
2388 	directory vnode.
2389 
2390 	\param fd The FD. May be < 0.
2391 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2392 	       is modified by this function. It must have at least room for a
2393 	       string one character longer than the path it contains.
2394 	\param _vnode A pointer to a variable the directory vnode shall be written
2395 		   into.
2396 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2397 		   the leaf name of the specified entry will be written.
2398 	\param kernel \c true, if invoked from inside the kernel, \c false if
2399 		   invoked from userland.
2400 	\return \c B_OK, if everything went fine, another error code otherwise.
2401 */
2402 static status_t
2403 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2404 	char* filename, bool kernel)
2405 {
2406 	if (!path)
2407 		return B_BAD_VALUE;
2408 	if (*path == '\0')
2409 		return B_ENTRY_NOT_FOUND;
2410 	if (fd < 0)
2411 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2412 
2413 	status_t status = get_dir_path_and_leaf(path, filename);
2414 	if (status != B_OK)
2415 		return status;
2416 
2417 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2418 }
2419 
2420 
2421 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2422 		   to by a vnode + path pair.
2423 
	\a path must be given in either case. \a vnode might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2429 
2430 	The caller has the responsibility to call put_vnode() on the returned
2431 	directory vnode.
2432 
2433 	\param vnode The vnode. May be \c NULL.
2434 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2435 	       is modified by this function. It must have at least room for a
2436 	       string one character longer than the path it contains.
2437 	\param _vnode A pointer to a variable the directory vnode shall be written
2438 		   into.
2439 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2440 		   the leaf name of the specified entry will be written.
2441 	\param kernel \c true, if invoked from inside the kernel, \c false if
2442 		   invoked from userland.
2443 	\return \c B_OK, if everything went fine, another error code otherwise.
2444 */
2445 static status_t
2446 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2447 	struct vnode** _vnode, char* filename, bool kernel)
2448 {
2449 	if (!path)
2450 		return B_BAD_VALUE;
2451 	if (*path == '\0')
2452 		return B_ENTRY_NOT_FOUND;
2453 	if (vnode == NULL || path[0] == '/')
2454 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2455 
2456 	status_t status = get_dir_path_and_leaf(path, filename);
2457 	if (status != B_OK)
2458 		return status;
2459 
2460 	inc_vnode_ref_count(vnode);
2461 		// vnode_path_to_vnode() always decrements the ref count
2462 
2463 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2464 }
2465 
2466 
2467 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2468 */
2469 static status_t
2470 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2471 	size_t bufferSize, struct io_context* ioContext)
2472 {
2473 	if (bufferSize < sizeof(struct dirent))
2474 		return B_BAD_VALUE;
2475 
2476 	// See if vnode is the root of a mount and move to the covered
2477 	// vnode so we get the underlying file system
2478 	VNodePutter vnodePutter;
2479 	if (vnode->mount->root_vnode == vnode
2480 		&& vnode->mount->covers_vnode != NULL) {
2481 		vnode = vnode->mount->covers_vnode;
2482 		inc_vnode_ref_count(vnode);
2483 		vnodePutter.SetTo(vnode);
2484 	}
2485 
2486 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2487 		// The FS supports getting the name of a vnode.
2488 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2489 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2490 			return B_OK;
2491 	}
2492 
	// The FS doesn't support getting the name of a vnode. So we search the
	// parent directory for the vnode, if the caller supplied one.
2495 
2496 	if (parent == NULL)
2497 		return EOPNOTSUPP;
2498 
2499 	void* cookie;
2500 
2501 	status_t status = FS_CALL(parent, open_dir, &cookie);
2502 	if (status >= B_OK) {
2503 		while (true) {
2504 			uint32 num = 1;
2505 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2506 				&num);
2507 			if (status != B_OK)
2508 				break;
2509 			if (num == 0) {
2510 				status = B_ENTRY_NOT_FOUND;
2511 				break;
2512 			}
2513 
2514 			if (vnode->id == buffer->d_ino) {
2515 				// found correct entry!
2516 				break;
2517 			}
2518 		}
2519 
		// the cookie was opened on the parent, so close it there as well
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2522 	}
2523 	return status;
2524 }
2525 
2526 
2527 static status_t
2528 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2529 	size_t nameSize, bool kernel)
2530 {
2531 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2532 	struct dirent* dirent = (struct dirent*)buffer;
2533 
2534 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2535 		get_current_io_context(kernel));
2536 	if (status != B_OK)
2537 		return status;
2538 
2539 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2540 		return B_BUFFER_OVERFLOW;
2541 
2542 	return B_OK;
2543 }
2544 
2545 
2546 /*!	Gets the full path to a given directory vnode.
2547 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2548 	file system doesn't support this call, it will fall back to iterating
2549 	through the parent directory to get the name of the child.
2550 
2551 	To protect against circular loops, it supports a maximum tree depth
2552 	of 256 levels.
2553 
	Note that the path might no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path is
	still valid, as paths aren't safe anyway: the path to a file can change
	at any time.
2557 
2558 	It might be a good idea, though, to check if the returned path exists
2559 	in the calling function (it's not done here because of efficiency)
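
	For example, for the directory "/boot/home" the buffer is filled right to
	left: "home" is copied in front of the terminating null, then a '/', then
	"boot" and another '/', and the result is finally moved to the start of
	the buffer.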
2560 */
2561 static status_t
2562 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2563 	bool kernel)
2564 {
2565 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2566 
2567 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2568 		return B_BAD_VALUE;
2569 
2570 	if (!S_ISDIR(vnode->Type()))
2571 		return B_NOT_A_DIRECTORY;
2572 
2573 	char* path = buffer;
2574 	int32 insert = bufferSize;
2575 	int32 maxLevel = 256;
2576 	int32 length;
2577 	status_t status;
2578 	struct io_context* ioContext = get_current_io_context(kernel);
2579 
2580 	// we don't use get_vnode() here because this call is more
2581 	// efficient and does all we need from get_vnode()
2582 	inc_vnode_ref_count(vnode);
2583 
2584 	if (vnode != ioContext->root) {
		// this is not the IO context root: resolve a volume root to its
		// mount point
2587 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2588 		if (mountPoint) {
2589 			put_vnode(vnode);
2590 			vnode = mountPoint;
2591 		}
2592 	}
2593 
2594 	path[--insert] = '\0';
2595 		// the path is filled right to left
2596 
2597 	while (true) {
2598 		// the name buffer is also used for fs_read_dir()
2599 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2600 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2601 		struct vnode* parentVnode;
2602 		ino_t parentID;
2603 
2604 		// lookup the parent vnode
2605 		if (vnode == ioContext->root) {
2606 			// we hit the IO context root
2607 			parentVnode = vnode;
2608 			inc_vnode_ref_count(vnode);
2609 		} else {
2610 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2611 			if (status != B_OK)
2612 				goto out;
2613 		}
2614 
2615 		// get the node's name
2616 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2617 			sizeof(nameBuffer), ioContext);
2618 
2619 		if (vnode != ioContext->root) {
			// this is not the IO context root: resolve a volume root to
			// its mount point
2622 			struct vnode* mountPoint
2623 				= resolve_volume_root_to_mount_point(parentVnode);
2624 			if (mountPoint) {
2625 				put_vnode(parentVnode);
2626 				parentVnode = mountPoint;
2627 				parentID = parentVnode->id;
2628 			}
2629 		}
2630 
2631 		bool hitRoot = (parentVnode == vnode);
2632 
2633 		// release the current vnode, we only need its parent from now on
2634 		put_vnode(vnode);
2635 		vnode = parentVnode;
2636 
2637 		if (status != B_OK)
2638 			goto out;
2639 
2640 		if (hitRoot) {
2641 			// we have reached "/", which means we have constructed the full
2642 			// path
2643 			break;
2644 		}
2645 
2646 		// TODO: add an explicit check for loops in about 10 levels to do
2647 		// real loop detection
2648 
		// don't go deeper than 'maxLevel' to prevent circular loops
2650 		if (maxLevel-- < 0) {
2651 			status = B_LINK_LIMIT;
2652 			goto out;
2653 		}
2654 
2655 		// add the name in front of the current path
2656 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2657 		length = strlen(name);
2658 		insert -= length;
2659 		if (insert <= 0) {
2660 			status = B_RESULT_NOT_REPRESENTABLE;
2661 			goto out;
2662 		}
2663 		memcpy(path + insert, name, length);
2664 		path[--insert] = '/';
2665 	}
2666 
2667 	// the root dir will result in an empty path: fix it
2668 	if (path[insert] == '\0')
2669 		path[--insert] = '/';
2670 
2671 	TRACE(("  path is: %s\n", path + insert));
2672 
2673 	// move the path to the start of the buffer
2674 	length = bufferSize - insert;
2675 	memmove(buffer, path + insert, length);
2676 
2677 out:
2678 	put_vnode(vnode);
2679 	return status;
2680 }
2681 
2682 
2683 /*!	Checks the length of every path component, and adds a '.'
2684 	if the path ends in a slash.
2685 	The given path buffer must be able to store at least one
2686 	additional character.
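
	For example, "a/b/" is turned into "a/b/.", while "a//b" passes through
	unchanged (duplicate slashes are filtered later, during the lookup).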
2687 */
2688 static status_t
2689 check_path(char* to)
2690 {
2691 	int32 length = 0;
2692 
2693 	// check length of every path component
2694 
2695 	while (*to) {
2696 		char* begin;
2697 		if (*to == '/')
2698 			to++, length++;
2699 
2700 		begin = to;
2701 		while (*to != '/' && *to)
2702 			to++, length++;
2703 
2704 		if (to - begin > B_FILE_NAME_LENGTH)
2705 			return B_NAME_TOO_LONG;
2706 	}
2707 
2708 	if (length == 0)
2709 		return B_ENTRY_NOT_FOUND;
2710 
2711 	// complete path if there is a slash at the end
2712 
2713 	if (*(to - 1) == '/') {
2714 		if (length > B_PATH_NAME_LENGTH - 2)
2715 			return B_NAME_TOO_LONG;
2716 
2717 		to[0] = '.';
2718 		to[1] = '\0';
2719 	}
2720 
2721 	return B_OK;
2722 }
2723 
2724 
2725 static struct file_descriptor*
2726 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2727 {
2728 	struct file_descriptor* descriptor
2729 		= get_fd(get_current_io_context(kernel), fd);
2730 	if (descriptor == NULL)
2731 		return NULL;
2732 
2733 	struct vnode* vnode = fd_vnode(descriptor);
2734 	if (vnode == NULL) {
2735 		put_fd(descriptor);
2736 		return NULL;
2737 	}
2738 
2739 	// ToDo: when we can close a file descriptor at any point, investigate
2740 	//	if this is still valid to do (accessing the vnode without ref_count
2741 	//	or locking)
2742 	*_vnode = vnode;
2743 	return descriptor;
2744 }
2745 
2746 
2747 static struct vnode*
2748 get_vnode_from_fd(int fd, bool kernel)
2749 {
2750 	struct file_descriptor* descriptor;
2751 	struct vnode* vnode;
2752 
2753 	descriptor = get_fd(get_current_io_context(kernel), fd);
2754 	if (descriptor == NULL)
2755 		return NULL;
2756 
2757 	vnode = fd_vnode(descriptor);
2758 	if (vnode != NULL)
2759 		inc_vnode_ref_count(vnode);
2760 
2761 	put_fd(descriptor);
2762 	return vnode;
2763 }
2764 
2765 
2766 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2767 	only the path will be considered. In this case, the \a path must not be
2768 	NULL.
2769 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2770 	and should be NULL for files.
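
	The combinations mirror the *at() style of lookup: a relative \a path
	such as "mail/inbox" with a directory FD resolves relative to that
	directory, an absolute \a path ignores \a fd, and a \c NULL \a path
	yields the vnode of \a fd itself.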
2771 */
2772 static status_t
2773 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2774 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2775 {
2776 	if (fd < 0 && !path)
2777 		return B_BAD_VALUE;
2778 
2779 	if (path != NULL && *path == '\0')
2780 		return B_ENTRY_NOT_FOUND;
2781 
2782 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2783 		// no FD or absolute path
2784 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2785 	}
2786 
2787 	// FD only, or FD + relative path
2788 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2789 	if (!vnode)
2790 		return B_FILE_ERROR;
2791 
2792 	if (path != NULL) {
2793 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2794 			_vnode, _parentID);
2795 	}
2796 
2797 	// there is no relative path to take into account
2798 
2799 	*_vnode = vnode;
2800 	if (_parentID)
2801 		*_parentID = -1;
2802 
2803 	return B_OK;
2804 }
2805 
2806 
2807 static int
2808 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2809 	void* cookie, int openMode, bool kernel)
2810 {
2811 	struct file_descriptor* descriptor;
2812 	int fd;
2813 
2814 	// If the vnode is locked, we don't allow creating a new file/directory
2815 	// file_descriptor for it
2816 	if (vnode && vnode->mandatory_locked_by != NULL
2817 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2818 		return B_BUSY;
2819 
2820 	descriptor = alloc_fd();
2821 	if (!descriptor)
2822 		return B_NO_MEMORY;
2823 
2824 	if (vnode)
2825 		descriptor->u.vnode = vnode;
2826 	else
2827 		descriptor->u.mount = mount;
2828 	descriptor->cookie = cookie;
2829 
2830 	switch (type) {
2831 		// vnode types
2832 		case FDTYPE_FILE:
2833 			descriptor->ops = &sFileOps;
2834 			break;
2835 		case FDTYPE_DIR:
2836 			descriptor->ops = &sDirectoryOps;
2837 			break;
2838 		case FDTYPE_ATTR:
2839 			descriptor->ops = &sAttributeOps;
2840 			break;
2841 		case FDTYPE_ATTR_DIR:
2842 			descriptor->ops = &sAttributeDirectoryOps;
2843 			break;
2844 
2845 		// mount types
2846 		case FDTYPE_INDEX_DIR:
2847 			descriptor->ops = &sIndexDirectoryOps;
2848 			break;
2849 		case FDTYPE_QUERY:
2850 			descriptor->ops = &sQueryOps;
2851 			break;
2852 
2853 		default:
2854 			panic("get_new_fd() called with unknown type %d\n", type);
2855 			break;
2856 	}
2857 	descriptor->type = type;
2858 	descriptor->open_mode = openMode;
2859 
2860 	io_context* context = get_current_io_context(kernel);
2861 	fd = new_fd(context, descriptor);
2862 	if (fd < 0) {
2863 		free(descriptor);
2864 		return B_NO_MORE_FDS;
2865 	}
2866 
2867 	mutex_lock(&context->io_mutex);
2868 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2869 	mutex_unlock(&context->io_mutex);
2870 
2871 	return fd;
2872 }
2873 
2874 
2875 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2876 	vfs_normalize_path(). See there for more documentation.
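
	A sketch of the effect (illustrative, assuming no symlinks are involved):
	a buffer containing "/boot/home/.." is rewritten in place to "/boot", and
	"/boot//home/" to "/boot/home".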
2877 */
2878 static status_t
2879 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2880 {
2881 	VNodePutter dirPutter;
2882 	struct vnode* dir = NULL;
2883 	status_t error;
2884 
2885 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2886 		// get dir vnode + leaf name
2887 		struct vnode* nextDir;
2888 		char leaf[B_FILE_NAME_LENGTH];
2889 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2890 		if (error != B_OK)
2891 			return error;
2892 
2893 		dir = nextDir;
2894 		strcpy(path, leaf);
2895 		dirPutter.SetTo(dir);
2896 
2897 		// get file vnode, if we shall resolve links
2898 		bool fileExists = false;
2899 		struct vnode* fileVnode;
2900 		VNodePutter fileVnodePutter;
2901 		if (traverseLink) {
2902 			inc_vnode_ref_count(dir);
2903 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2904 					NULL) == B_OK) {
2905 				fileVnodePutter.SetTo(fileVnode);
2906 				fileExists = true;
2907 			}
2908 		}
2909 
2910 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2911 			// we're done -- construct the path
2912 			bool hasLeaf = true;
2913 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2914 				// special cases "." and ".." -- get the dir, forget the leaf
2915 				inc_vnode_ref_count(dir);
2916 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2917 					&nextDir, NULL);
2918 				if (error != B_OK)
2919 					return error;
2920 				dir = nextDir;
2921 				dirPutter.SetTo(dir);
2922 				hasLeaf = false;
2923 			}
2924 
2925 			// get the directory path
2926 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2927 			if (error != B_OK)
2928 				return error;
2929 
2930 			// append the leaf name
2931 			if (hasLeaf) {
2932 				// insert a directory separator if this is not the file system
2933 				// root
2934 				if ((strcmp(path, "/") != 0
2935 					&& strlcat(path, "/", pathSize) >= pathSize)
2936 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2937 					return B_NAME_TOO_LONG;
2938 				}
2939 			}
2940 
2941 			return B_OK;
2942 		}
2943 
2944 		// read link
2945 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2946 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2947 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2948 			if (error != B_OK)
2949 				return error;
2950 			path[bufferSize] = '\0';
2951 		} else
2952 			return B_BAD_VALUE;
2953 	}
2954 
2955 	return B_LINK_LIMIT;
2956 }
2957 
2958 
2959 #ifdef ADD_DEBUGGER_COMMANDS
2960 
2961 
2962 static void
2963 _dump_advisory_locking(advisory_locking* locking)
2964 {
2965 	if (locking == NULL)
2966 		return;
2967 
	kprintf("   lock:        %ld\n", locking->lock);
	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2970 
2971 	int32 index = 0;
2972 	LockList::Iterator iterator = locking->locks.GetIterator();
2973 	while (iterator.HasNext()) {
2974 		struct advisory_lock* lock = iterator.Next();
2975 
2976 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2977 		kprintf("        start:  %Ld\n", lock->start);
2978 		kprintf("        end:    %Ld\n", lock->end);
2979 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2980 	}
2981 }
2982 
2983 
2984 static void
2985 _dump_mount(struct fs_mount* mount)
2986 {
2987 	kprintf("MOUNT: %p\n", mount);
2988 	kprintf(" id:            %ld\n", mount->id);
2989 	kprintf(" device_name:   %s\n", mount->device_name);
2990 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2991 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2992 	kprintf(" partition:     %p\n", mount->partition);
2993 	kprintf(" lock:          %p\n", &mount->rlock);
2994 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2995 		mount->owns_file_device ? " owns_file_device" : "");
2996 
2997 	fs_volume* volume = mount->volume;
2998 	while (volume != NULL) {
2999 		kprintf(" volume %p:\n", volume);
3000 		kprintf("  layer:            %ld\n", volume->layer);
3001 		kprintf("  private_volume:   %p\n", volume->private_volume);
3002 		kprintf("  ops:              %p\n", volume->ops);
3003 		kprintf("  file_system:      %p\n", volume->file_system);
3004 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3005 		volume = volume->super_volume;
3006 	}
3007 
3008 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3009 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3010 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3011 	set_debug_variable("_partition", (addr_t)mount->partition);
3012 }
3013 
3014 
3015 static bool
3016 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3017 	const char* name)
3018 {
3019 	bool insertSlash = buffer[bufferSize] != '\0';
3020 	size_t nameLength = strlen(name);
3021 
3022 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3023 		return false;
3024 
3025 	if (insertSlash)
3026 		buffer[--bufferSize] = '/';
3027 
3028 	bufferSize -= nameLength;
3029 	memcpy(buffer + bufferSize, name, nameLength);
3030 
3031 	return true;
3032 }
3033 
3034 
3035 static bool
3036 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3037 	ino_t nodeID)
3038 {
3039 	if (bufferSize == 0)
3040 		return false;
3041 
3042 	bool insertSlash = buffer[bufferSize] != '\0';
3043 	if (insertSlash)
3044 		buffer[--bufferSize] = '/';
3045 
3046 	size_t size = snprintf(buffer, bufferSize,
3047 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3048 	if (size > bufferSize) {
3049 		if (insertSlash)
3050 			bufferSize++;
3051 		return false;
3052 	}
3053 
3054 	if (size < bufferSize)
3055 		memmove(buffer + bufferSize - size, buffer, size);
3056 
3057 	bufferSize -= size;
3058 	return true;
3059 }
3060 
3061 
3062 static char*
3063 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3064 	bool& _truncated)
3065 {
3066 	// null-terminate the path
3067 	buffer[--bufferSize] = '\0';
3068 
3069 	while (true) {
3070 		while (vnode->mount->root_vnode == vnode
3071 				&& vnode->mount->covers_vnode != NULL) {
3072 			vnode = vnode->mount->covers_vnode;
3073 		}
3074 
3075 		if (vnode == sRoot) {
3076 			_truncated = bufferSize == 0;
3077 			if (!_truncated)
3078 				buffer[--bufferSize] = '/';
3079 			return buffer + bufferSize;
3080 		}
3081 
3082 		// resolve the name
3083 		ino_t dirID;
3084 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3085 			vnode->id, dirID);
3086 		if (name == NULL) {
3087 			// Failed to resolve the name -- prepend "<dev,node>/".
3088 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3089 				vnode->mount->id, vnode->id);
3090 			return buffer + bufferSize;
3091 		}
3092 
3093 		// prepend the name
3094 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3095 			_truncated = true;
3096 			return buffer + bufferSize;
3097 		}
3098 
3099 		// resolve the directory node
3100 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3101 		if (nextVnode == NULL) {
3102 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3103 				vnode->mount->id, dirID);
3104 			return buffer + bufferSize;
3105 		}
3106 
3107 		vnode = nextVnode;
3108 	}
3109 }
3110 
3111 
3112 static void
3113 _dump_vnode(struct vnode* vnode, bool printPath)
3114 {
3115 	kprintf("VNODE: %p\n", vnode);
3116 	kprintf(" device:        %ld\n", vnode->device);
3117 	kprintf(" id:            %Ld\n", vnode->id);
3118 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3119 	kprintf(" private_node:  %p\n", vnode->private_node);
3120 	kprintf(" mount:         %p\n", vnode->mount);
3121 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3122 	kprintf(" cache:         %p\n", vnode->cache);
3123 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3124 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3125 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3126 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3127 
3128 	_dump_advisory_locking(vnode->advisory_locking);
3129 
3130 	if (printPath) {
3131 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3132 		if (buffer != NULL) {
3133 			bool truncated;
3134 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3135 				B_PATH_NAME_LENGTH, truncated);
3136 			if (path != NULL) {
3137 				kprintf(" path:          ");
3138 				if (truncated)
3139 					kputs("<truncated>/");
3140 				kputs(path);
3141 				kputs("\n");
3142 			} else
3143 				kprintf("Failed to resolve vnode path.\n");
3144 
3145 			debug_free(buffer);
3146 		} else
3147 			kprintf("Failed to allocate memory for constructing the path.\n");
3148 	}
3149 
3150 	set_debug_variable("_node", (addr_t)vnode->private_node);
3151 	set_debug_variable("_mount", (addr_t)vnode->mount);
3152 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3153 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3154 }
3155 
3156 
3157 static int
3158 dump_mount(int argc, char** argv)
3159 {
3160 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3161 		kprintf("usage: %s [id|address]\n", argv[0]);
3162 		return 0;
3163 	}
3164 
3165 	uint32 id = parse_expression(argv[1]);
3166 	struct fs_mount* mount = NULL;
3167 
3168 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3169 	if (mount == NULL) {
3170 		if (IS_USER_ADDRESS(id)) {
3171 			kprintf("fs_mount not found\n");
3172 			return 0;
3173 		}
3174 		mount = (fs_mount*)id;
3175 	}
3176 
3177 	_dump_mount(mount);
3178 	return 0;
3179 }
3180 
3181 
3182 static int
3183 dump_mounts(int argc, char** argv)
3184 {
3185 	if (argc != 1) {
3186 		kprintf("usage: %s\n", argv[0]);
3187 		return 0;
3188 	}
3189 
3190 	kprintf("address     id root       covers     cookie     fs_name\n");
3191 
3192 	struct hash_iterator iterator;
3193 	struct fs_mount* mount;
3194 
3195 	hash_open(sMountsTable, &iterator);
3196 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3197 			!= NULL) {
3198 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3199 			mount->covers_vnode, mount->volume->private_volume,
3200 			mount->volume->file_system_name);
3201 
3202 		fs_volume* volume = mount->volume;
3203 		while (volume->super_volume != NULL) {
3204 			volume = volume->super_volume;
3205 			kprintf("                                     %p %s\n",
3206 				volume->private_volume, volume->file_system_name);
3207 		}
3208 	}
3209 
3210 	hash_close(sMountsTable, &iterator, false);
3211 	return 0;
3212 }
3213 
3214 
3215 static int
3216 dump_vnode(int argc, char** argv)
3217 {
3218 	bool printPath = false;
3219 	int argi = 1;
3220 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3221 		printPath = true;
3222 		argi++;
3223 	}
3224 
3225 	if (argi >= argc || argi + 2 < argc) {
3226 		print_debugger_command_usage(argv[0]);
3227 		return 0;
3228 	}
3229 
3230 	struct vnode* vnode = NULL;
3231 
3232 	if (argi + 1 == argc) {
3233 		vnode = (struct vnode*)parse_expression(argv[argi]);
3234 		if (IS_USER_ADDRESS(vnode)) {
3235 			kprintf("invalid vnode address\n");
3236 			return 0;
3237 		}
3238 		_dump_vnode(vnode, printPath);
3239 		return 0;
3240 	}
3241 
3242 	struct hash_iterator iterator;
3243 	dev_t device = parse_expression(argv[argi]);
3244 	ino_t id = parse_expression(argv[argi + 1]);
3245 
3246 	hash_open(sVnodeTable, &iterator);
3247 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3248 		if (vnode->id != id || vnode->device != device)
3249 			continue;
3250 
3251 		_dump_vnode(vnode, printPath);
3252 	}
3253 
3254 	hash_close(sVnodeTable, &iterator, false);
3255 	return 0;
3256 }
3257 
3258 
3259 static int
3260 dump_vnodes(int argc, char** argv)
3261 {
3262 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3263 		kprintf("usage: %s [device]\n", argv[0]);
3264 		return 0;
3265 	}
3266 
3267 	// restrict dumped nodes to a certain device if requested
3268 	dev_t device = parse_expression(argv[1]);
3269 
3270 	struct hash_iterator iterator;
3271 	struct vnode* vnode;
3272 
3273 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3274 		"flags\n");
3275 
3276 	hash_open(sVnodeTable, &iterator);
3277 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3278 		if (vnode->device != device)
3279 			continue;
3280 
3281 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3282 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3283 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3284 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3285 	}
3286 
3287 	hash_close(sVnodeTable, &iterator, false);
3288 	return 0;
3289 }
3290 
3291 
3292 static int
3293 dump_vnode_caches(int argc, char** argv)
3294 {
3295 	struct hash_iterator iterator;
3296 	struct vnode* vnode;
3297 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3299 		kprintf("usage: %s [device]\n", argv[0]);
3300 		return 0;
3301 	}
3302 
3303 	// restrict dumped nodes to a certain device if requested
3304 	dev_t device = -1;
3305 	if (argc > 1)
3306 		device = parse_expression(argv[1]);
3307 
3308 	kprintf("address    dev     inode cache          size   pages\n");
3309 
3310 	hash_open(sVnodeTable, &iterator);
3311 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3312 		if (vnode->cache == NULL)
3313 			continue;
3314 		if (device != -1 && vnode->device != device)
3315 			continue;
3316 
3317 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3318 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3319 				/ B_PAGE_SIZE, vnode->cache->page_count);
3320 	}
3321 
3322 	hash_close(sVnodeTable, &iterator, false);
3323 	return 0;
3324 }
3325 
3326 
3327 int
3328 dump_io_context(int argc, char** argv)
3329 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3331 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3332 		return 0;
3333 	}
3334 
3335 	struct io_context* context = NULL;
3336 
3337 	if (argc > 1) {
3338 		uint32 num = parse_expression(argv[1]);
3339 		if (IS_KERNEL_ADDRESS(num))
3340 			context = (struct io_context*)num;
3341 		else {
3342 			struct team* team = team_get_team_struct_locked(num);
3343 			if (team == NULL) {
3344 				kprintf("could not find team with ID %ld\n", num);
3345 				return 0;
3346 			}
3347 			context = (struct io_context*)team->io_context;
3348 		}
3349 	} else
3350 		context = get_current_io_context(true);
3351 
3352 	kprintf("I/O CONTEXT: %p\n", context);
3353 	kprintf(" root vnode:\t%p\n", context->root);
3354 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3355 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3356 	kprintf(" max fds:\t%lu\n", context->table_size);
3357 
3358 	if (context->num_used_fds)
3359 		kprintf("   no.  type         ops  ref  open  mode         pos"
3360 			"      cookie\n");
3361 
3362 	for (uint32 i = 0; i < context->table_size; i++) {
3363 		struct file_descriptor* fd = context->fds[i];
3364 		if (fd == NULL)
3365 			continue;
3366 
3367 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3368 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3369 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3370 			fd->pos, fd->cookie,
3371 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3372 				? "mount" : "vnode",
3373 			fd->u.vnode);
3374 	}
3375 
3376 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3377 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3378 
3379 	set_debug_variable("_cwd", (addr_t)context->cwd);
3380 
3381 	return 0;
3382 }
3383 
3384 
3385 int
3386 dump_vnode_usage(int argc, char** argv)
3387 {
3388 	if (argc != 1) {
3389 		kprintf("usage: %s\n", argv[0]);
3390 		return 0;
3391 	}
3392 
3393 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3394 		kMaxUnusedVnodes);
3395 
3396 	struct hash_iterator iterator;
3397 	hash_open(sVnodeTable, &iterator);
3398 
3399 	uint32 count = 0;
3400 	struct vnode* vnode;
3401 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3402 		count++;
3403 	}
3404 
3405 	hash_close(sVnodeTable, &iterator, false);
3406 
3407 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3408 	return 0;
3409 }
3410 
3411 #endif	// ADD_DEBUGGER_COMMANDS
3412 
3413 /*!	Clears an iovec array of physical pages.
3414 	Returns in \a _bytes the number of bytes successfully cleared.
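	This is used to service sparse regions of a file (file_io_vecs with an
	offset of -1), which read back as zeroes.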
3415 */
3416 static status_t
3417 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3418 {
3419 	size_t bytes = *_bytes;
3420 	size_t index = 0;
3421 
3422 	while (bytes > 0) {
3423 		size_t length = min_c(vecs[index].iov_len, bytes);
3424 
3425 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3426 			length);
3427 		if (status != B_OK) {
3428 			*_bytes -= bytes;
3429 			return status;
3430 		}
3431 
		bytes -= length;
		index++;
			// advance to the next vec
3433 	}
3434 
3435 	return B_OK;
3436 }
3437 
3438 
3439 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3440 	and calls the file system hooks to read/write the request to disk.
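
	For example, a single file_io_vec { offset 4096, length 8192 } paired with
	two 4 KiB iovecs results in one read_pages()/write_pages() call at device
	offset 4096 that covers both memory vecs; a file_io_vec offset of -1
	denotes a sparse region, which reads back as zeroes.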
3441 */
3442 static status_t
3443 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3444 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3445 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3446 	bool doWrite)
3447 {
3448 	if (fileVecCount == 0) {
3449 		// There are no file vecs at this offset, so we're obviously trying
3450 		// to access the file outside of its bounds
3451 		return B_BAD_VALUE;
3452 	}
3453 
3454 	size_t numBytes = *_numBytes;
3455 	uint32 fileVecIndex;
3456 	size_t vecOffset = *_vecOffset;
3457 	uint32 vecIndex = *_vecIndex;
3458 	status_t status;
3459 	size_t size;
3460 
3461 	if (!doWrite && vecOffset == 0) {
3462 		// now directly read the data from the device
3463 		// the first file_io_vec can be read directly
3464 
3465 		if (fileVecs[0].length < numBytes)
3466 			size = fileVecs[0].length;
3467 		else
3468 			size = numBytes;
3469 
3470 		if (fileVecs[0].offset >= 0) {
3471 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3472 				&vecs[vecIndex], vecCount - vecIndex, &size);
3473 		} else {
3474 			// sparse read
3475 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3476 		}
3477 		if (status != B_OK)
3478 			return status;
3479 
3480 		// TODO: this is a work-around for buggy device drivers!
3481 		//	When our own drivers honour the length, we can:
3482 		//	a) also use this direct I/O for writes (otherwise, it would
3483 		//	   overwrite precious data)
3484 		//	b) panic if the term below is true (at least for writes)
3485 		if (size > fileVecs[0].length) {
3486 			//dprintf("warning: device driver %p doesn't respect total length "
3487 			//	"in read_pages() call!\n", ref->device);
3488 			size = fileVecs[0].length;
3489 		}
3490 
3491 		ASSERT(size <= fileVecs[0].length);
3492 
3493 		// If the file portion was contiguous, we're already done now
3494 		if (size == numBytes)
3495 			return B_OK;
3496 
3497 		// if we reached the end of the file, we can return as well
3498 		if (size != fileVecs[0].length) {
3499 			*_numBytes = size;
3500 			return B_OK;
3501 		}
3502 
3503 		fileVecIndex = 1;
3504 
3505 		// first, find out where we have to continue in our iovecs
3506 		for (; vecIndex < vecCount; vecIndex++) {
3507 			if (size < vecs[vecIndex].iov_len)
3508 				break;
3509 
3510 			size -= vecs[vecIndex].iov_len;
3511 		}
3512 
3513 		vecOffset = size;
3514 	} else {
3515 		fileVecIndex = 0;
3516 		size = 0;
3517 	}
3518 
3519 	// Too bad, let's process the rest of the file_io_vecs
3520 
3521 	size_t totalSize = size;
3522 	size_t bytesLeft = numBytes - size;
3523 
3524 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3525 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3526 		off_t fileOffset = fileVec.offset;
3527 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3528 
3529 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3530 
3531 		// process the complete fileVec
3532 		while (fileLeft > 0) {
3533 			iovec tempVecs[MAX_TEMP_IO_VECS];
3534 			uint32 tempCount = 0;
3535 
3536 			// size tracks how much of what is left of the current fileVec
3537 			// (fileLeft) has been assigned to tempVecs
3538 			size = 0;
3539 
3540 			// assign what is left of the current fileVec to the tempVecs
3541 			for (size = 0; size < fileLeft && vecIndex < vecCount
3542 					&& tempCount < MAX_TEMP_IO_VECS;) {
3543 				// try to satisfy one iovec per iteration (or as much as
3544 				// possible)
3545 
3546 				// bytes left of the current iovec
3547 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3548 				if (vecLeft == 0) {
3549 					vecOffset = 0;
3550 					vecIndex++;
3551 					continue;
3552 				}
3553 
3554 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3555 					vecIndex, vecOffset, size));
3556 
3557 				// actually available bytes
3558 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3559 
3560 				tempVecs[tempCount].iov_base
3561 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3562 				tempVecs[tempCount].iov_len = tempVecSize;
3563 				tempCount++;
3564 
3565 				size += tempVecSize;
3566 				vecOffset += tempVecSize;
3567 			}
3568 
3569 			size_t bytes = size;
3570 
3571 			if (fileOffset == -1) {
3572 				if (doWrite) {
3573 					panic("sparse write attempt: vnode %p", vnode);
3574 					status = B_IO_ERROR;
3575 				} else {
3576 					// sparse read
3577 					status = zero_pages(tempVecs, tempCount, &bytes);
3578 				}
3579 			} else if (doWrite) {
3580 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3581 					tempVecs, tempCount, &bytes);
3582 			} else {
3583 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3584 					tempVecs, tempCount, &bytes);
3585 			}
3586 			if (status != B_OK)
3587 				return status;
3588 
3589 			totalSize += bytes;
3590 			bytesLeft -= size;
3591 			if (fileOffset >= 0)
3592 				fileOffset += size;
3593 			fileLeft -= size;
3594 			//dprintf("-> file left = %Lu\n", fileLeft);
3595 
3596 			if (size != bytes || vecIndex >= vecCount) {
3597 				// there are no more bytes or iovecs, let's bail out
3598 				*_numBytes = totalSize;
3599 				return B_OK;
3600 			}
3601 		}
3602 	}
3603 
3604 	*_vecIndex = vecIndex;
3605 	*_vecOffset = vecOffset;
3606 	*_numBytes = totalSize;
3607 	return B_OK;
3608 }
3609 
3610 
3611 //	#pragma mark - public API for file systems
3612 
3613 
3614 extern "C" status_t
3615 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3616 	fs_vnode_ops* ops)
3617 {
3618 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3619 		volume, volume->id, vnodeID, privateNode));
3620 
3621 	if (privateNode == NULL)
3622 		return B_BAD_VALUE;
3623 
3624 	// create the node
3625 	bool nodeCreated;
3626 	struct vnode* vnode;
3627 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3628 		nodeCreated);
3629 	if (status != B_OK)
3630 		return status;
3631 
3632 	WriteLocker nodeLocker(sVnodeLock, true);
3633 		// create_new_vnode_and_lock() has locked for us
3634 
3635 	// file system integrity check:
3636 	// test if the vnode already exists and bail out if this is the case!
3637 	if (!nodeCreated) {
3638 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3639 			volume->id, vnodeID, privateNode, vnode->private_node);
3640 		return B_ERROR;
3641 	}
3642 
3643 	vnode->private_node = privateNode;
3644 	vnode->ops = ops;
3645 	vnode->SetUnpublished(true);
3646 
3647 	TRACE(("returns: %s\n", strerror(status)));
3648 
3649 	return status;
3650 }
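
/*	Typical file system usage (an illustrative sketch only - the node
	structure, hook, and ops table names below are hypothetical):

		MyNode* node = my_create_node(...);
		status_t status = new_vnode(volume, node->id, node, &gMyVnodeOps);
		if (status == B_OK) {
			status = publish_vnode(volume, node->id, node, &gMyVnodeOps,
				S_IFREG, 0);
		}

	new_vnode() creates the node busy and unpublished; the subsequent
	publish_vnode() call (below) marks it published and thus generally
	accessible.
*/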
3651 
3652 
3653 extern "C" status_t
3654 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3655 	fs_vnode_ops* ops, int type, uint32 flags)
3656 {
3657 	FUNCTION(("publish_vnode()\n"));
3658 
3659 	WriteLocker locker(sVnodeLock);
3660 
3661 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3662 
3663 	bool nodeCreated = false;
3664 	if (vnode == NULL) {
3665 		if (privateNode == NULL)
3666 			return B_BAD_VALUE;
3667 
3668 		// create the node
3669 		locker.Unlock();
3670 			// create_new_vnode_and_lock() will re-lock for us on success
3671 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3672 			nodeCreated);
3673 		if (status != B_OK)
3674 			return status;
3675 
3676 		locker.SetTo(sVnodeLock, true);
3677 	}
3678 
3679 	if (nodeCreated) {
3680 		vnode->private_node = privateNode;
3681 		vnode->ops = ops;
3682 		vnode->SetUnpublished(true);
3683 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3684 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3685 		// already known, but not published
3686 	} else
3687 		return B_BAD_VALUE;
3688 
3689 	vnode->SetType(type);
3690 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3691 
3692 	bool publishSpecialSubNode
3693 		= is_special_node_type(type)
3694 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3695 
3696 	status_t status = B_OK;
3697 
3698 	// create sub vnodes, if necessary
3699 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3700 		locker.Unlock();
3701 
3702 		fs_volume* subVolume = volume;
3703 		if (volume->sub_volume != NULL) {
3704 			while (status == B_OK && subVolume->sub_volume != NULL) {
3705 				subVolume = subVolume->sub_volume;
3706 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3707 					vnode);
3708 			}
3709 		}
3710 
3711 		if (status == B_OK && publishSpecialSubNode)
3712 			status = create_special_sub_node(vnode, flags);
3713 
3714 		if (status != B_OK) {
3715 			// error -- clean up the created sub vnodes
3716 			while (subVolume->super_volume != volume) {
3717 				subVolume = subVolume->super_volume;
3718 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3719 			}
3720 		}
3721 
3722 		if (status == B_OK) {
3723 			ReadLocker vnodesReadLocker(sVnodeLock);
3724 			AutoLocker<Vnode> nodeLocker(vnode);
3725 			vnode->SetBusy(false);
3726 			vnode->SetUnpublished(false);
3727 		} else {
3728 			locker.Lock();
3729 			hash_remove(sVnodeTable, vnode);
3730 			remove_vnode_from_mount_list(vnode, vnode->mount);
3731 			free(vnode);
3732 		}
3733 	} else {
3734 		// we still hold the write lock -- mark the node unbusy and published
3735 		vnode->SetBusy(false);
3736 		vnode->SetUnpublished(false);
3737 	}
3738 
3739 	TRACE(("returns: %s\n", strerror(status)));
3740 
3741 	return status;
3742 }
3743 
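
/*	Usage sketch (hypothetical FS code, not part of this file): a file system
	that allocates a new on-disk inode typically registers it unpublished via
	new_vnode() and makes it visible to lookups via publish_vnode(). The
	inode class "MyInode" and the ops table "gMyVnodeOps" are assumptions
	made for illustration:

	static status_t
	my_fs_register_node(fs_volume* volume, MyInode* inode)
	{
		// register the node -- it remains busy and unpublished for now
		status_t status = new_vnode(volume, inode->ID(), inode, &gMyVnodeOps);
		if (status != B_OK)
			return status;

		// make the node available; this clears the busy/unpublished state
		return publish_vnode(volume, inode->ID(), inode, &gMyVnodeOps,
			inode->Mode() & S_IFMT, 0);
	}
*/
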
3744 
3745 extern "C" status_t
3746 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3747 {
3748 	struct vnode* vnode;
3749 
3750 	if (volume == NULL)
3751 		return B_BAD_VALUE;
3752 
3753 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3754 	if (status != B_OK)
3755 		return status;
3756 
3757 	// If this is a layered FS, we need to get the node cookie for the requested
3758 	// layer.
3759 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3760 		fs_vnode resolvedNode;
3761 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3762 			&resolvedNode);
3763 		if (status != B_OK) {
3764 			panic("get_vnode(): Failed to get super node for vnode %p, "
3765 				"volume: %p", vnode, volume);
3766 			put_vnode(vnode);
3767 			return status;
3768 		}
3769 
3770 		if (_privateNode != NULL)
3771 			*_privateNode = resolvedNode.private_node;
3772 	} else if (_privateNode != NULL)
3773 		*_privateNode = vnode->private_node;
3774 
3775 	return B_OK;
3776 }
3777 
3778 
3779 extern "C" status_t
3780 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3781 {
3782 	struct vnode* vnode;
3783 
3784 	rw_lock_read_lock(&sVnodeLock);
3785 	vnode = lookup_vnode(volume->id, vnodeID);
3786 	rw_lock_read_unlock(&sVnodeLock);
3787 
3788 	if (vnode == NULL)
3789 		return B_BAD_VALUE;
3790 
3791 	inc_vnode_ref_count(vnode);
3792 	return B_OK;
3793 }
3794 
3795 
3796 extern "C" status_t
3797 put_vnode(fs_volume* volume, ino_t vnodeID)
3798 {
3799 	struct vnode* vnode;
3800 
3801 	rw_lock_read_lock(&sVnodeLock);
3802 	vnode = lookup_vnode(volume->id, vnodeID);
3803 	rw_lock_read_unlock(&sVnodeLock);
3804 
3805 	if (vnode == NULL)
3806 		return B_BAD_VALUE;
3807 
3808 	dec_vnode_ref_count(vnode, false, true);
3809 	return B_OK;
3810 }
3811 
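
/*	Usage sketch (hypothetical): get_vnode() and put_vnode() form a reference
	pair. An FS that temporarily needs another node of its own volume, e.g.
	the target of a link, might do:

	void* privateNode;
	if (get_vnode(volume, targetID, &privateNode) == B_OK) {
		// ... work with privateNode ...
		put_vnode(volume, targetID);
	}
*/
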
3812 
3813 extern "C" status_t
3814 remove_vnode(fs_volume* volume, ino_t vnodeID)
3815 {
3816 	ReadLocker locker(sVnodeLock);
3817 
3818 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3819 	if (vnode == NULL)
3820 		return B_ENTRY_NOT_FOUND;
3821 
3822 	if (vnode->covered_by != NULL) {
3823 		// this vnode is in use
3824 		return B_BUSY;
3825 	}
3826 
3827 	vnode->Lock();
3828 
3829 	vnode->SetRemoved(true);
3830 	bool removeUnpublished = false;
3831 
3832 	if (vnode->IsUnpublished()) {
3833 		// prepare the vnode for deletion
3834 		removeUnpublished = true;
3835 		vnode->SetBusy(true);
3836 	}
3837 
3838 	vnode->Unlock();
3839 	locker.Unlock();
3840 
3841 	if (removeUnpublished) {
3842 		// If the vnode hasn't been published yet, we delete it here
3843 		atomic_add(&vnode->ref_count, -1);
3844 		free_vnode(vnode, true);
3845 	}
3846 
3847 	return B_OK;
3848 }
3849 
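
/*	Usage sketch (hypothetical): an FS unlink() hook whose entry removal
	drops the last link marks the node for deletion; the VFS then deletes it
	once the last reference has been put:

	if (inode->LinkCount() == 0)
		remove_vnode(volume, inode->ID());

	Should the operation fail afterwards, unremove_vnode() (below) reverts
	the mark.
*/
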
3850 
3851 extern "C" status_t
3852 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3853 {
3854 	struct vnode* vnode;
3855 
3856 	rw_lock_read_lock(&sVnodeLock);
3857 
3858 	vnode = lookup_vnode(volume->id, vnodeID);
3859 	if (vnode) {
3860 		AutoLocker<Vnode> nodeLocker(vnode);
3861 		vnode->SetRemoved(false);
3862 	}
3863 
3864 	rw_lock_read_unlock(&sVnodeLock);
3865 	return B_OK;
3866 }
3867 
3868 
3869 extern "C" status_t
3870 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3871 {
3872 	ReadLocker _(sVnodeLock);
3873 
3874 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3875 		if (_removed != NULL)
3876 			*_removed = vnode->IsRemoved();
3877 		return B_OK;
3878 	}
3879 
3880 	return B_BAD_VALUE;
3881 }
3882 
3883 
3884 extern "C" fs_volume*
3885 volume_for_vnode(fs_vnode* _vnode)
3886 {
3887 	if (_vnode == NULL)
3888 		return NULL;
3889 
3890 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3891 	return vnode->mount->volume;
3892 }
3893 
3894 
3895 extern "C" status_t
3896 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3897 	size_t* _numBytes)
3898 {
3899 	struct file_descriptor* descriptor;
3900 	struct vnode* vnode;
3901 
3902 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3903 	if (descriptor == NULL)
3904 		return B_FILE_ERROR;
3905 
3906 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3907 		count, 0, _numBytes);
3908 
3909 	put_fd(descriptor);
3910 	return status;
3911 }
3912 
3913 
3914 extern "C" status_t
3915 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3916 	size_t* _numBytes)
3917 {
3918 	struct file_descriptor* descriptor;
3919 	struct vnode* vnode;
3920 
3921 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3922 	if (descriptor == NULL)
3923 		return B_FILE_ERROR;
3924 
3925 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3926 		count, 0, _numBytes);
3927 
3928 	put_fd(descriptor);
3929 	return status;
3930 }
3931 
3932 
3933 extern "C" status_t
3934 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3935 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3936 	size_t* _bytes)
3937 {
3938 	struct file_descriptor* descriptor;
3939 	struct vnode* vnode;
3940 
3941 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3942 	if (descriptor == NULL)
3943 		return B_FILE_ERROR;
3944 
3945 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3946 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3947 		false);
3948 
3949 	put_fd(descriptor);
3950 	return status;
3951 }
3952 
3953 
3954 extern "C" status_t
3955 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3956 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3957 	size_t* _bytes)
3958 {
3959 	struct file_descriptor* descriptor;
3960 	struct vnode* vnode;
3961 
3962 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3963 	if (descriptor == NULL)
3964 		return B_FILE_ERROR;
3965 
3966 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3967 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3968 		true);
3969 
3970 	put_fd(descriptor);
3971 	return status;
3972 }
3973 
3974 
3975 extern "C" status_t
3976 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3977 {
3978 	// lookup mount -- the caller is required to make sure that the mount
3979 	// won't go away
3980 	MutexLocker locker(sMountMutex);
3981 	struct fs_mount* mount = find_mount(mountID);
3982 	if (mount == NULL)
3983 		return B_BAD_VALUE;
3984 	locker.Unlock();
3985 
3986 	return mount->entry_cache.Add(dirID, name, nodeID);
3987 }
3988 
3989 
3990 extern "C" status_t
3991 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3992 {
3993 	// lookup mount -- the caller is required to make sure that the mount
3994 	// won't go away
3995 	MutexLocker locker(sMountMutex);
3996 	struct fs_mount* mount = find_mount(mountID);
3997 	if (mount == NULL)
3998 		return B_BAD_VALUE;
3999 	locker.Unlock();
4000 
4001 	return mount->entry_cache.Remove(dirID, name);
4002 }
4003 
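
/*	Usage sketch (hypothetical): a file system keeps the entry cache coherent
	from its directory-modifying hooks, e.g.:

	// after an entry has been created successfully
	entry_cache_add(volume->id, directory->ID(), name, newNodeID);

	// after an entry has been removed successfully
	entry_cache_remove(volume->id, directory->ID(), name);
*/
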
4004 
4005 //	#pragma mark - private VFS API
4006 //	Functions the VFS exports for other parts of the kernel
4007 
4008 
4009 /*! Acquires another reference to the vnode that has to be released
4010 	by calling vfs_put_vnode().
4011 */
4012 void
4013 vfs_acquire_vnode(struct vnode* vnode)
4014 {
4015 	inc_vnode_ref_count(vnode);
4016 }
4017 
4018 
4019 /*! This is currently called from file_cache_create() only.
4020 	It's probably a temporary solution as long as devfs requires that
4021 	fs_read_pages()/fs_write_pages() are called with the standard
4022 	open cookie and not with a device cookie.
4023 	Once that's done differently, this function can be removed; it has no
4024 	other purpose.
4025 */
4026 extern "C" status_t
4027 vfs_get_cookie_from_fd(int fd, void** _cookie)
4028 {
4029 	struct file_descriptor* descriptor;
4030 
4031 	descriptor = get_fd(get_current_io_context(true), fd);
4032 	if (descriptor == NULL)
4033 		return B_FILE_ERROR;
4034 
4035 	*_cookie = descriptor->cookie;
4036 	return B_OK;
4037 }
4038 
4039 
4040 extern "C" status_t
4041 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4042 {
4043 	*vnode = get_vnode_from_fd(fd, kernel);
4044 
4045 	if (*vnode == NULL)
4046 		return B_FILE_ERROR;
4047 
4048 	return B_NO_ERROR;
4049 }
4050 
4051 
4052 extern "C" status_t
4053 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4054 {
4055 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4056 		path, kernel));
4057 
4058 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4059 	if (pathBuffer.InitCheck() != B_OK)
4060 		return B_NO_MEMORY;
4061 
4062 	char* buffer = pathBuffer.LockBuffer();
4063 	strlcpy(buffer, path, pathBuffer.BufferSize());
4064 
4065 	struct vnode* vnode;
4066 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4067 	if (status != B_OK)
4068 		return status;
4069 
4070 	*_vnode = vnode;
4071 	return B_OK;
4072 }
4073 
4074 
4075 extern "C" status_t
4076 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4077 {
4078 	struct vnode* vnode;
4079 
4080 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4081 	if (status != B_OK)
4082 		return status;
4083 
4084 	*_vnode = vnode;
4085 	return B_OK;
4086 }
4087 
4088 
4089 extern "C" status_t
4090 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4091 	const char* name, struct vnode** _vnode)
4092 {
4093 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4094 }
4095 
4096 
4097 extern "C" void
4098 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4099 {
4100 	*_mountID = vnode->device;
4101 	*_vnodeID = vnode->id;
4102 }
4103 
4104 
4105 /*!
4106 	Calls fs_open() on the given vnode and returns a new
4107 	file descriptor for it
4108 */
4109 int
4110 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4111 {
4112 	return open_vnode(vnode, openMode, kernel);
4113 }
4114 
4115 
4116 /*!	Looks up a vnode with the given mount and vnode ID.
4117 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4118 	to the node.
4119 	It's currently only used by file_cache_create().
4120 */
4121 extern "C" status_t
4122 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4123 {
4124 	rw_lock_read_lock(&sVnodeLock);
4125 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4126 	rw_lock_read_unlock(&sVnodeLock);
4127 
4128 	if (vnode == NULL)
4129 		return B_ERROR;
4130 
4131 	*_vnode = vnode;
4132 	return B_OK;
4133 }
4134 
4135 
4136 extern "C" status_t
4137 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4138 	bool traverseLeafLink, bool kernel, void** _node)
4139 {
4140 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4141 		volume, path, kernel));
4142 
4143 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4144 	if (pathBuffer.InitCheck() != B_OK)
4145 		return B_NO_MEMORY;
4146 
4147 	fs_mount* mount;
4148 	status_t status = get_mount(volume->id, &mount);
4149 	if (status != B_OK)
4150 		return status;
4151 
4152 	char* buffer = pathBuffer.LockBuffer();
4153 	strlcpy(buffer, path, pathBuffer.BufferSize());
4154 
4155 	struct vnode* vnode = mount->root_vnode;
4156 
4157 	if (buffer[0] == '/')
4158 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4159 	else {
4160 		inc_vnode_ref_count(vnode);
4161 			// vnode_path_to_vnode() releases a reference to the starting vnode
4162 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4163 			kernel, &vnode, NULL);
4164 	}
4165 
4166 	put_mount(mount);
4167 
4168 	if (status != B_OK)
4169 		return status;
4170 
4171 	if (vnode->device != volume->id) {
4172 		// wrong mount ID -- must not gain access to foreign file system nodes
4173 		put_vnode(vnode);
4174 		return B_BAD_VALUE;
4175 	}
4176 
4177 	// Use get_vnode() to resolve the cookie for the right layer.
4178 	status = get_vnode(volume, vnode->id, _node);
4179 	put_vnode(vnode);
4180 
4181 	return status;
4182 }
4183 
4184 
4185 status_t
4186 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4187 	struct stat* stat, bool kernel)
4188 {
4189 	status_t status;
4190 
4191 	if (path) {
4192 		// path given: get the stat of the node referred to by (fd, path)
4193 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4194 		if (pathBuffer.InitCheck() != B_OK)
4195 			return B_NO_MEMORY;
4196 
4197 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4198 			traverseLeafLink, stat, kernel);
4199 	} else {
4200 		// no path given: get the FD and use the FD operation
4201 		struct file_descriptor* descriptor
4202 			= get_fd(get_current_io_context(kernel), fd);
4203 		if (descriptor == NULL)
4204 			return B_FILE_ERROR;
4205 
4206 		if (descriptor->ops->fd_read_stat)
4207 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4208 		else
4209 			status = EOPNOTSUPP;
4210 
4211 		put_fd(descriptor);
4212 	}
4213 
4214 	return status;
4215 }
4216 
4217 
4218 /*!	Finds the full path to the file that contains the module \a moduleName,
4219 	puts it into \a pathBuffer, and returns B_OK for success.
4220 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4221 	\c B_ENTRY_NOT_FOUND if no file could be found.
4222 	\a pathBuffer is clobbered in any case and must not be relied on if this
4223 	function returns unsuccessfully.
4224 	\a basePath and \a pathBuffer must not point to the same space.
4225 */
4226 status_t
4227 vfs_get_module_path(const char* basePath, const char* moduleName,
4228 	char* pathBuffer, size_t bufferSize)
4229 {
4230 	struct vnode* dir;
4231 	struct vnode* file;
4232 	status_t status;
4233 	size_t length;
4234 	char* path;
4235 
4236 	if (bufferSize == 0
4237 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4238 		return B_BUFFER_OVERFLOW;
4239 
4240 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4241 	if (status != B_OK)
4242 		return status;
4243 
4244 	// the path buffer has been clobbered by the above call
4245 	length = strlcpy(pathBuffer, basePath, bufferSize);
4246 	if (pathBuffer[length - 1] != '/')
4247 		pathBuffer[length++] = '/';
4248 
4249 	path = pathBuffer + length;
4250 	bufferSize -= length;
4251 
4252 	while (moduleName) {
4253 		char* nextPath = strchr(moduleName, '/');
4254 		if (nextPath == NULL)
4255 			length = strlen(moduleName);
4256 		else {
4257 			length = nextPath - moduleName;
4258 			nextPath++;
4259 		}
4260 
4261 		if (length + 1 >= bufferSize) {
4262 			status = B_BUFFER_OVERFLOW;
4263 			goto err;
4264 		}
4265 
4266 		memcpy(path, moduleName, length);
4267 		path[length] = '\0';
4268 		moduleName = nextPath;
4269 
4270 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4271 		if (status != B_OK) {
4272 			// vnode_path_to_vnode() has already released the reference to dir
4273 			return status;
4274 		}
4275 
4276 		if (S_ISDIR(file->Type())) {
4277 			// descend into the next directory
4278 			path[length] = '/';
4279 			path[length + 1] = '\0';
4280 			path += length + 1;
4281 			bufferSize -= length + 1;
4282 
4283 			dir = file;
4284 		} else if (S_ISREG(file->Type())) {
4285 			// it's a file, so it should be what we were searching for
4286 			put_vnode(file);
4287 
4288 			return B_OK;
4289 		} else {
4290 			TRACE(("vfs_get_module_path(): something is strange here: "
4291 				"0x%08lx...\n", file->Type()));
4292 			status = B_ERROR;
4293 			dir = file;
4294 			goto err;
4295 		}
4296 	}
4297 
4298 	// if we got here, the moduleName just pointed to a directory, not to
4299 	// a real module - what should we do in this case?
4300 	status = B_ENTRY_NOT_FOUND;
4301 
4302 err:
4303 	put_vnode(dir);
4304 	return status;
4305 }
4306 
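
/*	Example (illustrative values): with basePath "/boot/system/add-ons/kernel"
	and moduleName "bus_managers/pci/v1", the loop above appends one component
	at a time -- "bus_managers", then "pci" -- descending as long as the
	component resolves to a directory. As soon as a component resolves to a
	regular file, here "pci", the function returns B_OK with
	"/boot/system/add-ons/kernel/bus_managers/pci" in \a pathBuffer.
*/
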
4307 
4308 /*!	\brief Normalizes a given path.
4309 
4310 	The path must refer to an existing or non-existing entry in an existing
4311 	directory, that is, after chopping off the leaf component, the remaining
4312 	path must refer to an existing directory.
4313 
4314 	The returned path will be canonical in that it will be absolute, will not
4315 	contain any "." or ".." components or duplicate occurrences of '/'s,
4316 	and none of the directory components will be symbolic links.
4317 
4318 	Any two paths referring to the same entry will result in the same
4319 	normalized path (well, that is pretty much the definition of `normalized',
4320 	isn't it :-).
4321 
4322 	\param path The path to be normalized.
4323 	\param buffer The buffer into which the normalized path will be written.
4324 		   May be the same one as \a path.
4325 	\param bufferSize The size of \a buffer.
4326 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4327 	\param kernel \c true, if the IO context of the kernel shall be used,
4328 		   otherwise that of the team this thread belongs to. Only relevant,
4329 		   if the path is relative (to get the CWD).
4330 	\return \c B_OK if everything went fine, another error code otherwise.
4331 */
4332 status_t
4333 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4334 	bool traverseLink, bool kernel)
4335 {
4336 	if (!path || !buffer || bufferSize < 1)
4337 		return B_BAD_VALUE;
4338 
4339 	if (path != buffer) {
4340 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4341 			return B_BUFFER_OVERFLOW;
4342 	}
4343 
4344 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4345 }
4346 
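
/*	Usage sketch (hypothetical): normalizing a path in place, as the
	documentation above permits -- assuming no symlinks along the way:

	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/./system//lib/../bin", sizeof(path));
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
		// path now reads "/boot/system/bin"
	}
*/
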
4347 
4348 /*!	\brief Creates a special node in the file system.
4349 
4350 	The caller gets a reference to the newly created node (which is passed
4351 	back through \a _createdVnode) and is responsible for releasing it.
4352 
4353 	\param path The path where to create the entry for the node. Can be \c NULL,
4354 		in which case the node is created without an entry in the root FS -- it
4355 		will automatically be deleted when the last reference has been released.
4356 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4357 		the target file system will just create the node with its standard
4358 		operations. Depending on the type of the node a subnode might be created
4359 		automatically, though.
4360 	\param mode The type and permissions for the node to be created.
4361 	\param flags Flags to be passed to the creating FS.
4362 	\param kernel \c true, if called in the kernel context (relevant only if
4363 		\a path is not \c NULL and not absolute).
4364 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4365 		file system creating the node, with the private data pointer and
4366 		operations for the super node. Can be \c NULL.
4367 	\param _createdVnode Pointer to pre-allocated storage where to store the
4368 		pointer to the newly created node.
4369 	\return \c B_OK, if everything went fine, another error code otherwise.
4370 */
4371 status_t
4372 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4373 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4374 	struct vnode** _createdVnode)
4375 {
4376 	struct vnode* dirNode;
4377 	char _leaf[B_FILE_NAME_LENGTH];
4378 	char* leaf = NULL;
4379 
4380 	if (path) {
4381 		// We've got a path. Get the dir vnode and the leaf name.
4382 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4383 		if (tmpPathBuffer.InitCheck() != B_OK)
4384 			return B_NO_MEMORY;
4385 
4386 		char* tmpPath = tmpPathBuffer.LockBuffer();
4387 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4388 			return B_NAME_TOO_LONG;
4389 
4390 		// get the dir vnode and the leaf name
4391 		leaf = _leaf;
4392 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4393 		if (error != B_OK)
4394 			return error;
4395 	} else {
4396 		// No path. Create the node in the root FS.
4397 		dirNode = sRoot;
4398 		inc_vnode_ref_count(dirNode);
4399 	}
4400 
4401 	VNodePutter _(dirNode);
4402 
4403 	// check support for creating special nodes
4404 	if (!HAS_FS_CALL(dirNode, create_special_node))
4405 		return B_UNSUPPORTED;
4406 
4407 	// create the node
4408 	fs_vnode superVnode;
4409 	ino_t nodeID;
4410 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4411 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4412 	if (status != B_OK)
4413 		return status;
4414 
4415 	// lookup the node
4416 	rw_lock_read_lock(&sVnodeLock);
4417 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4418 	rw_lock_read_unlock(&sVnodeLock);
4419 
4420 	if (*_createdVnode == NULL) {
4421 		panic("vfs_create_special_node(): lookup of node failed");
4422 		return B_ERROR;
4423 	}
4424 
4425 	return B_OK;
4426 }
4427 
4428 
4429 extern "C" void
4430 vfs_put_vnode(struct vnode* vnode)
4431 {
4432 	put_vnode(vnode);
4433 }
4434 
4435 
4436 extern "C" status_t
4437 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4438 {
4439 	// Get current working directory from io context
4440 	struct io_context* context = get_current_io_context(false);
4441 	status_t status = B_OK;
4442 
4443 	mutex_lock(&context->io_mutex);
4444 
4445 	if (context->cwd != NULL) {
4446 		*_mountID = context->cwd->device;
4447 		*_vnodeID = context->cwd->id;
4448 	} else
4449 		status = B_ERROR;
4450 
4451 	mutex_unlock(&context->io_mutex);
4452 	return status;
4453 }
4454 
4455 
4456 status_t
4457 vfs_unmount(dev_t mountID, uint32 flags)
4458 {
4459 	return fs_unmount(NULL, mountID, flags, true);
4460 }
4461 
4462 
4463 extern "C" status_t
4464 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4465 {
4466 	struct vnode* vnode;
4467 
4468 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4469 	if (status != B_OK)
4470 		return status;
4471 
4472 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4473 	put_vnode(vnode);
4474 	return B_OK;
4475 }
4476 
4477 
4478 extern "C" void
4479 vfs_free_unused_vnodes(int32 level)
4480 {
4481 	vnode_low_resource_handler(NULL,
4482 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4483 }
4484 
4485 
4486 extern "C" bool
4487 vfs_can_page(struct vnode* vnode, void* cookie)
4488 {
4489 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4490 
4491 	if (HAS_FS_CALL(vnode, can_page))
4492 		return FS_CALL(vnode, can_page, cookie);
4493 	return false;
4494 }
4495 
4496 
4497 extern "C" status_t
4498 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4499 	size_t count, uint32 flags, size_t* _numBytes)
4500 {
4501 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4502 		pos));
4503 
4504 #if VFS_PAGES_IO_TRACING
4505 	size_t bytesRequested = *_numBytes;
4506 #endif
4507 
4508 	IORequest request;
4509 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4510 	if (status == B_OK) {
4511 		status = vfs_vnode_io(vnode, cookie, &request);
4512 		if (status == B_OK)
4513 			status = request.Wait();
4514 		*_numBytes = request.TransferredBytes();
4515 	}
4516 
4517 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4518 		status, *_numBytes));
4519 
4520 	return status;
4521 }
4522 
4523 
4524 extern "C" status_t
4525 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4526 	size_t count, uint32 flags, size_t* _numBytes)
4527 {
4528 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4529 		pos));
4530 
4531 #if VFS_PAGES_IO_TRACING
4532 	size_t bytesRequested = *_numBytes;
4533 #endif
4534 
4535 	IORequest request;
4536 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4537 	if (status == B_OK) {
4538 		status = vfs_vnode_io(vnode, cookie, &request);
4539 		if (status == B_OK)
4540 			status = request.Wait();
4541 		*_numBytes = request.TransferredBytes();
4542 	}
4543 
4544 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4545 		status, *_numBytes));
4546 
4547 	return status;
4548 }
4549 
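
/*	Usage sketch (hypothetical): with a vnode and a matching open cookie in
	hand, a caller of vfs_read_pages() above reads into two discontiguous
	buffers with a single request:

	iovec vecs[2] = {
		{ bufferA, B_PAGE_SIZE },
		{ bufferB, B_PAGE_SIZE }
	};
	size_t numBytes = 2 * B_PAGE_SIZE;
	status_t status = vfs_read_pages(vnode, cookie, 0, vecs, 2, 0, &numBytes);
	// on success numBytes holds the number of bytes actually transferred
*/
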
4550 
4551 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4552 	created if \a allocate is \c true.
4553 	On success, the function also acquires a reference to the cache
4554 	it returns.
4555 */
4556 extern "C" status_t
4557 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4558 {
4559 	if (vnode->cache != NULL) {
4560 		vnode->cache->AcquireRef();
4561 		*_cache = vnode->cache;
4562 		return B_OK;
4563 	}
4564 
4565 	rw_lock_read_lock(&sVnodeLock);
4566 	vnode->Lock();
4567 
4568 	status_t status = B_OK;
4569 
4570 	// The cache could have been created in the meantime
4571 	if (vnode->cache == NULL) {
4572 		if (allocate) {
4573 			// TODO: actually the vnode needs to be busy already here, or
4574 			//	else this won't work...
4575 			bool wasBusy = vnode->IsBusy();
4576 			vnode->SetBusy(true);
4577 
4578 			vnode->Unlock();
4579 			rw_lock_read_unlock(&sVnodeLock);
4580 
4581 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4582 
4583 			rw_lock_read_lock(&sVnodeLock);
4584 			vnode->Lock();
4585 			vnode->SetBusy(wasBusy);
4586 		} else
4587 			status = B_BAD_VALUE;
4588 	}
4589 
4590 	vnode->Unlock();
4591 	rw_lock_read_unlock(&sVnodeLock);
4592 
4593 	if (status == B_OK) {
4594 		vnode->cache->AcquireRef();
4595 		*_cache = vnode->cache;
4596 	}
4597 
4598 	return status;
4599 }
4600 
4601 
4602 status_t
4603 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4604 	file_io_vec* vecs, size_t* _count)
4605 {
4606 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4607 		vnode, vecs, offset, size));
4608 
4609 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4610 }
4611 
4612 
4613 status_t
4614 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4615 {
4616 	status_t status = FS_CALL(vnode, read_stat, stat);
4617 
4618 	// fill in the st_dev and st_ino fields
4619 	if (status == B_OK) {
4620 		stat->st_dev = vnode->device;
4621 		stat->st_ino = vnode->id;
4622 		stat->st_rdev = -1;
4623 	}
4624 
4625 	return status;
4626 }
4627 
4628 
4629 status_t
4630 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4631 {
4632 	struct vnode* vnode;
4633 	status_t status = get_vnode(device, inode, &vnode, true, false);
4634 	if (status != B_OK)
4635 		return status;
4636 
4637 	status = FS_CALL(vnode, read_stat, stat);
4638 
4639 	// fill in the st_dev and st_ino fields
4640 	if (status == B_OK) {
4641 		stat->st_dev = vnode->device;
4642 		stat->st_ino = vnode->id;
4643 		stat->st_rdev = -1;
4644 	}
4645 
4646 	put_vnode(vnode);
4647 	return status;
4648 }
4649 
4650 
4651 status_t
4652 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4653 {
4654 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4655 }
4656 
4657 
4658 status_t
4659 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4660 	char* path, size_t pathLength)
4661 {
4662 	struct vnode* vnode;
4663 	status_t status;
4664 
4665 	// filter invalid leaf names
4666 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4667 		return B_BAD_VALUE;
4668 
4669 	// get the vnode matching the dir's node_ref
4670 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4671 		// special cases "." and "..": we can directly get the vnode of the
4672 		// referenced directory
4673 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4674 		leaf = NULL;
4675 	} else
4676 		status = get_vnode(device, inode, &vnode, true, false);
4677 	if (status != B_OK)
4678 		return status;
4679 
4680 	// get the directory path
4681 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4682 	put_vnode(vnode);
4683 		// we don't need the vnode anymore
4684 	if (status != B_OK)
4685 		return status;
4686 
4687 	// append the leaf name
4688 	if (leaf) {
4689 		// insert a directory separator if this is not the file system root
4690 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4691 				>= pathLength)
4692 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4693 			return B_NAME_TOO_LONG;
4694 		}
4695 	}
4696 
4697 	return B_OK;
4698 }
4699 
4700 
4701 /*!	If the given descriptor locked its vnode, that lock will be released. */
4702 void
4703 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4704 {
4705 	struct vnode* vnode = fd_vnode(descriptor);
4706 
4707 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4708 		vnode->mandatory_locked_by = NULL;
4709 }
4710 
4711 
4712 /*!	Closes all file descriptors of the specified I/O context that
4713 	have the O_CLOEXEC flag set.
4714 */
4715 void
4716 vfs_exec_io_context(io_context* context)
4717 {
4718 	uint32 i;
4719 
4720 	for (i = 0; i < context->table_size; i++) {
4721 		mutex_lock(&context->io_mutex);
4722 
4723 		struct file_descriptor* descriptor = context->fds[i];
4724 		bool remove = false;
4725 
4726 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4727 			context->fds[i] = NULL;
4728 			context->num_used_fds--;
4729 
4730 			remove = true;
4731 		}
4732 
4733 		mutex_unlock(&context->io_mutex);
4734 
4735 		if (remove) {
4736 			close_fd(descriptor);
4737 			put_fd(descriptor);
4738 		}
4739 	}
4740 }
4741 
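
/*	Usage sketch (hypothetical): a descriptor closed by this function is one
	that userland flagged beforehand, e.g.:

	int fd = open("/var/log/app.log", O_WRONLY);
	fcntl(fd, F_SETFD, FD_CLOEXEC);
	// across an exec*(), this FD is now closed automatically
*/
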
4742 
4743 /*! Sets up a new io_context structure, and inherits the properties
4744 	of the parent io_context if it is given.
4745 */
4746 io_context*
4747 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4748 {
4749 	io_context* context = (io_context*)malloc(sizeof(io_context));
4750 	if (context == NULL)
4751 		return NULL;
4752 
4753 	TIOC(NewIOContext(context, parentContext));
4754 
4755 	memset(context, 0, sizeof(io_context));
4756 	context->ref_count = 1;
4757 
4758 	MutexLocker parentLocker;
4759 
4760 	size_t tableSize;
4761 	if (parentContext) {
4762 		parentLocker.SetTo(parentContext->io_mutex, false);
4763 		tableSize = parentContext->table_size;
4764 	} else
4765 		tableSize = DEFAULT_FD_TABLE_SIZE;
4766 
4767 	// allocate space for FDs and their close-on-exec flags
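	// The three tables share one allocation: first the file_descriptor
	// pointers, then the select_info pointers, then one close-on-exec bit
	// per FD, rounded up to whole bytes -- hence the (tableSize + 7) / 8.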
4768 	context->fds = (file_descriptor**)malloc(
4769 		sizeof(struct file_descriptor*) * tableSize
4770 		+ sizeof(struct select_sync*) * tableSize
4771 		+ (tableSize + 7) / 8);
4772 	if (context->fds == NULL) {
4773 		free(context);
4774 		return NULL;
4775 	}
4776 
4777 	context->select_infos = (select_info**)(context->fds + tableSize);
4778 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4779 
4780 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4781 		+ sizeof(struct select_sync*) * tableSize
4782 		+ (tableSize + 7) / 8);
4783 
4784 	mutex_init(&context->io_mutex, "I/O context");
4785 
4786 	// Copy all parent file descriptors
4787 
4788 	if (parentContext) {
4789 		size_t i;
4790 
4791 		mutex_lock(&sIOContextRootLock);
4792 		context->root = parentContext->root;
4793 		if (context->root)
4794 			inc_vnode_ref_count(context->root);
4795 		mutex_unlock(&sIOContextRootLock);
4796 
4797 		context->cwd = parentContext->cwd;
4798 		if (context->cwd)
4799 			inc_vnode_ref_count(context->cwd);
4800 
4801 		for (i = 0; i < tableSize; i++) {
4802 			struct file_descriptor* descriptor = parentContext->fds[i];
4803 
4804 			if (descriptor != NULL) {
4805 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4806 				if (closeOnExec && purgeCloseOnExec)
4807 					continue;
4808 
4809 				TFD(InheritFD(context, i, descriptor, parentContext));
4810 
4811 				context->fds[i] = descriptor;
4812 				context->num_used_fds++;
4813 				atomic_add(&descriptor->ref_count, 1);
4814 				atomic_add(&descriptor->open_count, 1);
4815 
4816 				if (closeOnExec)
4817 					fd_set_close_on_exec(context, i, true);
4818 			}
4819 		}
4820 
4821 		parentLocker.Unlock();
4822 	} else {
4823 		context->root = sRoot;
4824 		context->cwd = sRoot;
4825 
4826 		if (context->root)
4827 			inc_vnode_ref_count(context->root);
4828 
4829 		if (context->cwd)
4830 			inc_vnode_ref_count(context->cwd);
4831 	}
4832 
4833 	context->table_size = tableSize;
4834 
4835 	list_init(&context->node_monitors);
4836 	context->max_monitors = DEFAULT_NODE_MONITORS;
4837 
4838 	return context;
4839 }
4840 
4841 
4842 static status_t
4843 vfs_free_io_context(io_context* context)
4844 {
4845 	uint32 i;
4846 
4847 	TIOC(FreeIOContext(context));
4848 
4849 	if (context->root)
4850 		put_vnode(context->root);
4851 
4852 	if (context->cwd)
4853 		put_vnode(context->cwd);
4854 
4855 	mutex_lock(&context->io_mutex);
4856 
4857 	for (i = 0; i < context->table_size; i++) {
4858 		if (struct file_descriptor* descriptor = context->fds[i]) {
4859 			close_fd(descriptor);
4860 			put_fd(descriptor);
4861 		}
4862 	}
4863 
4864 	mutex_destroy(&context->io_mutex);
4865 
4866 	remove_node_monitors(context);
4867 	free(context->fds);
4868 	free(context);
4869 
4870 	return B_OK;
4871 }
4872 
4873 
4874 void
4875 vfs_get_io_context(io_context* context)
4876 {
4877 	atomic_add(&context->ref_count, 1);
4878 }
4879 
4880 
4881 void
4882 vfs_put_io_context(io_context* context)
4883 {
4884 	if (atomic_add(&context->ref_count, -1) == 1)
4885 		vfs_free_io_context(context);
4886 }
4887 
4888 
4889 static status_t
4890 vfs_resize_fd_table(struct io_context* context, const int newSize)
4891 {
4892 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4893 		return EINVAL;
4894 
4895 	TIOC(ResizeIOContext(context, newSize));
4896 
4897 	MutexLocker _(context->io_mutex);
4898 
4899 	int oldSize = context->table_size;
4900 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4901 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4902 
4903 	// If the tables shrink, make sure none of the fds being dropped are in use.
4904 	if (newSize < oldSize) {
4905 		for (int i = oldSize; i-- > newSize;) {
4906 			if (context->fds[i])
4907 				return EBUSY;
4908 		}
4909 	}
4910 
4911 	// store pointers to the old tables
4912 	file_descriptor** oldFDs = context->fds;
4913 	select_info** oldSelectInfos = context->select_infos;
4914 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4915 
4916 	// allocate new tables
4917 	file_descriptor** newFDs = (file_descriptor**)malloc(
4918 		sizeof(struct file_descriptor*) * newSize
4919 		+ sizeof(struct select_sync*) * newSize
4920 		+ newCloseOnExitBitmapSize);
4921 	if (newFDs == NULL)
4922 		return ENOMEM;
4923 
4924 	context->fds = newFDs;
4925 	context->select_infos = (select_info**)(context->fds + newSize);
4926 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4927 	context->table_size = newSize;
4928 
4929 	// copy entries from old tables
4930 	int toCopy = min_c(oldSize, newSize);
4931 
4932 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4933 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4934 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4935 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4936 
4937 	// clear additional entries, if the tables grow
4938 	if (newSize > oldSize) {
4939 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4940 		memset(context->select_infos + oldSize, 0,
4941 			sizeof(void*) * (newSize - oldSize));
4942 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4943 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4944 	}
4945 
4946 	free(oldFDs);
4947 
4948 	return B_OK;
4949 }
4950 
4951 
4952 static status_t
4953 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4954 {
4955 	int	status = B_OK;
4956 
4957 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4958 		return EINVAL;
4959 
4960 	mutex_lock(&context->io_mutex);
4961 
4962 	if ((size_t)newSize < context->num_monitors) {
4963 		status = EBUSY;
4964 		goto out;
4965 	}
4966 	context->max_monitors = newSize;
4967 
4968 out:
4969 	mutex_unlock(&context->io_mutex);
4970 	return status;
4971 }
4972 
4973 
4974 int
4975 vfs_getrlimit(int resource, struct rlimit* rlp)
4976 {
4977 	if (!rlp)
4978 		return B_BAD_ADDRESS;
4979 
4980 	switch (resource) {
4981 		case RLIMIT_NOFILE:
4982 		{
4983 			struct io_context* context = get_current_io_context(false);
4984 			MutexLocker _(context->io_mutex);
4985 
4986 			rlp->rlim_cur = context->table_size;
4987 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4988 			return 0;
4989 		}
4990 
4991 		case RLIMIT_NOVMON:
4992 		{
4993 			struct io_context* context = get_current_io_context(false);
4994 			MutexLocker _(context->io_mutex);
4995 
4996 			rlp->rlim_cur = context->max_monitors;
4997 			rlp->rlim_max = MAX_NODE_MONITORS;
4998 			return 0;
4999 		}
5000 
5001 		default:
5002 			return B_BAD_VALUE;
5003 	}
5004 }
5005 
5006 
5007 int
5008 vfs_setrlimit(int resource, const struct rlimit* rlp)
5009 {
5010 	if (!rlp)
5011 		return B_BAD_ADDRESS;
5012 
5013 	switch (resource) {
5014 		case RLIMIT_NOFILE:
5015 			/* TODO: check getuid() */
5016 			if (rlp->rlim_max != RLIM_SAVED_MAX
5017 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5018 				return B_NOT_ALLOWED;
5019 
5020 			return vfs_resize_fd_table(get_current_io_context(false),
5021 				rlp->rlim_cur);
5022 
5023 		case RLIMIT_NOVMON:
5024 			/* TODO: check getuid() */
5025 			if (rlp->rlim_max != RLIM_SAVED_MAX
5026 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5027 				return B_NOT_ALLOWED;
5028 
5029 			return vfs_resize_monitor_table(get_current_io_context(false),
5030 				rlp->rlim_cur);
5031 
5032 		default:
5033 			return B_BAD_VALUE;
5034 	}
5035 }
5036 
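
/*	Usage sketch (hypothetical): a userland team that needs more file
	descriptors grows its table via the POSIX interface, which is expected to
	end up in vfs_setrlimit():

	struct rlimit rl;
	rl.rlim_cur = 1024;
	rl.rlim_max = RLIM_SAVED_MAX;
	setrlimit(RLIMIT_NOFILE, &rl);

	Note that vfs_resize_fd_table() refuses to shrink the table below any FD
	that is still in use.
*/
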
5037 
5038 status_t
5039 vfs_init(kernel_args* args)
5040 {
5041 	vnode::StaticInit();
5042 
5043 	struct vnode dummyVnode;
5044 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5045 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5046 	if (sVnodeTable == NULL)
5047 		panic("vfs_init: error creating vnode hash table\n");
5048 
5049 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5050 
5051 	struct fs_mount dummyMount;
5052 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5053 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5054 	if (sMountsTable == NULL)
5055 		panic("vfs_init: error creating mounts hash table\n");
5056 
5057 	node_monitor_init();
5058 
5059 	sRoot = NULL;
5060 
5061 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5062 
5063 	if (block_cache_init() != B_OK)
5064 		return B_ERROR;
5065 
5066 #ifdef ADD_DEBUGGER_COMMANDS
5067 	// add some debugger commands
5068 	add_debugger_command_etc("vnode", &dump_vnode,
5069 		"Print info about the specified vnode",
5070 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5071 		"Prints information about the vnode specified by address <vnode> or\n"
5072 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5073 		"constructed and printed. It might not be possible to construct a\n"
5074 		"complete path, though.\n",
5075 		0);
5076 	add_debugger_command("vnodes", &dump_vnodes,
5077 		"list all vnodes (from the specified device)");
5078 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5079 		"list all vnode caches");
5080 	add_debugger_command("mount", &dump_mount,
5081 		"info about the specified fs_mount");
5082 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5083 	add_debugger_command("io_context", &dump_io_context,
5084 		"info about the I/O context");
5085 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5086 		"info about vnode usage");
5087 #endif
5088 
5089 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5090 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
5091 
5092 	file_map_init();
5093 
5094 	return file_cache_init();
5095 }
5096 
5097 
5098 //	#pragma mark - fd_ops implementations
5099 
5100 
5101 /*!
5102 	Calls fs_open() on the given vnode and returns a new
5103 	file descriptor for it
5104 */
5105 static int
5106 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5107 {
5108 	void* cookie;
5109 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5110 	if (status != B_OK)
5111 		return status;
5112 
5113 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5114 	if (fd < 0) {
5115 		FS_CALL(vnode, close, cookie);
5116 		FS_CALL(vnode, free_cookie, cookie);
5117 	}
5118 	return fd;
5119 }
5120 
5121 
5122 /*!
5123 	Creates a new file at (\a directory, \a name) -- or opens the existing
5124 	entry, if \a openMode permits -- and returns a new file descriptor for it
5125 */
5126 static int
5127 create_vnode(struct vnode* directory, const char* name, int openMode,
5128 	int perms, bool kernel)
5129 {
5130 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5131 	status_t status = B_ERROR;
5132 	struct vnode* vnode;
5133 	void* cookie;
5134 	ino_t newID;
5135 
5136 	// This is somewhat tricky: If the entry already exists, the FS responsible
5137 	// for the directory might not necessarily also be the one responsible for
5138 	// the node the entry refers to. So we can actually never call the create()
5139 	// hook without O_EXCL. Instead we try to look the entry up first. If it
5140 	// already exists, we just open the node (unless O_EXCL), otherwise we call
5141 	// create() with O_EXCL. This introduces a race condition, since someone
5142 	// else might have created the entry in the meantime. We hope the respective
5143 	// FS returns the correct error code, and we retry (up to 3 times).
5144 
5145 	for (int i = 0; i < 3 && status != B_OK; i++) {
5146 		// look the node up
5147 		status = lookup_dir_entry(directory, name, &vnode);
5148 		if (status == B_OK) {
5149 			VNodePutter putter(vnode);
5150 
5151 			if ((openMode & O_EXCL) != 0)
5152 				return B_FILE_EXISTS;
5153 
5154 			// If the node is a symlink, we have to follow it, unless
5155 			// O_NOTRAVERSE is set.
5156 			if (S_ISLNK(vnode->Type()) && traverse) {
5157 				putter.Put();
5158 				char clonedName[B_FILE_NAME_LENGTH + 1];
5159 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5160 						>= B_FILE_NAME_LENGTH) {
5161 					return B_NAME_TOO_LONG;
5162 				}
5163 
5164 				inc_vnode_ref_count(directory);
5165 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5166 					kernel, &vnode, NULL);
5167 				if (status != B_OK)
5168 					return status;
5169 
5170 				putter.SetTo(vnode);
5171 			}
5172 
5173 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5174 				put_vnode(vnode);
5175 				return B_LINK_LIMIT;
5176 			}
5177 
5178 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5179 			// on success keep the vnode reference for the FD
5180 			if (fd >= 0)
5181 				putter.Detach();
5182 
5183 			return fd;
5184 		}
5185 
5186 		// it doesn't exist yet -- try to create it
5187 
5188 		if (!HAS_FS_CALL(directory, create))
5189 			return EROFS;
5190 
5191 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5192 			&cookie, &newID);
5193 		if (status != B_OK
5194 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5195 			return status;
5196 		}
5197 	}
5198 
5199 	if (status != B_OK)
5200 		return status;
5201 
5202 	// the node has been created successfully
5203 
5204 	rw_lock_read_lock(&sVnodeLock);
5205 	vnode = lookup_vnode(directory->device, newID);
5206 	rw_lock_read_unlock(&sVnodeLock);
5207 
5208 	if (vnode == NULL) {
5209 		panic("vfs: fs_create() returned success but there is no vnode, "
5210 			"mount ID %ld!\n", directory->device);
5211 		return B_BAD_VALUE;
5212 	}
5213 
5214 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5215 	if (fd >= 0)
5216 		return fd;
5217 
5218 	status = fd;
5219 
5220 	// something went wrong, clean up
5221 
5222 	FS_CALL(vnode, close, cookie);
5223 	FS_CALL(vnode, free_cookie, cookie);
5224 	put_vnode(vnode);
5225 
5226 	FS_CALL(directory, unlink, name);
5227 
5228 	return status;
5229 }
5230 
5231 
5232 /*! Calls fs open_dir() on the given vnode and returns a new
5233 	file descriptor for it
5234 */
5235 static int
5236 open_dir_vnode(struct vnode* vnode, bool kernel)
5237 {
5238 	void* cookie;
5239 	int status;
5240 
5241 	status = FS_CALL(vnode, open_dir, &cookie);
5242 	if (status != B_OK)
5243 		return status;
5244 
5245 	// directory is opened, create a fd
5246 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5247 	if (status >= 0)
5248 		return status;
5249 
5250 	FS_CALL(vnode, close_dir, cookie);
5251 	FS_CALL(vnode, free_dir_cookie, cookie);
5252 
5253 	return status;
5254 }
5255 
5256 
5257 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5258 	file descriptor for it.
5259 	Used by attr_dir_open() and attr_dir_open_fd().
5260 */
5261 static int
5262 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5263 {
5264 	void* cookie;
5265 	int status;
5266 
5267 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5268 		return EOPNOTSUPP;
5269 
5270 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5271 	if (status != B_OK)
5272 		return status;
5273 
5274 	// directory is opened, create a fd
5275 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5276 	if (status >= 0)
5277 		return status;
5278 
5279 	FS_CALL(vnode, close_attr_dir, cookie);
5280 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5281 
5282 	return status;
5283 }
5284 
5285 
5286 static int
5287 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5288 	int openMode, int perms, bool kernel)
5289 {
5290 	struct vnode* directory;
5291 	int status;
5292 
5293 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5294 		"kernel %d\n", name, openMode, perms, kernel));
5295 
5296 	// get directory to put the new file in
5297 	status = get_vnode(mountID, directoryID, &directory, true, false);
5298 	if (status != B_OK)
5299 		return status;
5300 
5301 	status = create_vnode(directory, name, openMode, perms, kernel);
5302 	put_vnode(directory);
5303 
5304 	return status;
5305 }
5306 
5307 
5308 static int
5309 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5310 {
5311 	char name[B_FILE_NAME_LENGTH];
5312 	struct vnode* directory;
5313 	int status;
5314 
5315 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5316 		openMode, perms, kernel));
5317 
5318 	// get directory to put the new file in
5319 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5320 	if (status < 0)
5321 		return status;
5322 
5323 	status = create_vnode(directory, name, openMode, perms, kernel);
5324 
5325 	put_vnode(directory);
5326 	return status;
5327 }
5328 
5329 
5330 static int
5331 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5332 	int openMode, bool kernel)
5333 {
5334 	if (name == NULL || *name == '\0')
5335 		return B_BAD_VALUE;
5336 
5337 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5338 		mountID, directoryID, name, openMode));
5339 
5340 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5341 
5342 	// get the vnode matching the entry_ref
5343 	struct vnode* vnode;
5344 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5345 		kernel, &vnode);
5346 	if (status != B_OK)
5347 		return status;
5348 
5349 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5350 		put_vnode(vnode);
5351 		return B_LINK_LIMIT;
5352 	}
5353 
5354 	int fd = open_vnode(vnode, openMode, kernel);
5355 	if (fd < 0)
5356 		put_vnode(vnode);
5357 
5358 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5359 		vnode->id, name);
5360 	return fd;
5361 }
5362 
5363 
5364 static int
5365 file_open(int fd, char* path, int openMode, bool kernel)
5366 {
5367 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5368 
5369 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5370 		fd, path, openMode, kernel));
5371 
5372 	// get the vnode matching the vnode + path combination
5373 	struct vnode* vnode;
5374 	ino_t parentID;
5375 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5376 		&parentID, kernel);
5377 	if (status != B_OK)
5378 		return status;
5379 
5380 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5381 		put_vnode(vnode);
5382 		return B_LINK_LIMIT;
5383 	}
5384 
5385 	// open the vnode
5386 	int newFD = open_vnode(vnode, openMode, kernel);
5387 	// put only on error -- otherwise our reference was transferred to the FD
5388 	if (newFD < 0)
5389 		put_vnode(vnode);
5390 
5391 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5392 		vnode->device, parentID, vnode->id, NULL);
5393 
5394 	return newFD;
5395 }
5396 
5397 
5398 static status_t
5399 file_close(struct file_descriptor* descriptor)
5400 {
5401 	struct vnode* vnode = descriptor->u.vnode;
5402 	status_t status = B_OK;
5403 
5404 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5405 
5406 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5407 		vnode->id);
5408 	if (HAS_FS_CALL(vnode, close)) {
5409 		status = FS_CALL(vnode, close, descriptor->cookie);
5410 	}
5411 
5412 	if (status == B_OK) {
5413 		// remove all outstanding locks for this team
5414 		release_advisory_lock(vnode, NULL);
5415 	}
5416 	return status;
5417 }
5418 
5419 
5420 static void
5421 file_free_fd(struct file_descriptor* descriptor)
5422 {
5423 	struct vnode* vnode = descriptor->u.vnode;
5424 
5425 	if (vnode != NULL) {
5426 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5427 		put_vnode(vnode);
5428 	}
5429 }
5430 
5431 
5432 static status_t
5433 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5434 	size_t* length)
5435 {
5436 	struct vnode* vnode = descriptor->u.vnode;
5437 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5438 		*length));
5439 
5440 	if (S_ISDIR(vnode->Type()))
5441 		return B_IS_A_DIRECTORY;
5442 
5443 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5444 }
5445 
5446 
5447 static status_t
5448 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5449 	size_t* length)
5450 {
5451 	struct vnode* vnode = descriptor->u.vnode;
5452 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5453 
5454 	if (S_ISDIR(vnode->Type()))
5455 		return B_IS_A_DIRECTORY;
5456 	if (!HAS_FS_CALL(vnode, write))
5457 		return EROFS;
5458 
5459 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5460 }
5461 
5462 
5463 static off_t
5464 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5465 {
5466 	struct vnode* vnode = descriptor->u.vnode;
5467 	off_t offset;
5468 
5469 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5470 
5471 	// some kinds of files are not seekable
5472 	switch (vnode->Type() & S_IFMT) {
5473 		case S_IFIFO:
5474 		case S_IFSOCK:
5475 			return ESPIPE;
5476 
5477 		// The Open Group Base Specs don't single out any file types besides
5478 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5479 		case S_IFREG:
5480 		case S_IFBLK:
5481 		case S_IFDIR:
5482 		case S_IFLNK:
5483 		case S_IFCHR:
5484 			break;
5485 	}
5486 
5487 	switch (seekType) {
5488 		case SEEK_SET:
5489 			offset = 0;
5490 			break;
5491 		case SEEK_CUR:
5492 			offset = descriptor->pos;
5493 			break;
5494 		case SEEK_END:
5495 		{
5496 			// stat() the node
5497 			if (!HAS_FS_CALL(vnode, read_stat))
5498 				return EOPNOTSUPP;
5499 
5500 			struct stat stat;
5501 			status_t status = FS_CALL(vnode, read_stat, &stat);
5502 			if (status != B_OK)
5503 				return status;
5504 
5505 			offset = stat.st_size;
5506 			break;
5507 		}
5508 		default:
5509 			return B_BAD_VALUE;
5510 	}
5511 
5512 	// assumes off_t is 64 bits wide
5513 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5514 		return EOVERFLOW;
5515 
5516 	pos += offset;
5517 	if (pos < 0)
5518 		return B_BAD_VALUE;
5519 
5520 	return descriptor->pos = pos;
5521 }
5522 
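
/*	Example (illustrative): with SEEK_END the new position is computed from
	st_size, so lseek(fd, -128, SEEK_END) on a 1024-byte file yields a
	position of 896; a negative result is rejected with B_BAD_VALUE.
*/
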
5523 
5524 static status_t
5525 file_select(struct file_descriptor* descriptor, uint8 event,
5526 	struct selectsync* sync)
5527 {
5528 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5529 
5530 	struct vnode* vnode = descriptor->u.vnode;
5531 
5532 	// If the FS has no select() hook, notify select() now.
5533 	if (!HAS_FS_CALL(vnode, select))
5534 		return notify_select_event(sync, event);
5535 
5536 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5537 }
5538 
5539 
5540 static status_t
5541 file_deselect(struct file_descriptor* descriptor, uint8 event,
5542 	struct selectsync* sync)
5543 {
5544 	struct vnode* vnode = descriptor->u.vnode;
5545 
5546 	if (!HAS_FS_CALL(vnode, deselect))
5547 		return B_OK;
5548 
5549 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5550 }
5551 
5552 
5553 static status_t
5554 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5555 	bool kernel)
5556 {
5557 	struct vnode* vnode;
5558 	status_t status;
5559 
5560 	if (name == NULL || *name == '\0')
5561 		return B_BAD_VALUE;
5562 
5563 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5564 		"perms = %d)\n", mountID, parentID, name, perms));
5565 
5566 	status = get_vnode(mountID, parentID, &vnode, true, false);
5567 	if (status != B_OK)
5568 		return status;
5569 
5570 	if (HAS_FS_CALL(vnode, create_dir))
5571 		status = FS_CALL(vnode, create_dir, name, perms);
5572 	else
5573 		status = EROFS;
5574 
5575 	put_vnode(vnode);
5576 	return status;
5577 }
5578 
5579 
5580 static status_t
5581 dir_create(int fd, char* path, int perms, bool kernel)
5582 {
5583 	char filename[B_FILE_NAME_LENGTH];
5584 	struct vnode* vnode;
5585 	status_t status;
5586 
5587 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5588 		kernel));
5589 
5590 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5591 	if (status < 0)
5592 		return status;
5593 
5594 	if (HAS_FS_CALL(vnode, create_dir)) {
5595 		status = FS_CALL(vnode, create_dir, filename, perms);
5596 	} else
5597 		status = EROFS;
5598 
5599 	put_vnode(vnode);
5600 	return status;
5601 }
5602 
5603 
5604 static int
5605 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5606 {
5607 	struct vnode* vnode;
5608 	int status;
5609 
5610 	FUNCTION(("dir_open_entry_ref()\n"));
5611 
5612 	if (name && *name == '\0')
5613 		return B_BAD_VALUE;
5614 
5615 	// get the vnode matching the entry_ref/node_ref
5616 	if (name) {
5617 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5618 			&vnode);
5619 	} else
5620 		status = get_vnode(mountID, parentID, &vnode, true, false);
5621 	if (status != B_OK)
5622 		return status;
5623 
5624 	int fd = open_dir_vnode(vnode, kernel);
5625 	if (fd < 0)
5626 		put_vnode(vnode);
5627 
5628 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5629 		vnode->id, name);
5630 	return fd;
5631 }
5632 
5633 
5634 static int
5635 dir_open(int fd, char* path, bool kernel)
5636 {
5637 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5638 		kernel));
5639 
5640 	// get the vnode matching the vnode + path combination
5641 	struct vnode* vnode = NULL;
5642 	ino_t parentID;
5643 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5644 		kernel);
5645 	if (status != B_OK)
5646 		return status;
5647 
5648 	// open the dir
5649 	int newFD = open_dir_vnode(vnode, kernel);
5650 	if (newFD < 0)
5651 		put_vnode(vnode);
5652 
5653 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID,
5654 		vnode->id, NULL);
5655 	return newFD;
5656 }
5657 
5658 
5659 static status_t
5660 dir_close(struct file_descriptor* descriptor)
5661 {
5662 	struct vnode* vnode = descriptor->u.vnode;
5663 
5664 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5665 
5666 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5667 		vnode->id);
5668 	if (HAS_FS_CALL(vnode, close_dir))
5669 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5670 
5671 	return B_OK;
5672 }
5673 
5674 
5675 static void
5676 dir_free_fd(struct file_descriptor* descriptor)
5677 {
5678 	struct vnode* vnode = descriptor->u.vnode;
5679 
5680 	if (vnode != NULL) {
5681 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5682 		put_vnode(vnode);
5683 	}
5684 }
5685 
5686 
5687 static status_t
5688 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5689 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5690 {
5691 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5692 		bufferSize, _count);
5693 }
5694 
5695 
5696 static status_t
5697 fix_dirent(struct vnode* parent, struct dirent* entry,
5698 	struct io_context* ioContext)
5699 {
5700 	// set d_pdev and d_pino
5701 	entry->d_pdev = parent->device;
5702 	entry->d_pino = parent->id;
5703 
5704 	// If this is the ".." entry and the directory is the root of a FS,
5705 	// we need to replace d_dev and d_ino with the actual values.
5706 	if (strcmp(entry->d_name, "..") == 0
5707 		&& parent->mount->root_vnode == parent
5708 		&& parent->mount->covers_vnode) {
5709 		inc_vnode_ref_count(parent);
5710 			// vnode_path_to_vnode() puts the node
5711 
5712 		// Make sure the IO context root is not bypassed.
5713 		if (parent == ioContext->root) {
5714 			entry->d_dev = parent->device;
5715 			entry->d_ino = parent->id;
5716 		} else {
5717 			// ".." is guaranteed not to be clobbered by this call
5718 			struct vnode* vnode;
5719 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5720 				ioContext, &vnode, NULL);
5721 
5722 			if (status == B_OK) {
5723 				entry->d_dev = vnode->device;
5724 				entry->d_ino = vnode->id;
5725 			}
5726 		}
5727 	} else {
5728 		// resolve mount points
5729 		ReadLocker _(&sVnodeLock);
5730 
5731 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5732 		if (vnode != NULL) {
5733 			if (vnode->covered_by != NULL) {
5734 				entry->d_dev = vnode->covered_by->device;
5735 				entry->d_ino = vnode->covered_by->id;
5736 			}
5737 		}
5738 	}
5739 
5740 	return B_OK;
5741 }
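
/*
 * A concrete example of the rewriting above (hypothetical device IDs):
 * assume device 3 is mounted on /boot, a directory on device 1. Reading
 * ".." in the root directory of device 3 would naively yield that FS's
 * internal parent-of-root; the first branch replaces d_dev/d_ino with
 * those of the directory a caller actually reaches via "..", i.e. "/"
 * on device 1. Conversely, an entry that is itself a mount point is
 * rewritten in the second branch to refer to the root vnode of the
 * volume mounted on it (vnode->covered_by).
 */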
5742 
5743 
5744 static status_t
5745 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5746 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5747 {
5748 	if (!HAS_FS_CALL(vnode, read_dir))
5749 		return EOPNOTSUPP;
5750 
5751 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5752 		_count);
5753 	if (error != B_OK)
5754 		return error;
5755 
5756 	// we need to adjust the read dirents
5757 	uint32 count = *_count;
5758 	for (uint32 i = 0; i < count; i++) {
5759 		error = fix_dirent(vnode, buffer, ioContext);
5760 		if (error != B_OK)
5761 			return error;
5762 
5763 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5764 	}
5765 
5766 	return error;
5767 }
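
/*
 * Consumer-side sketch (illustrative only): since the returned entries
 * are packed back to back, the only correct way to advance through the
 * buffer is via d_reclen, exactly as the fix-up loop above does.
 *
 *	char buffer[4096];
 *	uint32 count = 32;
 *	if (dir_read(ioContext, vnode, cookie, (struct dirent*)buffer,
 *			sizeof(buffer), &count) == B_OK) {
 *		struct dirent* entry = (struct dirent*)buffer;
 *		for (uint32 i = 0; i < count; i++) {
 *			dprintf("entry: %s\n", entry->d_name);
 *			entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
 *		}
 *	}
 */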
5768 
5769 
5770 static status_t
5771 dir_rewind(struct file_descriptor* descriptor)
5772 {
5773 	struct vnode* vnode = descriptor->u.vnode;
5774 
5775 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5776 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5777 	}
5778 
5779 	return EOPNOTSUPP;
5780 }
5781 
5782 
5783 static status_t
5784 dir_remove(int fd, char* path, bool kernel)
5785 {
5786 	char name[B_FILE_NAME_LENGTH];
5787 	struct vnode* directory;
5788 	status_t status;
5789 
5790 	if (path != NULL) {
5791 		// we need to make sure our path name doesn't end in "/", ".",
5792 		// or ".."
5793 		char* lastSlash = strrchr(path, '/');
5794 		if (lastSlash != NULL) {
5795 			char* leaf = lastSlash + 1;
5796 			if (!strcmp(leaf, ".."))
5797 				return B_NOT_ALLOWED;
5798 
5799 			// skip over multiple trailing slashes
5800 			while (lastSlash > path && lastSlash[-1] == '/') {
5801 				lastSlash--;
5802 			}
5803 
5804 			if (!leaf[0]
5805 				|| !strcmp(leaf, ".")) {
5806 				// "name/" -> "name", or "name/." -> "name"
5807 				lastSlash[0] = '\0';
5808 			}
5809 		}
5810 
5811 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5812 			return B_NOT_ALLOWED;
5813 	}
5814 
5815 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5816 	if (status != B_OK)
5817 		return status;
5818 
5819 	if (HAS_FS_CALL(directory, remove_dir))
5820 		status = FS_CALL(directory, remove_dir, name);
5821 	else
5822 		status = EROFS;
5823 
5824 	put_vnode(directory);
5825 	return status;
5826 }
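
/*
 * The trimming above normalizes trailing separators before the final
 * checks; for a few illustrative inputs:
 *
 *	"foo/bar/"  -> "foo/bar"
 *	"foo/bar/." -> "foo/bar"
 *	"foo/bar//" -> "foo/bar"
 *	"foo/.."    -> B_NOT_ALLOWED
 *	".", ".."   -> B_NOT_ALLOWED
 */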
5827 
5828 
5829 static status_t
5830 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5831 	size_t length)
5832 {
5833 	struct vnode* vnode = descriptor->u.vnode;
5834 
5835 	if (HAS_FS_CALL(vnode, ioctl))
5836 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5837 
5838 	return EOPNOTSUPP;
5839 }
5840 
5841 
5842 static status_t
5843 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5844 {
5845 	struct flock flock;
5846 
5847 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5848 		fd, op, argument, kernel ? "kernel" : "user"));
5849 
5850 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5851 		fd);
5852 	if (descriptor == NULL)
5853 		return B_FILE_ERROR;
5854 
5855 	struct vnode* vnode = fd_vnode(descriptor);
5856 
5857 	status_t status = B_OK;
5858 
5859 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5860 		if (descriptor->type != FDTYPE_FILE)
5861 			status = B_BAD_VALUE;
5862 		else if (user_memcpy(&flock, (struct flock*)argument,
5863 				sizeof(struct flock)) != B_OK)
5864 			status = B_BAD_ADDRESS;
5865 
5866 		if (status != B_OK) {
5867 			put_fd(descriptor);
5868 			return status;
5869 		}
5870 	}
5871 
5872 	switch (op) {
5873 		case F_SETFD:
5874 		{
5875 			struct io_context* context = get_current_io_context(kernel);
5876 			// Set file descriptor flags
5877 
5878 			// O_CLOEXEC is the only flag available at this time
5879 			mutex_lock(&context->io_mutex);
5880 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5881 			mutex_unlock(&context->io_mutex);
5882 
5883 			status = B_OK;
5884 			break;
5885 		}
5886 
5887 		case F_GETFD:
5888 		{
5889 			struct io_context* context = get_current_io_context(kernel);
5890 
5891 			// Get file descriptor flags
5892 			mutex_lock(&context->io_mutex);
5893 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5894 			mutex_unlock(&context->io_mutex);
5895 			break;
5896 		}
5897 
5898 		case F_SETFL:
5899 			// Set file descriptor open mode
5900 
5901 			// we only accept changes to O_APPEND and O_NONBLOCK
5902 			argument &= O_APPEND | O_NONBLOCK;
5903 			if (descriptor->ops->fd_set_flags != NULL) {
5904 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5905 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5906 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5907 					(int)argument);
5908 			} else
5909 				status = EOPNOTSUPP;
5910 
5911 			if (status == B_OK) {
5912 				// update this descriptor's open_mode field
5913 				descriptor->open_mode = (descriptor->open_mode
5914 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5915 			}
5916 
5917 			break;
5918 
5919 		case F_GETFL:
5920 			// Get file descriptor open mode
5921 			status = descriptor->open_mode;
5922 			break;
5923 
5924 		case F_DUPFD:
5925 		{
5926 			struct io_context* context = get_current_io_context(kernel);
5927 
5928 			status = new_fd_etc(context, descriptor, (int)argument);
5929 			if (status >= 0) {
5930 				mutex_lock(&context->io_mutex);
5931 				fd_set_close_on_exec(context, fd, false);
5932 				mutex_unlock(&context->io_mutex);
5933 
5934 				atomic_add(&descriptor->ref_count, 1);
5935 			}
5936 			break;
5937 		}
5938 
5939 		case F_GETLK:
5940 			if (vnode != NULL) {
5941 				status = get_advisory_lock(vnode, &flock);
5942 				if (status == B_OK) {
5943 					// copy back flock structure
5944 					status = user_memcpy((struct flock*)argument, &flock,
5945 						sizeof(struct flock));
5946 				}
5947 			} else
5948 				status = B_BAD_VALUE;
5949 			break;
5950 
5951 		case F_SETLK:
5952 		case F_SETLKW:
5953 			status = normalize_flock(descriptor, &flock);
5954 			if (status != B_OK)
5955 				break;
5956 
5957 			if (vnode == NULL) {
5958 				status = B_BAD_VALUE;
5959 			} else if (flock.l_type == F_UNLCK) {
5960 				status = release_advisory_lock(vnode, &flock);
5961 			} else {
5962 				// the open mode must match the lock type
5963 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5964 						&& flock.l_type == F_WRLCK)
5965 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5966 						&& flock.l_type == F_RDLCK))
5967 					status = B_FILE_ERROR;
5968 				else {
5969 					status = acquire_advisory_lock(vnode, -1,
5970 						&flock, op == F_SETLKW);
5971 				}
5972 			}
5973 			break;
5974 
5975 		// ToDo: add support for more ops?
5976 
5977 		default:
5978 			status = B_BAD_VALUE;
5979 	}
5980 
5981 	put_fd(descriptor);
5982 	return status;
5983 }
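
/*
 * Userland-visible effect of the F_SETFL branch above, as a minimal
 * sketch (plain POSIX usage, nothing beyond this file assumed): any
 * bits other than O_APPEND and O_NONBLOCK are silently masked out.
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags >= 0)
 *		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */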
5984 
5985 
5986 static status_t
5987 common_sync(int fd, bool kernel)
5988 {
5989 	struct file_descriptor* descriptor;
5990 	struct vnode* vnode;
5991 	status_t status;
5992 
5993 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5994 
5995 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5996 	if (descriptor == NULL)
5997 		return B_FILE_ERROR;
5998 
5999 	if (HAS_FS_CALL(vnode, fsync))
6000 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6001 	else
6002 		status = EOPNOTSUPP;
6003 
6004 	put_fd(descriptor);
6005 	return status;
6006 }
6007 
6008 
6009 static status_t
6010 common_lock_node(int fd, bool kernel)
6011 {
6012 	struct file_descriptor* descriptor;
6013 	struct vnode* vnode;
6014 
6015 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6016 	if (descriptor == NULL)
6017 		return B_FILE_ERROR;
6018 
6019 	status_t status = B_OK;
6020 
6021 	// We need to set the locking atomically - someone
6022 	// else might set one at the same time
6023 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6024 			(file_descriptor*)NULL) != NULL)
6025 		status = B_BUSY;
6026 
6027 	put_fd(descriptor);
6028 	return status;
6029 }
6030 
6031 
6032 static status_t
6033 common_unlock_node(int fd, bool kernel)
6034 {
6035 	struct file_descriptor* descriptor;
6036 	struct vnode* vnode;
6037 
6038 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6039 	if (descriptor == NULL)
6040 		return B_FILE_ERROR;
6041 
6042 	status_t status = B_OK;
6043 
6044 	// We need to set the locking atomically - someone
6045 	// else might set one at the same time
6046 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6047 			(file_descriptor*)NULL, descriptor) != descriptor)
6048 		status = B_BAD_VALUE;
6049 
6050 	put_fd(descriptor);
6051 	return status;
6052 }
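
/*
 * Note on the primitive used by the two functions above (semantics as
 * used here): atomic_pointer_test_and_set(&pointer, newValue, testAgainst)
 * atomically stores newValue only if pointer still equals testAgainst,
 * and always returns the previous value. Hence:
 *
 *	// lock:   CAS(NULL -> descriptor); fails with B_BUSY if some
 *	//         descriptor already holds the lock
 *	// unlock: CAS(descriptor -> NULL); fails with B_BAD_VALUE if we
 *	//         are not the current locker
 */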
6053 
6054 
6055 static status_t
6056 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6057 	bool kernel)
6058 {
6059 	struct vnode* vnode;
6060 	status_t status;
6061 
6062 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6063 	if (status != B_OK)
6064 		return status;
6065 
6066 	if (HAS_FS_CALL(vnode, read_symlink)) {
6067 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6068 	} else
6069 		status = B_BAD_VALUE;
6070 
6071 	put_vnode(vnode);
6072 	return status;
6073 }
6074 
6075 
6076 static status_t
6077 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6078 	bool kernel)
6079 {
6080 	// path validity checks have to be in the calling function!
6081 	char name[B_FILE_NAME_LENGTH];
6082 	struct vnode* vnode;
6083 	status_t status;
6084 
6085 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6086 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6087 
6088 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6089 	if (status != B_OK)
6090 		return status;
6091 
6092 	if (HAS_FS_CALL(vnode, create_symlink))
6093 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6094 	else {
6095 		status = HAS_FS_CALL(vnode, write)
6096 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
6097 	}
6098 
6099 	put_vnode(vnode);
6100 
6101 	return status;
6102 }
6103 
6104 
6105 static status_t
6106 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6107 	bool traverseLeafLink, bool kernel)
6108 {
6109 	// path validity checks have to be in the calling function!
6110 
6111 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6112 		toPath, kernel));
6113 
6114 	char name[B_FILE_NAME_LENGTH];
6115 	struct vnode* directory;
6116 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6117 		kernel);
6118 	if (status != B_OK)
6119 		return status;
6120 
6121 	struct vnode* vnode;
6122 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6123 		kernel);
6124 	if (status != B_OK)
6125 		goto err;
6126 
6127 	if (directory->mount != vnode->mount) {
6128 		status = B_CROSS_DEVICE_LINK;
6129 		goto err1;
6130 	}
6131 
6132 	if (HAS_FS_CALL(directory, link))
6133 		status = FS_CALL(directory, link, name, vnode);
6134 	else
6135 		status = EROFS;
6136 
6137 err1:
6138 	put_vnode(vnode);
6139 err:
6140 	put_vnode(directory);
6141 
6142 	return status;
6143 }
6144 
6145 
6146 static status_t
6147 common_unlink(int fd, char* path, bool kernel)
6148 {
6149 	char filename[B_FILE_NAME_LENGTH];
6150 	struct vnode* vnode;
6151 	status_t status;
6152 
6153 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6154 		kernel));
6155 
6156 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6157 	if (status < 0)
6158 		return status;
6159 
6160 	if (HAS_FS_CALL(vnode, unlink))
6161 		status = FS_CALL(vnode, unlink, filename);
6162 	else
6163 		status = EROFS;
6164 
6165 	put_vnode(vnode);
6166 
6167 	return status;
6168 }
6169 
6170 
6171 static status_t
6172 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6173 {
6174 	struct vnode* vnode;
6175 	status_t status;
6176 
6177 	// TODO: honor effectiveUserGroup argument
6178 
6179 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6180 	if (status != B_OK)
6181 		return status;
6182 
6183 	if (HAS_FS_CALL(vnode, access))
6184 		status = FS_CALL(vnode, access, mode);
6185 	else
6186 		status = B_OK;
6187 
6188 	put_vnode(vnode);
6189 
6190 	return status;
6191 }
6192 
6193 
6194 static status_t
6195 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6196 {
6197 	struct vnode* fromVnode;
6198 	struct vnode* toVnode;
6199 	char fromName[B_FILE_NAME_LENGTH];
6200 	char toName[B_FILE_NAME_LENGTH];
6201 	status_t status;
6202 
6203 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6204 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6205 
6206 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6207 	if (status != B_OK)
6208 		return status;
6209 
6210 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6211 	if (status != B_OK)
6212 		goto err1;
6213 
6214 	if (fromVnode->device != toVnode->device) {
6215 		status = B_CROSS_DEVICE_LINK;
6216 		goto err2;
6217 	}
6218 
6219 	if (fromName[0] == '\0' || toName[0] == '\0'
6220 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6221 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6222 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6223 		status = B_BAD_VALUE;
6224 		goto err2;
6225 	}
6226 
6227 	if (HAS_FS_CALL(fromVnode, rename))
6228 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6229 	else
6230 		status = EROFS;
6231 
6232 err2:
6233 	put_vnode(toVnode);
6234 err1:
6235 	put_vnode(fromVnode);
6236 
6237 	return status;
6238 }
6239 
6240 
6241 static status_t
6242 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6243 {
6244 	struct vnode* vnode = descriptor->u.vnode;
6245 
6246 	FUNCTION(("common_read_stat: stat %p\n", stat));
6247 
6248 	// TODO: remove this once all file systems properly set them!
6249 	stat->st_crtim.tv_nsec = 0;
6250 	stat->st_ctim.tv_nsec = 0;
6251 	stat->st_mtim.tv_nsec = 0;
6252 	stat->st_atim.tv_nsec = 0;
6253 
6254 	status_t status = FS_CALL(vnode, read_stat, stat);
6255 
6256 	// fill in the st_dev and st_ino fields
6257 	if (status == B_OK) {
6258 		stat->st_dev = vnode->device;
6259 		stat->st_ino = vnode->id;
6260 		stat->st_rdev = -1;
6261 	}
6262 
6263 	return status;
6264 }
6265 
6266 
6267 static status_t
6268 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6269 	int statMask)
6270 {
6271 	struct vnode* vnode = descriptor->u.vnode;
6272 
6273 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6274 		vnode, stat, statMask));
6275 
6276 	if (!HAS_FS_CALL(vnode, write_stat))
6277 		return EROFS;
6278 
6279 	return FS_CALL(vnode, write_stat, stat, statMask);
6280 }
6281 
6282 
6283 static status_t
6284 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6285 	struct stat* stat, bool kernel)
6286 {
6287 	struct vnode* vnode;
6288 	status_t status;
6289 
6290 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6291 		stat));
6292 
6293 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6294 		kernel);
6295 	if (status < 0)
6296 		return status;
6297 
6298 	status = FS_CALL(vnode, read_stat, stat);
6299 
6300 	// fill in the st_dev and st_ino fields
6301 	if (status == B_OK) {
6302 		stat->st_dev = vnode->device;
6303 		stat->st_ino = vnode->id;
6304 		stat->st_rdev = -1;
6305 	}
6306 
6307 	put_vnode(vnode);
6308 	return status;
6309 }
6310 
6311 
6312 static status_t
6313 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6314 	const struct stat* stat, int statMask, bool kernel)
6315 {
6316 	struct vnode* vnode;
6317 	status_t status;
6318 
6319 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6320 		"kernel %d\n", fd, path, stat, statMask, kernel));
6321 
6322 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6323 		kernel);
6324 	if (status < 0)
6325 		return status;
6326 
6327 	if (HAS_FS_CALL(vnode, write_stat))
6328 		status = FS_CALL(vnode, write_stat, stat, statMask);
6329 	else
6330 		status = EROFS;
6331 
6332 	put_vnode(vnode);
6333 
6334 	return status;
6335 }
6336 
6337 
6338 static int
6339 attr_dir_open(int fd, char* path, bool kernel)
6340 {
6341 	struct vnode* vnode;
6342 	int status;
6343 
6344 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6345 		kernel));
6346 
6347 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6348 	if (status != B_OK)
6349 		return status;
6350 
6351 	status = open_attr_dir_vnode(vnode, kernel);
6352 	if (status < 0)
6353 		put_vnode(vnode);
6354 
6355 	return status;
6356 }
6357 
6358 
6359 static status_t
6360 attr_dir_close(struct file_descriptor* descriptor)
6361 {
6362 	struct vnode* vnode = descriptor->u.vnode;
6363 
6364 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6365 
6366 	if (HAS_FS_CALL(vnode, close_attr_dir))
6367 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6368 
6369 	return B_OK;
6370 }
6371 
6372 
6373 static void
6374 attr_dir_free_fd(struct file_descriptor* descriptor)
6375 {
6376 	struct vnode* vnode = descriptor->u.vnode;
6377 
6378 	if (vnode != NULL) {
6379 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6380 		put_vnode(vnode);
6381 	}
6382 }
6383 
6384 
6385 static status_t
6386 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6387 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6388 {
6389 	struct vnode* vnode = descriptor->u.vnode;
6390 
6391 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6392 
6393 	if (HAS_FS_CALL(vnode, read_attr_dir))
6394 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6395 			bufferSize, _count);
6396 
6397 	return EOPNOTSUPP;
6398 }
6399 
6400 
6401 static status_t
6402 attr_dir_rewind(struct file_descriptor* descriptor)
6403 {
6404 	struct vnode* vnode = descriptor->u.vnode;
6405 
6406 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6407 
6408 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6409 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6410 
6411 	return EOPNOTSUPP;
6412 }
6413 
6414 
6415 static int
6416 attr_create(int fd, char* path, const char* name, uint32 type,
6417 	int openMode, bool kernel)
6418 {
6419 	if (name == NULL || *name == '\0')
6420 		return B_BAD_VALUE;
6421 
6422 	struct vnode* vnode;
6423 	status_t status = fd_and_path_to_vnode(fd, path,
6424 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6425 	if (status != B_OK)
6426 		return status;
6427 
6428 	if (!HAS_FS_CALL(vnode, create_attr)) {
6429 		status = EROFS;
6430 		goto err;
6431 	}
6432 
6433 	void* cookie;
6434 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6435 	if (status != B_OK)
6436 		goto err;
6437 
6438 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6439 	if (fd >= 0)
6440 		return fd;
6441 
6442 	status = fd;
6443 
6444 	FS_CALL(vnode, close_attr, cookie);
6445 	FS_CALL(vnode, free_attr_cookie, cookie);
6446 
6447 	FS_CALL(vnode, remove_attr, name);
6448 
6449 err:
6450 	put_vnode(vnode);
6451 
6452 	return status;
6453 }
6454 
6455 
6456 static int
6457 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6458 {
6459 	if (name == NULL || *name == '\0')
6460 		return B_BAD_VALUE;
6461 
6462 	struct vnode* vnode;
6463 	status_t status = fd_and_path_to_vnode(fd, path,
6464 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6465 	if (status != B_OK)
6466 		return status;
6467 
6468 	if (!HAS_FS_CALL(vnode, open_attr)) {
6469 		status = EOPNOTSUPP;
6470 		goto err;
6471 	}
6472 
6473 	void* cookie;
6474 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6475 	if (status != B_OK)
6476 		goto err;
6477 
6478 	// now we only need a file descriptor for this attribute and we're done
6479 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6480 	if (fd >= 0)
6481 		return fd;
6482 
6483 	status = fd;
6484 
6485 	FS_CALL(vnode, close_attr, cookie);
6486 	FS_CALL(vnode, free_attr_cookie, cookie);
6487 
6488 err:
6489 	put_vnode(vnode);
6490 
6491 	return status;
6492 }
6493 
6494 
6495 static status_t
6496 attr_close(struct file_descriptor* descriptor)
6497 {
6498 	struct vnode* vnode = descriptor->u.vnode;
6499 
6500 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6501 
6502 	if (HAS_FS_CALL(vnode, close_attr))
6503 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6504 
6505 	return B_OK;
6506 }
6507 
6508 
6509 static void
6510 attr_free_fd(struct file_descriptor* descriptor)
6511 {
6512 	struct vnode* vnode = descriptor->u.vnode;
6513 
6514 	if (vnode != NULL) {
6515 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6516 		put_vnode(vnode);
6517 	}
6518 }
6519 
6520 
6521 static status_t
6522 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6523 	size_t* length)
6524 {
6525 	struct vnode* vnode = descriptor->u.vnode;
6526 
6527 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6528 		*length));
6529 
6530 	if (!HAS_FS_CALL(vnode, read_attr))
6531 		return EOPNOTSUPP;
6532 
6533 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6534 }
6535 
6536 
6537 static status_t
6538 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6539 	size_t* length)
6540 {
6541 	struct vnode* vnode = descriptor->u.vnode;
6542 
6543 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6544 	if (!HAS_FS_CALL(vnode, write_attr))
6545 		return EOPNOTSUPP;
6546 
6547 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6548 }
6549 
6550 
6551 static off_t
6552 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6553 {
6554 	off_t offset;
6555 
6556 	switch (seekType) {
6557 		case SEEK_SET:
6558 			offset = 0;
6559 			break;
6560 		case SEEK_CUR:
6561 			offset = descriptor->pos;
6562 			break;
6563 		case SEEK_END:
6564 		{
6565 			struct vnode* vnode = descriptor->u.vnode;
6566 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6567 				return EOPNOTSUPP;
6568 
6569 			struct stat stat;
6570 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6571 				&stat);
6572 			if (status != B_OK)
6573 				return status;
6574 
6575 			offset = stat.st_size;
6576 			break;
6577 		}
6578 		default:
6579 			return B_BAD_VALUE;
6580 	}
6581 
6582 	// assumes off_t is 64 bits wide
6583 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6584 		return EOVERFLOW;
6585 
6586 	pos += offset;
6587 	if (pos < 0)
6588 		return B_BAD_VALUE;
6589 
6590 	return descriptor->pos = pos;
6591 }
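
/*
 * Example of the overflow guard above (hypothetical values): with
 * offset == stat.st_size == LONGLONG_MAX - 10 and pos == 20, the naive
 * sum would wrap negative; checking LONGLONG_MAX - offset < pos detects
 * this without ever performing the overflowing addition.
 */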
6592 
6593 
6594 static status_t
6595 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6596 {
6597 	struct vnode* vnode = descriptor->u.vnode;
6598 
6599 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6600 
6601 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6602 		return EOPNOTSUPP;
6603 
6604 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6605 }
6606 
6607 
6608 static status_t
6609 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6610 	int statMask)
6611 {
6612 	struct vnode* vnode = descriptor->u.vnode;
6613 
6614 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6615 
6616 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6617 		return EROFS;
6618 
6619 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6620 }
6621 
6622 
6623 static status_t
6624 attr_remove(int fd, const char* name, bool kernel)
6625 {
6626 	struct file_descriptor* descriptor;
6627 	struct vnode* vnode;
6628 	status_t status;
6629 
6630 	if (name == NULL || *name == '\0')
6631 		return B_BAD_VALUE;
6632 
6633 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6634 		kernel));
6635 
6636 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6637 	if (descriptor == NULL)
6638 		return B_FILE_ERROR;
6639 
6640 	if (HAS_FS_CALL(vnode, remove_attr))
6641 		status = FS_CALL(vnode, remove_attr, name);
6642 	else
6643 		status = EROFS;
6644 
6645 	put_fd(descriptor);
6646 
6647 	return status;
6648 }
6649 
6650 
6651 static status_t
6652 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6653 	bool kernel)
6654 {
6655 	struct file_descriptor* fromDescriptor;
6656 	struct file_descriptor* toDescriptor;
6657 	struct vnode* fromVnode;
6658 	struct vnode* toVnode;
6659 	status_t status;
6660 
6661 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6662 		|| *toName == '\0')
6663 		return B_BAD_VALUE;
6664 
6665 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6666 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6667 
6668 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6669 	if (fromDescriptor == NULL)
6670 		return B_FILE_ERROR;
6671 
6672 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6673 	if (toDescriptor == NULL) {
6674 		status = B_FILE_ERROR;
6675 		goto err;
6676 	}
6677 
6678 	// are the files on the same volume?
6679 	if (fromVnode->device != toVnode->device) {
6680 		status = B_CROSS_DEVICE_LINK;
6681 		goto err1;
6682 	}
6683 
6684 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6685 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6686 	} else
6687 		status = EROFS;
6688 
6689 err1:
6690 	put_fd(toDescriptor);
6691 err:
6692 	put_fd(fromDescriptor);
6693 
6694 	return status;
6695 }
6696 
6697 
6698 static int
6699 index_dir_open(dev_t mountID, bool kernel)
6700 {
6701 	struct fs_mount* mount;
6702 	void* cookie;
6703 
6704 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6705 
6706 	status_t status = get_mount(mountID, &mount);
6707 	if (status != B_OK)
6708 		return status;
6709 
6710 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6711 		status = EOPNOTSUPP;
6712 		goto error;
6713 	}
6714 
6715 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6716 	if (status != B_OK)
6717 		goto error;
6718 
6719 	// get fd for the index directory
6720 	int fd;
6721 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6722 	if (fd >= 0)
6723 		return fd;
6724 
6725 	// something went wrong
6726 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6727 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6728 
6729 	status = fd;
6730 
6731 error:
6732 	put_mount(mount);
6733 	return status;
6734 }
6735 
6736 
6737 static status_t
6738 index_dir_close(struct file_descriptor* descriptor)
6739 {
6740 	struct fs_mount* mount = descriptor->u.mount;
6741 
6742 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6743 
6744 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6745 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6746 
6747 	return B_OK;
6748 }
6749 
6750 
6751 static void
6752 index_dir_free_fd(struct file_descriptor* descriptor)
6753 {
6754 	struct fs_mount* mount = descriptor->u.mount;
6755 
6756 	if (mount != NULL) {
6757 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6758 		put_mount(mount);
6759 	}
6760 }
6761 
6762 
6763 static status_t
6764 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6765 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6766 {
6767 	struct fs_mount* mount = descriptor->u.mount;
6768 
6769 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6770 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6771 			bufferSize, _count);
6772 	}
6773 
6774 	return EOPNOTSUPP;
6775 }
6776 
6777 
6778 static status_t
6779 index_dir_rewind(struct file_descriptor* descriptor)
6780 {
6781 	struct fs_mount* mount = descriptor->u.mount;
6782 
6783 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6784 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6785 
6786 	return EOPNOTSUPP;
6787 }
6788 
6789 
6790 static status_t
6791 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6792 	bool kernel)
6793 {
6794 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6795 		name, kernel));
6796 
6797 	struct fs_mount* mount;
6798 	status_t status = get_mount(mountID, &mount);
6799 	if (status != B_OK)
6800 		return status;
6801 
6802 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6803 		status = EROFS;
6804 		goto out;
6805 	}
6806 
6807 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6808 
6809 out:
6810 	put_mount(mount);
6811 	return status;
6812 }
6813 
6814 
6815 #if 0
6816 static status_t
6817 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6818 {
6819 	struct vnode* vnode = descriptor->u.vnode;
6820 
6821 	// ToDo: currently unused!
6822 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6823 	if (!HAS_FS_CALL(vnode, read_index_stat))
6824 		return EOPNOTSUPP;
6825 
6826 	return EOPNOTSUPP;
6827 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6828 }
6829 
6830 
6831 static void
6832 index_free_fd(struct file_descriptor* descriptor)
6833 {
6834 	struct vnode* vnode = descriptor->u.vnode;
6835 
6836 	if (vnode != NULL) {
6837 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6838 		put_vnode(vnode);
6839 	}
6840 }
6841 #endif
6842 
6843 
6844 static status_t
6845 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6846 	bool kernel)
6847 {
6848 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6849 		mountID, name, kernel));
6850 
6851 	struct fs_mount* mount;
6852 	status_t status = get_mount(mountID, &mount);
6853 	if (status != B_OK)
6854 		return status;
6855 
6856 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6857 		status = EOPNOTSUPP;
6858 		goto out;
6859 	}
6860 
6861 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6862 
6863 out:
6864 	put_mount(mount);
6865 	return status;
6866 }
6867 
6868 
6869 static status_t
6870 index_remove(dev_t mountID, const char* name, bool kernel)
6871 {
6872 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6873 		name, kernel));
6874 
6875 	struct fs_mount* mount;
6876 	status_t status = get_mount(mountID, &mount);
6877 	if (status != B_OK)
6878 		return status;
6879 
6880 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6881 		status = EROFS;
6882 		goto out;
6883 	}
6884 
6885 	status = FS_MOUNT_CALL(mount, remove_index, name);
6886 
6887 out:
6888 	put_mount(mount);
6889 	return status;
6890 }
6891 
6892 
6893 /*!	TODO: the query FS API is still pretty much the same as in R5.
6894 		It would be nice if the kernel provided some more support for
6895 		queries.
6896 		For example, query parsing should be moved into the kernel.
6897 */
6898 static int
6899 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6900 	int32 token, bool kernel)
6901 {
6902 	struct fs_mount* mount;
6903 	void* cookie;
6904 
6905 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6906 		query, kernel));
6907 
6908 	status_t status = get_mount(device, &mount);
6909 	if (status != B_OK)
6910 		return status;
6911 
6912 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6913 		status = EOPNOTSUPP;
6914 		goto error;
6915 	}
6916 
6917 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6918 		&cookie);
6919 	if (status != B_OK)
6920 		goto error;
6921 
6922 	// get fd for the query
6923 	int fd;
6924 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6925 	if (fd >= 0)
6926 		return fd;
6927 
6928 	status = fd;
6929 
6930 	// something went wrong
6931 	FS_MOUNT_CALL(mount, close_query, cookie);
6932 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6933 
6934 error:
6935 	put_mount(mount);
6936 	return status;
6937 }
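
/*
 * Userland counterpart, as a hedged sketch: the BeOS-style C API wraps
 * this functionality roughly as follows (fs_open_query() and friends
 * are the public interface; the syscall glue is not part of this file).
 *
 *	DIR* query = fs_open_query(device, "name == \"*.cpp\"", 0);
 *	if (query != NULL) {
 *		while (struct dirent* entry = fs_read_query(query))
 *			printf("match: %s\n", entry->d_name);
 *		fs_close_query(query);
 *	}
 */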
6938 
6939 
6940 static status_t
6941 query_close(struct file_descriptor* descriptor)
6942 {
6943 	struct fs_mount* mount = descriptor->u.mount;
6944 
6945 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6946 
6947 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6948 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6949 
6950 	return B_OK;
6951 }
6952 
6953 
6954 static void
6955 query_free_fd(struct file_descriptor* descriptor)
6956 {
6957 	struct fs_mount* mount = descriptor->u.mount;
6958 
6959 	if (mount != NULL) {
6960 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6961 		put_mount(mount);
6962 	}
6963 }
6964 
6965 
6966 static status_t
6967 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6968 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6969 {
6970 	struct fs_mount* mount = descriptor->u.mount;
6971 
6972 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6973 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6974 			bufferSize, _count);
6975 	}
6976 
6977 	return EOPNOTSUPP;
6978 }
6979 
6980 
6981 static status_t
6982 query_rewind(struct file_descriptor* descriptor)
6983 {
6984 	struct fs_mount* mount = descriptor->u.mount;
6985 
6986 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6987 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6988 
6989 	return EOPNOTSUPP;
6990 }
6991 
6992 
6993 //	#pragma mark - General File System functions
6994 
6995 
6996 static dev_t
6997 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6998 	const char* args, bool kernel)
6999 {
7000 	struct ::fs_mount* mount;
7001 	status_t status = B_OK;
7002 	fs_volume* volume = NULL;
7003 	int32 layer = 0;
7004 
7005 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7006 
7007 	// The path is always safe, we just have to make sure that fsName is at
7008 	// least superficially valid - we can't make any assumptions about args,
7009 	// though.
7009 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7010 	// We'll get it from the DDM later.
7011 	if (fsName == NULL) {
7012 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7013 			return B_BAD_VALUE;
7014 	} else if (fsName[0] == '\0')
7015 		return B_BAD_VALUE;
7016 
7017 	RecursiveLocker mountOpLocker(sMountOpLock);
7018 
7019 	// Helper to delete a newly created file device on failure.
7020 	// Not exactly beautiful, but helps to keep the code below cleaner.
7021 	struct FileDeviceDeleter {
7022 		FileDeviceDeleter() : id(-1) {}
7023 		~FileDeviceDeleter()
7024 		{
7025 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7026 		}
7027 
7028 		partition_id id;
7029 	} fileDeviceDeleter;
7030 
7031 	// If the file system is not a "virtual" one, the device argument should
7032 	// point to a real file/device (if given at all).
7033 	// get the partition
7034 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7035 	KPartition* partition = NULL;
7036 	KPath normalizedDevice;
7037 	bool newlyCreatedFileDevice = false;
7038 
7039 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7040 		// normalize the device path
7041 		status = normalizedDevice.SetTo(device, true);
7042 		if (status != B_OK)
7043 			return status;
7044 
7045 		// get a corresponding partition from the DDM
7046 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7047 		if (partition == NULL) {
7048 			// Partition not found: This either means the user supplied
7049 			// an invalid path, or the path refers to an image file. We try
7050 			// to let the DDM create a file device for the path.
7051 			partition_id deviceID = ddm->CreateFileDevice(
7052 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7053 			if (deviceID >= 0) {
7054 				partition = ddm->RegisterPartition(deviceID);
7055 				if (newlyCreatedFileDevice)
7056 					fileDeviceDeleter.id = deviceID;
7057 			}
7058 		}
7059 
7060 		if (!partition) {
7061 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7062 				normalizedDevice.Path()));
7063 			return B_ENTRY_NOT_FOUND;
7064 		}
7065 
7066 		device = normalizedDevice.Path();
7067 			// correct path to file device
7068 	}
7069 	PartitionRegistrar partitionRegistrar(partition, true);
7070 
7071 	// Write lock the partition's device. For the time being, we keep the lock
7072 	// until we're done mounting -- not nice, but it ensures that no-one
7073 	// interferes.
7074 	// TODO: Just mark the partition busy while mounting!
7075 	KDiskDevice* diskDevice = NULL;
7076 	if (partition) {
7077 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7078 		if (!diskDevice) {
7079 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7080 			return B_ERROR;
7081 		}
7082 	}
7083 
7084 	DeviceWriteLocker writeLocker(diskDevice, true);
7085 		// this takes over the write lock acquired before
7086 
7087 	if (partition != NULL) {
7088 		// make sure that the partition is not busy
7089 		if (partition->IsBusy()) {
7090 			TRACE(("fs_mount(): Partition is busy.\n"));
7091 			return B_BUSY;
7092 		}
7093 
7094 		// if no FS name has been supplied, we get it from the partition
7095 		if (fsName == NULL) {
7096 			KDiskSystem* diskSystem = partition->DiskSystem();
7097 			if (!diskSystem) {
7098 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7099 					"recognize it.\n"));
7100 				return B_BAD_VALUE;
7101 			}
7102 
7103 			if (!diskSystem->IsFileSystem()) {
7104 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7105 					"partitioning system.\n"));
7106 				return B_BAD_VALUE;
7107 			}
7108 
7109 			// The disk system name will not change, and the KDiskSystem
7110 			// object will not go away while the disk device is locked (and
7111 			// the partition has a reference to it), so this is safe.
7112 			fsName = diskSystem->Name();
7113 		}
7114 	}
7115 
7116 	mount = new(std::nothrow) (struct ::fs_mount);
7117 	if (mount == NULL)
7118 		return B_NO_MEMORY;
7119 
7120 	mount->device_name = strdup(device);
7121 		// "device" can be NULL
7122 
7123 	status = mount->entry_cache.Init();
7124 	if (status != B_OK)
7125 		goto err1;
7126 
7127 	// initialize structure
7128 	mount->id = sNextMountID++;
7129 	mount->partition = NULL;
7130 	mount->root_vnode = NULL;
7131 	mount->covers_vnode = NULL;
7132 	mount->unmounting = false;
7133 	mount->owns_file_device = false;
7134 	mount->volume = NULL;
7135 
7136 	// build up the volume(s)
7137 	while (true) {
7138 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7139 		if (layerFSName == NULL) {
7140 			if (layer == 0) {
7141 				status = B_NO_MEMORY;
7142 				goto err1;
7143 			}
7144 
7145 			break;
7146 		}
7147 
7148 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7149 		if (volume == NULL) {
7150 			status = B_NO_MEMORY;
7151 			free(layerFSName);
7152 			goto err1;
7153 		}
7154 
7155 		volume->id = mount->id;
7156 		volume->partition = partition != NULL ? partition->ID() : -1;
7157 		volume->layer = layer++;
7158 		volume->private_volume = NULL;
7159 		volume->ops = NULL;
7160 		volume->sub_volume = NULL;
7161 		volume->super_volume = NULL;
7162 		volume->file_system = NULL;
7163 		volume->file_system_name = NULL;
7164 
7165 		volume->file_system_name = get_file_system_name(layerFSName);
7166 		if (volume->file_system_name == NULL) {
7167 			status = B_NO_MEMORY;
7168 			free(layerFSName);
7169 			free(volume);
7170 			goto err1;
7171 		}
7172 
7173 		volume->file_system = get_file_system(layerFSName);
7174 		if (volume->file_system == NULL) {
7175 			status = ENODEV;
7176 			free(layerFSName);
7177 			free(volume->file_system_name);
7178 			free(volume);
7179 			goto err1;
7180 		}
7181 
7182 		if (mount->volume == NULL)
7183 			mount->volume = volume;
7184 		else {
7185 			volume->super_volume = mount->volume;
7186 			mount->volume->sub_volume = volume;
7187 			mount->volume = volume;
7188 		}
7189 	}
7190 
7191 	// insert mount struct into list before we call FS's mount() function
7192 	// so that vnodes can be created for this mount
7193 	mutex_lock(&sMountMutex);
7194 	hash_insert(sMountsTable, mount);
7195 	mutex_unlock(&sMountMutex);
7196 
7197 	ino_t rootID;
7198 
7199 	if (!sRoot) {
7200 		// we haven't mounted anything yet
7201 		if (strcmp(path, "/") != 0) {
7202 			status = B_ERROR;
7203 			goto err2;
7204 		}
7205 
7206 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7207 			args, &rootID);
7208 		if (status != 0)
7209 			goto err2;
7210 	} else {
7211 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7212 		if (status != B_OK)
7213 			goto err2;
7214 
7215 		// make sure covers_vnode is a directory
7216 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7217 			status = B_NOT_A_DIRECTORY;
7218 			goto err3;
7219 		}
7220 
7221 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7222 			// this is already a mount point
7223 			status = B_BUSY;
7224 			goto err3;
7225 		}
7226 
7227 		// mount it/them
7228 		fs_volume* volume = mount->volume;
7229 		while (volume) {
7230 			status = volume->file_system->mount(volume, device, flags, args,
7231 				&rootID);
7232 			if (status != B_OK) {
7233 				if (volume->sub_volume)
7234 					goto err4;
7235 				goto err3;
7236 			}
7237 
7238 			volume = volume->super_volume;
7239 		}
7240 
7241 		volume = mount->volume;
7242 		while (volume) {
7243 			if (volume->ops->all_layers_mounted != NULL)
7244 				volume->ops->all_layers_mounted(volume);
7245 			volume = volume->super_volume;
7246 		}
7247 	}
7248 
7249 	// the root node is supposed to be owned by the file system - it must
7250 	// exist at this point
7251 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7252 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7253 		panic("fs_mount: file system does not own its root node!\n");
7254 		status = B_ERROR;
7255 		goto err4;
7256 	}
7257 
7258 	// No race here, since fs_mount() is the only function changing
7259 	// covers_vnode (and holds sMountOpLock at that time).
7260 	rw_lock_write_lock(&sVnodeLock);
7261 	if (mount->covers_vnode)
7262 		mount->covers_vnode->covered_by = mount->root_vnode;
7263 	rw_lock_write_unlock(&sVnodeLock);
7264 
7265 	if (!sRoot) {
7266 		sRoot = mount->root_vnode;
7267 		mutex_lock(&sIOContextRootLock);
7268 		get_current_io_context(true)->root = sRoot;
7269 		mutex_unlock(&sIOContextRootLock);
7270 		inc_vnode_ref_count(sRoot);
7271 	}
7272 
7273 	// supply the partition (if any) with the mount cookie and mark it mounted
7274 	if (partition) {
7275 		partition->SetMountCookie(mount->volume->private_volume);
7276 		partition->SetVolumeID(mount->id);
7277 
7278 		// keep a partition reference as long as the partition is mounted
7279 		partitionRegistrar.Detach();
7280 		mount->partition = partition;
7281 		mount->owns_file_device = newlyCreatedFileDevice;
7282 		fileDeviceDeleter.id = -1;
7283 	}
7284 
7285 	notify_mount(mount->id,
7286 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7287 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7288 
7289 	return mount->id;
7290 
7291 err4:
7292 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7293 err3:
7294 	if (mount->covers_vnode != NULL)
7295 		put_vnode(mount->covers_vnode);
7296 err2:
7297 	mutex_lock(&sMountMutex);
7298 	hash_remove(sMountsTable, mount);
7299 	mutex_unlock(&sMountMutex);
7300 err1:
7301 	delete mount;
7302 
7303 	return status;
7304 }
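
/*
 * Shape of the layered volume chain built above (hypothetical two-layer
 * setup, e.g. an overlay file system stacked on bfs): mount->volume
 * always points at the last layer created; each volume's super_volume
 * refers to the layer created before it, and sub_volume points the
 * other way. The mount loop then invokes every layer's mount() hook,
 * starting at mount->volume and following the super_volume links, and
 * finally calls all_layers_mounted() on each layer.
 *
 *	mount->volume --> [layer 1] --super_volume--> [layer 0]
 *	                  [layer 1] <--sub_volume---  [layer 0]
 */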
7305 
7306 
7307 static status_t
7308 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7309 {
7310 	struct fs_mount* mount;
7311 	status_t err;
7312 
7313 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d)\n", path, mountID,
7314 		kernel));
7315 
7316 	struct vnode* pathVnode = NULL;
7317 	if (path != NULL) {
7318 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7319 		if (err != B_OK)
7320 			return B_ENTRY_NOT_FOUND;
7321 	}
7322 
7323 	RecursiveLocker mountOpLocker(sMountOpLock);
7324 
7325 	// This lock is not strictly necessary, but it keeps the ASSERT in
7326 	// find_mount() working in the KDEBUG case.
7327 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7328 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7329 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7330 	if (mount == NULL) {
7331 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7332 			pathVnode);
7333 	}
7334 
7335 	if (path != NULL) {
7336 		put_vnode(pathVnode);
7337 
7338 		if (mount->root_vnode != pathVnode) {
7339 			// not a mount point
7340 			return B_BAD_VALUE;
7341 		}
7342 	}
7343 
7344 	// if the volume is associated with a partition, lock the device of the
7345 	// partition as long as we are unmounting
7346 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7347 	KPartition* partition = mount->partition;
7348 	KDiskDevice* diskDevice = NULL;
7349 	if (partition != NULL) {
7350 		if (partition->Device() == NULL) {
7351 			dprintf("fs_unmount(): There is no device!\n");
7352 			return B_ERROR;
7353 		}
7354 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7355 		if (!diskDevice) {
7356 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7357 			return B_ERROR;
7358 		}
7359 	}
7360 	DeviceWriteLocker writeLocker(diskDevice, true);
7361 
7362 	// make sure that the partition is not busy
7363 	if (partition != NULL) {
7364 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7365 			TRACE(("fs_unmount(): Partition is busy.\n"));
7366 			return B_BUSY;
7367 		}
7368 	}
7369 
7370 	// grab the vnode master mutex to keep someone from creating
7371 	// a vnode while we're figuring out if we can continue
7372 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7373 
7374 	bool disconnectedDescriptors = false;
7375 
7376 	while (true) {
7377 		bool busy = false;
7378 
7379 		// cycle through the list of vnodes associated with this mount and
7380 		// make sure all of them are not busy or have refs on them
7381 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7382 		while (struct vnode* vnode = iterator.Next()) {
7383 			// The root vnode ref_count needs to be 1 here (the mount has a
7384 			// reference).
7385 			if (vnode->IsBusy()
7386 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7387 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7388 				// there are still vnodes in use on this mount, so we cannot
7389 				// unmount yet
7390 				busy = true;
7391 				break;
7392 			}
7393 		}
7394 
7395 		if (!busy)
7396 			break;
7397 
7398 		if ((flags & B_FORCE_UNMOUNT) == 0)
7399 			return B_BUSY;
7400 
7401 		if (disconnectedDescriptors) {
7402 			// wait a bit until the last access is finished, and then try again
7403 			vnodesWriteLocker.Unlock();
7404 			snooze(100000);
7405 			// TODO: if there is some kind of bug that prevents the ref counts
7406 			// from getting back to zero, this will fall into an endless loop...
7407 			vnodesWriteLocker.Lock();
7408 			continue;
7409 		}
7410 
7411 		// the file system is still busy - but we're forced to unmount it,
7412 		// so let's disconnect all open file descriptors
7413 
7414 		mount->unmounting = true;
7415 			// prevent new vnodes from being created
7416 
7417 		vnodesWriteLocker.Unlock();
7418 
7419 		disconnect_mount_or_vnode_fds(mount, NULL);
7420 		disconnectedDescriptors = true;
7421 
7422 		vnodesWriteLocker.Lock();
7423 	}
7424 
7425 	// We can safely continue: mark all of the vnodes busy and put this
7426 	// mount structure into unmounting state.
7427 	mount->unmounting = true;
7428 
7429 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7430 	while (struct vnode* vnode = iterator.Next()) {
7431 		vnode->SetBusy(true);
7432 		vnode_to_be_freed(vnode);
7433 	}
7434 
7435 	// The ref_count of the root node is 1 at this point; see above for why.
7436 	mount->root_vnode->ref_count--;
7437 	vnode_to_be_freed(mount->root_vnode);
7438 
7439 	mount->covers_vnode->covered_by = NULL;
7440 
7441 	vnodesWriteLocker.Unlock();
7442 
7443 	put_vnode(mount->covers_vnode);
7444 
7445 	// Free all vnodes associated with this mount.
7446 	// They will be removed from the mount list by free_vnode(), so
7447 	// we don't have to do that ourselves.
7448 	while (struct vnode* vnode = mount->vnodes.Head())
7449 		free_vnode(vnode, false);
7450 
7451 	// remove the mount structure from the hash table
7452 	mutex_lock(&sMountMutex);
7453 	hash_remove(sMountsTable, mount);
7454 	mutex_unlock(&sMountMutex);
7455 
7456 	mountOpLocker.Unlock();
7457 
7458 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7459 	notify_unmount(mount->id);
7460 
7461 	// dereference the partition and mark it unmounted
7462 	if (partition) {
7463 		partition->SetVolumeID(-1);
7464 		partition->SetMountCookie(NULL);
7465 
7466 		if (mount->owns_file_device)
7467 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7468 		partition->Unregister();
7469 	}
7470 
7471 	delete mount;
7472 	return B_OK;
7473 }
7474 
7475 
7476 static status_t
7477 fs_sync(dev_t device)
7478 {
7479 	struct fs_mount* mount;
7480 	status_t status = get_mount(device, &mount);
7481 	if (status != B_OK)
7482 		return status;
7483 
7484 	struct vnode marker;
7485 	memset(&marker, 0, sizeof(marker));
7486 	marker.SetBusy(true);
7487 	marker.SetRemoved(true);
7488 
7489 	// First, synchronize all file caches
7490 
7491 	while (true) {
7492 		WriteLocker locker(sVnodeLock);
7493 			// Note: That's the easy way. Which is probably OK for sync(),
7494 			// since it's a relatively rare call and doesn't need to allow for
7495 			// a lot of concurrency. Using a read lock would be possible, but
7496 			// also more involved, since we would have to lock the individual
7497 			// and take care of the locking order, which we might not want to
7498 			// do while holding fs_mount::rlock.
7499 
7500 		// synchronize access to vnode list
7501 		recursive_lock_lock(&mount->rlock);
7502 
7503 		struct vnode* vnode;
7504 		if (!marker.IsRemoved()) {
7505 			vnode = mount->vnodes.GetNext(&marker);
7506 			mount->vnodes.Remove(&marker);
7507 			marker.SetRemoved(true);
7508 		} else
7509 			vnode = mount->vnodes.First();
7510 
7511 		while (vnode != NULL && (vnode->cache == NULL
7512 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7513 			// TODO: we could track writes (and writable mapped vnodes)
7514 			//	and have a simple flag that we could test for here
7515 			vnode = mount->vnodes.GetNext(vnode);
7516 		}
7517 
7518 		if (vnode != NULL) {
7519 			// insert marker vnode again
7520 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7521 			marker.SetRemoved(false);
7522 		}
7523 
7524 		recursive_lock_unlock(&mount->rlock);
7525 
7526 		if (vnode == NULL)
7527 			break;
7528 
7529 		vnode = lookup_vnode(mount->id, vnode->id);
7530 		if (vnode == NULL || vnode->IsBusy())
7531 			continue;
7532 
7533 		if (vnode->ref_count == 0) {
7534 			// this vnode has been unused before
7535 			vnode_used(vnode);
7536 		}
7537 		inc_vnode_ref_count(vnode);
7538 
7539 		locker.Unlock();
7540 
7541 		if (vnode->cache != NULL && !vnode->IsRemoved())
7542 			vnode->cache->WriteModified();
7543 
7544 		put_vnode(vnode);
7545 	}
7546 
7547 	// And then, let the file systems do their synchronizing work
7548 
7549 	if (HAS_FS_MOUNT_CALL(mount, sync))
7550 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7551 
7552 	put_mount(mount);
7553 	return status;
7554 }
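
/*
 * The marker node used above is a general pattern for iterating a list
 * that may change while its lock is temporarily dropped: insert a dummy
 * element at the current position, unlock, do the slow work, then
 * relock and resume from the marker. A minimal sketch with hypothetical
 * names:
 *
 *	list.InsertAfter(current, &marker);
 *	Unlock();
 *	DoSlowWork(current);	// e.g. cache->WriteModified()
 *	Lock();
 *	current = list.GetNext(&marker);
 *	list.Remove(&marker);
 */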
7555 
7556 
7557 static status_t
7558 fs_read_info(dev_t device, struct fs_info* info)
7559 {
7560 	struct fs_mount* mount;
7561 	status_t status = get_mount(device, &mount);
7562 	if (status != B_OK)
7563 		return status;
7564 
7565 	memset(info, 0, sizeof(struct fs_info));
7566 
7567 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7568 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7569 
7570 	// fill in info the file system doesn't (have to) know about
7571 	if (status == B_OK) {
7572 		info->dev = mount->id;
7573 		info->root = mount->root_vnode->id;
7574 
7575 		fs_volume* volume = mount->volume;
7576 		while (volume->super_volume != NULL)
7577 			volume = volume->super_volume;
7578 
7579 		strlcpy(info->fsh_name, volume->file_system_name,
7580 			sizeof(info->fsh_name));
7581 		if (mount->device_name != NULL) {
7582 			strlcpy(info->device_name, mount->device_name,
7583 				sizeof(info->device_name));
7584 		}
7585 	}
7586 
7587 	// even if the call is not supported by the file system, the parts
7588 	// that we filled out ourselves above remain valid
7589 
7590 	put_mount(mount);
7591 	return status;
7592 }
7593 
7594 
7595 static status_t
7596 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7597 {
7598 	struct fs_mount* mount;
7599 	status_t status = get_mount(device, &mount);
7600 	if (status != B_OK)
7601 		return status;
7602 
7603 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7604 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7605 	else
7606 		status = EROFS;
7607 
7608 	put_mount(mount);
7609 	return status;
7610 }
7611 
7612 
7613 static dev_t
7614 fs_next_device(int32* _cookie)
7615 {
7616 	struct fs_mount* mount = NULL;
7617 	dev_t device = *_cookie;
7618 
7619 	mutex_lock(&sMountMutex);
7620 
7621 	// Since device IDs are assigned sequentially, this algorithm
7622 	// works well enough. It makes sure that the device list
7623 	// returned is sorted, and that no device is skipped when an
7624 	// already visited device got unmounted.
7625 
7626 	while (device < sNextMountID) {
7627 		mount = find_mount(device++);
7628 		if (mount != NULL && mount->volume->private_volume != NULL)
7629 			break;
7630 	}
7631 
7632 	*_cookie = device;
7633 
7634 	if (mount != NULL)
7635 		device = mount->id;
7636 	else
7637 		device = B_BAD_VALUE;
7638 
7639 	mutex_unlock(&sMountMutex);
7640 
7641 	return device;
7642 }
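
/*
 * Typical use of the cookie protocol above (compare the loop in
 * _kern_sync() below; next_dev() is the public wrapper): start with a
 * zero cookie and iterate until an error is returned.
 *
 *	int32 cookie = 0;
 *	dev_t device;
 *	while ((device = next_dev(&cookie)) >= 0)
 *		dprintf("mounted volume: %ld\n", device);
 */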
7643 
7644 
7645 ssize_t
7646 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7647 	void *buffer, size_t readBytes)
7648 {
7649 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7650 	if (attrFD < 0)
7651 		return attrFD;
7652 
7653 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7654 
7655 	_kern_close(attrFD);
7656 
7657 	return bytesRead;
7658 }
7659 
7660 
7661 static status_t
7662 get_cwd(char* buffer, size_t size, bool kernel)
7663 {
7664 	// Get current working directory from io context
7665 	struct io_context* context = get_current_io_context(kernel);
7666 	status_t status;
7667 
7668 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7669 
7670 	mutex_lock(&context->io_mutex);
7671 
7672 	struct vnode* vnode = context->cwd;
7673 	if (vnode)
7674 		inc_vnode_ref_count(vnode);
7675 
7676 	mutex_unlock(&context->io_mutex);
7677 
7678 	if (vnode) {
7679 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7680 		put_vnode(vnode);
7681 	} else
7682 		status = B_ERROR;
7683 
7684 	return status;
7685 }
7686 
7687 
7688 static status_t
7689 set_cwd(int fd, char* path, bool kernel)
7690 {
7691 	struct io_context* context;
7692 	struct vnode* vnode = NULL;
7693 	struct vnode* oldDirectory;
7694 	status_t status;
7695 
7696 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7697 
7698 	// Get vnode for passed path, and bail if it failed
7699 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7700 	if (status < 0)
7701 		return status;
7702 
7703 	if (!S_ISDIR(vnode->Type())) {
7704 		// nope, can't cwd to here
7705 		status = B_NOT_A_DIRECTORY;
7706 		goto err;
7707 	}
7708 
7709 	// Get current io context and lock
7710 	context = get_current_io_context(kernel);
7711 	mutex_lock(&context->io_mutex);
7712 
7713 	// save the old current working directory first
7714 	oldDirectory = context->cwd;
7715 	context->cwd = vnode;
7716 
7717 	mutex_unlock(&context->io_mutex);
7718 
7719 	if (oldDirectory)
7720 		put_vnode(oldDirectory);
7721 
7722 	return B_NO_ERROR;
7723 
7724 err:
7725 	put_vnode(vnode);
7726 	return status;
7727 }
7728 
7729 
7730 //	#pragma mark - kernel mirrored syscalls
7731 
7732 
7733 dev_t
7734 _kern_mount(const char* path, const char* device, const char* fsName,
7735 	uint32 flags, const char* args, size_t argsLength)
7736 {
7737 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7738 	if (pathBuffer.InitCheck() != B_OK)
7739 		return B_NO_MEMORY;
7740 
7741 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7742 }
7743 
7744 
7745 status_t
7746 _kern_unmount(const char* path, uint32 flags)
7747 {
7748 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7749 	if (pathBuffer.InitCheck() != B_OK)
7750 		return B_NO_MEMORY;
7751 
7752 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7753 }
7754 
7755 
7756 status_t
7757 _kern_read_fs_info(dev_t device, struct fs_info* info)
7758 {
7759 	if (info == NULL)
7760 		return B_BAD_VALUE;
7761 
7762 	return fs_read_info(device, info);
7763 }
7764 
7765 
7766 status_t
7767 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7768 {
7769 	if (info == NULL)
7770 		return B_BAD_VALUE;
7771 
7772 	return fs_write_info(device, info, mask);
7773 }
7774 
7775 
7776 status_t
7777 _kern_sync(void)
7778 {
7779 	// Note: _kern_sync() is also called from _user_sync()
7780 	int32 cookie = 0;
7781 	dev_t device;
7782 	while ((device = next_dev(&cookie)) >= 0) {
7783 		status_t status = fs_sync(device);
7784 		if (status != B_OK && status != B_BAD_VALUE) {
7785 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7786 				strerror(status));
7787 		}
7788 	}
7789 
7790 	return B_OK;
7791 }
7792 
7793 
7794 dev_t
7795 _kern_next_device(int32* _cookie)
7796 {
7797 	return fs_next_device(_cookie);
7798 }
7799 
7800 
7801 status_t
7802 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7803 	size_t infoSize)
7804 {
7805 	if (infoSize != sizeof(fd_info))
7806 		return B_BAD_VALUE;
7807 
7808 	struct io_context* context = NULL;
7809 	struct team* team = NULL;
7810 
7811 	cpu_status state = disable_interrupts();
7812 	GRAB_TEAM_LOCK();
7813 
7814 	bool contextLocked = false;
7815 	team = team_get_team_struct_locked(teamID);
7816 	if (team) {
7817 		// We cannot lock the IO context while holding the team lock, nor can
7818 		// we just drop the team lock, since it might be deleted in the
7819 		// meantime. team_remove_team() acquires the thread lock when removing
7820 		// the team from the team hash table, though. Hence we switch to the
7821 		// thread lock and use mutex_lock_threads_locked().
7822 		context = (io_context*)team->io_context;
7823 
7824 		GRAB_THREAD_LOCK();
7825 		RELEASE_TEAM_LOCK();
7826 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7827 		RELEASE_THREAD_LOCK();
7828 	} else
7829 		RELEASE_TEAM_LOCK();
7830 
7831 	restore_interrupts(state);
7832 
7833 	if (!contextLocked) {
7834 		// team doesn't exist or seems to be gone
7835 		return B_BAD_TEAM_ID;
7836 	}
7837 
7838 	// the team cannot be deleted completely while we hold its io_context
7839 	// mutex, so we can safely work with it now
7840 
7841 	uint32 slot = *_cookie;
7842 
7843 	struct file_descriptor* descriptor;
7844 	while (slot < context->table_size
7845 		&& (descriptor = context->fds[slot]) == NULL) {
7846 		slot++;
7847 	}
7848 
7849 	if (slot >= context->table_size) {
7850 		mutex_unlock(&context->io_mutex);
7851 		return B_ENTRY_NOT_FOUND;
7852 	}
7853 
7854 	info->number = slot;
7855 	info->open_mode = descriptor->open_mode;
7856 
7857 	struct vnode* vnode = fd_vnode(descriptor);
7858 	if (vnode != NULL) {
7859 		info->device = vnode->device;
7860 		info->node = vnode->id;
7861 	} else if (descriptor->u.mount != NULL) {
7862 		info->device = descriptor->u.mount->id;
7863 		info->node = -1;
7864 	}
7865 
7866 	mutex_unlock(&context->io_mutex);
7867 
7868 	*_cookie = slot + 1;
7869 	return B_OK;
7870 }
7871 
7872 
7873 int
7874 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7875 	int perms)
7876 {
7877 	if ((openMode & O_CREAT) != 0) {
7878 		return file_create_entry_ref(device, inode, name, openMode, perms,
7879 			true);
7880 	}
7881 
7882 	return file_open_entry_ref(device, inode, name, openMode, true);
7883 }
7884 
7885 
7886 /*!	\brief Opens a node specified by a FD + path pair.
7887 
7888 	At least one of \a fd and \a path must be specified.
7889 	If only \a fd is given, the function opens the node identified by this
7890 	FD. If only a path is given, this path is opened. If both are given and
7891 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7892 	of the directory (!) identified by \a fd.
7893 
7894 	\param fd The FD. May be < 0.
7895 	\param path The absolute or relative path. May be \c NULL.
7896 	\param openMode The open mode.
7897 	\return A FD referring to the newly opened node, or an error code,
7898 			if an error occurs.
7899 */
7900 int
7901 _kern_open(int fd, const char* path, int openMode, int perms)
7902 {
7903 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7904 	if (pathBuffer.InitCheck() != B_OK)
7905 		return B_NO_MEMORY;
7906 
7907 	if (openMode & O_CREAT)
7908 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7909 
7910 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7911 }
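
/*	A brief sketch of the FD + path rules documented above; "dirFD" is a
	hypothetical FD referring to an open directory:

		int fd1 = _kern_open(-1, "/boot/home/todo", O_RDONLY, 0);
			// absolute path: the FD argument is ignored
		int fd2 = _kern_open(dirFD, "todo", O_RDONLY, 0);
			// relative path: resolved in the directory dirFD refers to
		if (fd2 >= 0)
			_kern_close(fd2);
*/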
7912 
7913 
7914 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7915 
7916 	The supplied name may be \c NULL, in which case directory identified
7917 	by \a device and \a inode will be opened. Otherwise \a device and
7918 	\a inode identify the parent directory of the directory to be opened
7919 	and \a name its entry name.
7920 
7921 	\param device If \a name is specified the ID of the device the parent
7922 		   directory of the directory to be opened resides on, otherwise
7923 		   the device of the directory itself.
7924 	\param inode If \a name is specified the node ID of the parent
7925 		   directory of the directory to be opened, otherwise node ID of the
7926 		   directory itself.
7927 	\param name The entry name of the directory to be opened. If \c NULL,
7928 		   the \a device + \a inode pair identify the node to be opened.
7929 	\return The FD of the newly opened directory or an error code, if
7930 			something went wrong.
7931 */
7932 int
7933 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7934 {
7935 	return dir_open_entry_ref(device, inode, name, true);
7936 }
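
/*	A sketch of the two addressing modes described above; "device",
	"parentInode", and "dirInode" are hypothetical values:

		int dir1 = _kern_open_dir_entry_ref(device, parentInode, "config");
			// opens the entry "config" in the directory
			// (device, parentInode)
		int dir2 = _kern_open_dir_entry_ref(device, dirInode, NULL);
			// NULL name: (device, dirInode) identifies the directory itself
*/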
7937 
7938 
7939 /*!	\brief Opens a directory specified by a FD + path pair.
7940 
7941 	At least one of \a fd and \a path must be specified.
7942 	If only \a fd is given, the function opens the directory identified by this
7943 	FD. If only a path is given, this path is opened. If both are given and
7944 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7945 	of the directory (!) identified by \a fd.
7946 
7947 	\param fd The FD. May be < 0.
7948 	\param path The absolute or relative path. May be \c NULL.
7949 	\return A FD referring to the newly opened directory, or an error code,
7950 			if an error occurs.
7951 */
7952 int
7953 _kern_open_dir(int fd, const char* path)
7954 {
7955 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7956 	if (pathBuffer.InitCheck() != B_OK)
7957 		return B_NO_MEMORY;
7958 
7959 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7960 }
7961 
7962 
7963 status_t
7964 _kern_fcntl(int fd, int op, uint32 argument)
7965 {
7966 	return common_fcntl(fd, op, argument, true);
7967 }
7968 
7969 
7970 status_t
7971 _kern_fsync(int fd)
7972 {
7973 	return common_sync(fd, true);
7974 }
7975 
7976 
7977 status_t
7978 _kern_lock_node(int fd)
7979 {
7980 	return common_lock_node(fd, true);
7981 }
7982 
7983 
7984 status_t
7985 _kern_unlock_node(int fd)
7986 {
7987 	return common_unlock_node(fd, true);
7988 }
7989 
7990 
7991 status_t
7992 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7993 	int perms)
7994 {
7995 	return dir_create_entry_ref(device, inode, name, perms, true);
7996 }
7997 
7998 
7999 /*!	\brief Creates a directory specified by a FD + path pair.
8000 
8001 	\a path must always be specified (it contains the name of the new directory
8002 	at least). If only a path is given, this path identifies the location at
8003 	which the directory shall be created. If both \a fd and \a path are given
8004 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8005 	of the directory (!) identified by \a fd.
8006 
8007 	\param fd The FD. May be < 0.
8008 	\param path The absolute or relative path. Must not be \c NULL.
8009 	\param perms The access permissions the new directory shall have.
8010 	\return \c B_OK, if the directory has been created successfully, another
8011 			error code otherwise.
8012 */
8013 status_t
8014 _kern_create_dir(int fd, const char* path, int perms)
8015 {
8016 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8017 	if (pathBuffer.InitCheck() != B_OK)
8018 		return B_NO_MEMORY;
8019 
8020 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8021 }
8022 
8023 
8024 status_t
8025 _kern_remove_dir(int fd, const char* path)
8026 {
8027 	if (path) {
8028 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8029 		if (pathBuffer.InitCheck() != B_OK)
8030 			return B_NO_MEMORY;
8031 
8032 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8033 	}
8034 
8035 	return dir_remove(fd, NULL, true);
8036 }
8037 
8038 
8039 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8040 
8041 	At least one of \a fd and \a path must be specified.
8042 	If only \a fd is given, the symlink to be read is the node
8043 	identified by this FD. If only a path is given, this path identifies the
8044 	symlink to be read. If both are given and the path is absolute, \a fd is
8045 	ignored; a relative path is reckoned off of the directory (!) identified
8046 	by \a fd.
8047 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8048 	will still be updated to reflect the required buffer size.
8049 
8050 	\param fd The FD. May be < 0.
8051 	\param path The absolute or relative path. May be \c NULL.
8052 	\param buffer The buffer into which the contents of the symlink shall be
8053 		   written.
8054 	\param _bufferSize A pointer to the size of the supplied buffer.
8055 	\return The length of the link on success or an appropriate error code.
8056 */
8057 status_t
8058 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8059 {
8060 	if (path) {
8061 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8062 		if (pathBuffer.InitCheck() != B_OK)
8063 			return B_NO_MEMORY;
8064 
8065 		return common_read_link(fd, pathBuffer.LockBuffer(),
8066 			buffer, _bufferSize, true);
8067 	}
8068 
8069 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8070 }
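
/*	A sketch of the B_BUFFER_OVERFLOW contract documented above: since
	\a _bufferSize is updated even on failure, a caller can retry with a
	suitably sized buffer (hypothetical, further error handling elided):

		char buffer[16];
		size_t size = sizeof(buffer);
		status_t error = _kern_read_link(-1, "/boot/home/link", buffer,
			&size);
		if (error == B_BUFFER_OVERFLOW) {
			// "size" now holds the size the link contents require
		}
*/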
8071 
8072 
8073 /*!	\brief Creates a symlink specified by a FD + path pair.
8074 
8075 	\a path must always be specified (it contains the name of the new symlink
8076 	at least). If only a path is given, this path identifies the location at
8077 	which the symlink shall be created. If both \a fd and \a path are given and
8078 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8079 	of the directory (!) identified by \a fd.
8080 
8081 	\param fd The FD. May be < 0.
8082 	\param toPath The absolute or relative path. Must not be \c NULL.
8083 	\param mode The access permissions the new symlink shall have.
8084 	\return \c B_OK, if the symlink has been created successfully, another
8085 			error code otherwise.
8086 */
8087 status_t
8088 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8089 {
8090 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8091 	if (pathBuffer.InitCheck() != B_OK)
8092 		return B_NO_MEMORY;
8093 
8094 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8095 		toPath, mode, true);
8096 }
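
/*	A sketch of symlink creation under the rules above; "dirFD" is a
	hypothetical directory FD. The target is stored as given -- it is not
	resolved against \a fd at creation time:

		status_t error = _kern_create_symlink(dirFD, "home-link",
			"/boot/home", 0777);
*/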
8097 
8098 
8099 status_t
8100 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8101 	bool traverseLeafLink)
8102 {
8103 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8104 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8105 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8106 		return B_NO_MEMORY;
8107 
8108 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8109 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8110 }
8111 
8112 
8113 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8114 
8115 	\a path must always be specified (it contains at least the name of the entry
8116 	to be deleted). If only a path is given, this path identifies the entry
8117 	directly. If both \a fd and \a path are given and the path is absolute,
8118 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8119 	identified by \a fd.
8120 
8121 	\param fd The FD. May be < 0.
8122 	\param path The absolute or relative path. Must not be \c NULL.
8123 	\return \c B_OK, if the entry has been removed successfully, another
8124 			error code otherwise.
8125 */
8126 status_t
8127 _kern_unlink(int fd, const char* path)
8128 {
8129 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8130 	if (pathBuffer.InitCheck() != B_OK)
8131 		return B_NO_MEMORY;
8132 
8133 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8134 }
8135 
8136 
8137 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8138 		   by another FD + path pair.
8139 
8140 	\a oldPath and \a newPath must always be specified (they contain at least
8141 	the name of the entry). If only a path is given, this path identifies the
8142 	entry directly. If both a FD and a path are given and the path is absolute,
8143 	the FD is ignored; a relative path is reckoned off of the directory (!)
8144 	identified by the respective FD.
8145 
8146 	\param oldFD The FD of the old location. May be < 0.
8147 	\param oldPath The absolute or relative path of the old location. Must not
8148 		   be \c NULL.
8149 	\param newFD The FD of the new location. May be < 0.
8150 	\param newPath The absolute or relative path of the new location. Must not
8151 		   be \c NULL.
8152 	\return \c B_OK, if the entry has been moved successfully, another
8153 			error code otherwise.
8154 */
8155 status_t
8156 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8157 {
8158 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8159 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8160 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8161 		return B_NO_MEMORY;
8162 
8163 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8164 		newFD, newPathBuffer.LockBuffer(), true);
8165 }
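
/*	A sketch of the dual FD + path form above: the two names are resolved
	independently, so a single call can rename an entry and move it to a
	different directory ("srcDirFD" and "dstDirFD" are hypothetical):

		status_t error = _kern_rename(srcDirFD, "draft.txt", dstDirFD,
			"final.txt");
*/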
8166 
8167 
8168 status_t
8169 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8170 {
8171 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8172 	if (pathBuffer.InitCheck() != B_OK)
8173 		return B_NO_MEMORY;
8174 
8175 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8176 		true);
8177 }
8178 
8179 
8180 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8181 
8182 	If only \a fd is given, the stat operation associated with the type
8183 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8184 	given, this path identifies the entry for whose node to retrieve the
8185 	stat data. If both \a fd and \a path are given and the path is absolute,
8186 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8187 	identified by \a fd and specifies the entry whose stat data shall be
8188 	retrieved.
8189 
8190 	\param fd The FD. May be < 0.
8191 	\param path The absolute or relative path. Must not be \c NULL.
8192 	\param traverseLeafLink If \a path is given, \c true specifies that the
8193 		   function shall not stick to symlinks, but traverse them.
8194 	\param stat The buffer the stat data shall be written into.
8195 	\param statSize The size of the supplied stat buffer.
8196 	\return \c B_OK, if the stat data has been read successfully, another
8197 			error code otherwise.
8198 */
8199 status_t
8200 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8201 	struct stat* stat, size_t statSize)
8202 {
8203 	struct stat completeStat;
8204 	struct stat* originalStat = NULL;
8205 	status_t status;
8206 
8207 	if (statSize > sizeof(struct stat))
8208 		return B_BAD_VALUE;
8209 
8210 	// this supports different stat extensions
8211 	if (statSize < sizeof(struct stat)) {
8212 		originalStat = stat;
8213 		stat = &completeStat;
8214 	}
8215 
8216 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8217 
8218 	if (status == B_OK && originalStat != NULL)
8219 		memcpy(originalStat, stat, statSize);
8220 
8221 	return status;
8222 }
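
/*	A sketch of the stat extension handling above: a caller built against
	an older, smaller struct stat passes its own sizeof() and gets exactly
	that prefix copied back, while the kernel internally operates on a
	complete structure:

		struct stat st;
		status_t error = _kern_read_stat(-1, "/boot/home", true, &st,
			sizeof(st));
*/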
8223 
8224 
8225 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8226 
8227 	If only \a fd is given, the stat operation associated with the type
8228 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8229 	given, this path identifies the entry for whose node to write the
8230 	stat data. If both \a fd and \a path are given and the path is absolute,
8231 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8232 	identified by \a fd and specifies the entry whose stat data shall be
8233 	written.
8234 
8235 	\param fd The FD. May be < 0.
8236 	\param path The absolute or relative path. Must not be \c NULL.
8237 	\param traverseLeafLink If \a path is given, \c true specifies that the
8238 		   function shall not stick to symlinks, but traverse them.
8239 	\param stat The buffer containing the stat data to be written.
8240 	\param statSize The size of the supplied stat buffer.
8241 	\param statMask A mask specifying which parts of the stat data shall be
8242 		   written.
8243 	\return \c B_OK, if the stat data has been written successfully,
8244 			another error code otherwise.
8245 */
8246 status_t
8247 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8248 	const struct stat* stat, size_t statSize, int statMask)
8249 {
8250 	struct stat completeStat;
8251 
8252 	if (statSize > sizeof(struct stat))
8253 		return B_BAD_VALUE;
8254 
8255 	// this supports different stat extensions
8256 	if (statSize < sizeof(struct stat)) {
8257 		memset((uint8*)&completeStat + statSize, 0,
8258 			sizeof(struct stat) - statSize);
8259 		memcpy(&completeStat, stat, statSize);
8260 		stat = &completeStat;
8261 	}
8262 
8263 	status_t status;
8264 
8265 	if (path) {
8266 		// path given: write the stat of the node referred to by (fd, path)
8267 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8268 		if (pathBuffer.InitCheck() != B_OK)
8269 			return B_NO_MEMORY;
8270 
8271 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8272 			traverseLeafLink, stat, statMask, true);
8273 	} else {
8274 		// no path given: get the FD and use the FD operation
8275 		struct file_descriptor* descriptor
8276 			= get_fd(get_current_io_context(true), fd);
8277 		if (descriptor == NULL)
8278 			return B_FILE_ERROR;
8279 
8280 		if (descriptor->ops->fd_write_stat)
8281 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8282 		else
8283 			status = EOPNOTSUPP;
8284 
8285 		put_fd(descriptor);
8286 	}
8287 
8288 	return status;
8289 }
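
/*	A sketch of the \a statMask usage documented above: only the fields
	selected by the mask are written, here just the permission bits
	(assuming the B_STAT_MODE flag from <NodeMonitor.h>):

		struct stat st;
		st.st_mode = 0644;
		status_t error = _kern_write_stat(-1, "/boot/home/todo", true, &st,
			sizeof(st), B_STAT_MODE);
*/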
8290 
8291 
8292 int
8293 _kern_open_attr_dir(int fd, const char* path)
8294 {
8295 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8296 	if (pathBuffer.InitCheck() != B_OK)
8297 		return B_NO_MEMORY;
8298 
8299 	if (path != NULL)
8300 		pathBuffer.SetTo(path);
8301 
8302 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8303 }
8304 
8305 
8306 int
8307 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8308 	int openMode)
8309 {
8310 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8311 	if (pathBuffer.InitCheck() != B_OK)
8312 		return B_NO_MEMORY;
8313 
8314 	if ((openMode & O_CREAT) != 0) {
8315 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8316 			true);
8317 	}
8318 
8319 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8320 }
8321 
8322 
8323 status_t
8324 _kern_remove_attr(int fd, const char* name)
8325 {
8326 	return attr_remove(fd, name, true);
8327 }
8328 
8329 
8330 status_t
8331 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8332 	const char* toName)
8333 {
8334 	return attr_rename(fromFile, fromName, toFile, toName, true);
8335 }
8336 
8337 
8338 int
8339 _kern_open_index_dir(dev_t device)
8340 {
8341 	return index_dir_open(device, true);
8342 }
8343 
8344 
8345 status_t
8346 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8347 {
8348 	return index_create(device, name, type, flags, true);
8349 }
8350 
8351 
8352 status_t
8353 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8354 {
8355 	return index_name_read_stat(device, name, stat, true);
8356 }
8357 
8358 
8359 status_t
8360 _kern_remove_index(dev_t device, const char* name)
8361 {
8362 	return index_remove(device, name, true);
8363 }
8364 
8365 
8366 status_t
8367 _kern_getcwd(char* buffer, size_t size)
8368 {
8369 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8370 
8371 	// Call vfs to get current working directory
8372 	return get_cwd(buffer, size, true);
8373 }
8374 
8375 
8376 status_t
8377 _kern_setcwd(int fd, const char* path)
8378 {
8379 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8380 	if (pathBuffer.InitCheck() != B_OK)
8381 		return B_NO_MEMORY;
8382 
8383 	if (path != NULL)
8384 		pathBuffer.SetTo(path);
8385 
8386 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8387 }
8388 
8389 
8390 //	#pragma mark - userland syscalls
8391 
8392 
8393 dev_t
8394 _user_mount(const char* userPath, const char* userDevice,
8395 	const char* userFileSystem, uint32 flags, const char* userArgs,
8396 	size_t argsLength)
8397 {
8398 	char fileSystem[B_FILE_NAME_LENGTH];
8399 	KPath path, device;
8400 	char* args = NULL;
8401 	status_t status;
8402 
8403 	if (!IS_USER_ADDRESS(userPath)
8404 		|| !IS_USER_ADDRESS(userFileSystem)
8405 		|| !IS_USER_ADDRESS(userDevice))
8406 		return B_BAD_ADDRESS;
8407 
8408 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8409 		return B_NO_MEMORY;
8410 
8411 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8412 		return B_BAD_ADDRESS;
8413 
8414 	if (userFileSystem != NULL
8415 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8416 		return B_BAD_ADDRESS;
8417 
8418 	if (userDevice != NULL
8419 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8420 			< B_OK)
8421 		return B_BAD_ADDRESS;
8422 
8423 	if (userArgs != NULL && argsLength > 0) {
8424 		// this is a safety restriction
8425 		if (argsLength >= 65536)
8426 			return B_NAME_TOO_LONG;
8427 
8428 		args = (char*)malloc(argsLength + 1);
8429 		if (args == NULL)
8430 			return B_NO_MEMORY;
8431 
8432 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8433 			free(args);
8434 			return B_BAD_ADDRESS;
8435 		}
8436 	}
8437 	path.UnlockBuffer();
8438 	device.UnlockBuffer();
8439 
8440 	status = fs_mount(path.LockBuffer(),
8441 		userDevice != NULL ? device.Path() : NULL,
8442 		userFileSystem ? fileSystem : NULL, flags, args, false);
8443 
8444 	free(args);
8445 	return status;
8446 }
8447 
8448 
8449 status_t
8450 _user_unmount(const char* userPath, uint32 flags)
8451 {
8452 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8453 	if (pathBuffer.InitCheck() != B_OK)
8454 		return B_NO_MEMORY;
8455 
8456 	char* path = pathBuffer.LockBuffer();
8457 
8458 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8459 		return B_BAD_ADDRESS;
8460 
8461 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8462 }
8463 
8464 
8465 status_t
8466 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8467 {
8468 	struct fs_info info;
8469 	status_t status;
8470 
8471 	if (userInfo == NULL)
8472 		return B_BAD_VALUE;
8473 
8474 	if (!IS_USER_ADDRESS(userInfo))
8475 		return B_BAD_ADDRESS;
8476 
8477 	status = fs_read_info(device, &info);
8478 	if (status != B_OK)
8479 		return status;
8480 
8481 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8482 		return B_BAD_ADDRESS;
8483 
8484 	return B_OK;
8485 }
8486 
8487 
8488 status_t
8489 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8490 {
8491 	struct fs_info info;
8492 
8493 	if (userInfo == NULL)
8494 		return B_BAD_VALUE;
8495 
8496 	if (!IS_USER_ADDRESS(userInfo)
8497 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8498 		return B_BAD_ADDRESS;
8499 
8500 	return fs_write_info(device, &info, mask);
8501 }
8502 
8503 
8504 dev_t
8505 _user_next_device(int32* _userCookie)
8506 {
8507 	int32 cookie;
8508 	dev_t device;
8509 
8510 	if (!IS_USER_ADDRESS(_userCookie)
8511 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8512 		return B_BAD_ADDRESS;
8513 
8514 	device = fs_next_device(&cookie);
8515 
8516 	if (device >= B_OK) {
8517 		// update user cookie
8518 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8519 			return B_BAD_ADDRESS;
8520 	}
8521 
8522 	return device;
8523 }
8524 
8525 
8526 status_t
8527 _user_sync(void)
8528 {
8529 	return _kern_sync();
8530 }
8531 
8532 
8533 status_t
8534 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8535 	size_t infoSize)
8536 {
8537 	struct fd_info info;
8538 	uint32 cookie;
8539 
8540 	// only root can do this (or should root's group be enough?)
8541 	if (geteuid() != 0)
8542 		return B_NOT_ALLOWED;
8543 
8544 	if (infoSize != sizeof(fd_info))
8545 		return B_BAD_VALUE;
8546 
8547 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8548 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8549 		return B_BAD_ADDRESS;
8550 
8551 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8552 	if (status != B_OK)
8553 		return status;
8554 
8555 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8556 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8557 		return B_BAD_ADDRESS;
8558 
8559 	return status;
8560 }
8561 
8562 
8563 status_t
8564 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8565 	char* userPath, size_t pathLength)
8566 {
8567 	if (!IS_USER_ADDRESS(userPath))
8568 		return B_BAD_ADDRESS;
8569 
8570 	KPath path(B_PATH_NAME_LENGTH + 1);
8571 	if (path.InitCheck() != B_OK)
8572 		return B_NO_MEMORY;
8573 
8574 	// copy the leaf name onto the stack
8575 	char stackLeaf[B_FILE_NAME_LENGTH];
8576 	if (leaf) {
8577 		if (!IS_USER_ADDRESS(leaf))
8578 			return B_BAD_ADDRESS;
8579 
8580 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8581 		if (length < 0)
8582 			return length;
8583 		if (length >= B_FILE_NAME_LENGTH)
8584 			return B_NAME_TOO_LONG;
8585 
8586 		leaf = stackLeaf;
8587 	}
8588 
8589 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8590 		path.LockBuffer(), path.BufferSize());
8591 	if (status != B_OK)
8592 		return status;
8593 
8594 	path.UnlockBuffer();
8595 
8596 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8597 	if (length < 0)
8598 		return length;
8599 	if (length >= (int)pathLength)
8600 		return B_BUFFER_OVERFLOW;
8601 
8602 	return B_OK;
8603 }
8604 
8605 
8606 status_t
8607 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8608 {
8609 	if (userPath == NULL || buffer == NULL)
8610 		return B_BAD_VALUE;
8611 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8612 		return B_BAD_ADDRESS;
8613 
8614 	// copy path from userland
8615 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8616 	if (pathBuffer.InitCheck() != B_OK)
8617 		return B_NO_MEMORY;
8618 	char* path = pathBuffer.LockBuffer();
8619 
8620 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8621 		return B_BAD_ADDRESS;
8622 
8623 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8624 		false);
8625 	if (error != B_OK)
8626 		return error;
8627 
8628 	// copy back to userland
8629 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8630 	if (len < 0)
8631 		return len;
8632 	if (len >= B_PATH_NAME_LENGTH)
8633 		return B_BUFFER_OVERFLOW;
8634 
8635 	return B_OK;
8636 }
8637 
8638 
8639 int
8640 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8641 	int openMode, int perms)
8642 {
8643 	char name[B_FILE_NAME_LENGTH];
8644 
8645 	if (userName == NULL || device < 0 || inode < 0)
8646 		return B_BAD_VALUE;
8647 	if (!IS_USER_ADDRESS(userName)
8648 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8649 		return B_BAD_ADDRESS;
8650 
8651 	if ((openMode & O_CREAT) != 0) {
8652 		return file_create_entry_ref(device, inode, name, openMode, perms,
8653 			false);
8654 	}
8655 
8656 	return file_open_entry_ref(device, inode, name, openMode, false);
8657 }
8658 
8659 
8660 int
8661 _user_open(int fd, const char* userPath, int openMode, int perms)
8662 {
8663 	KPath path(B_PATH_NAME_LENGTH + 1);
8664 	if (path.InitCheck() != B_OK)
8665 		return B_NO_MEMORY;
8666 
8667 	char* buffer = path.LockBuffer();
8668 
8669 	if (!IS_USER_ADDRESS(userPath)
8670 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8671 		return B_BAD_ADDRESS;
8672 
8673 	if ((openMode & O_CREAT) != 0)
8674 		return file_create(fd, buffer, openMode, perms, false);
8675 
8676 	return file_open(fd, buffer, openMode, false);
8677 }
8678 
8679 
8680 int
8681 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8682 {
8683 	if (userName != NULL) {
8684 		char name[B_FILE_NAME_LENGTH];
8685 
8686 		if (!IS_USER_ADDRESS(userName)
8687 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8688 			return B_BAD_ADDRESS;
8689 
8690 		return dir_open_entry_ref(device, inode, name, false);
8691 	}
8692 	return dir_open_entry_ref(device, inode, NULL, false);
8693 }
8694 
8695 
8696 int
8697 _user_open_dir(int fd, const char* userPath)
8698 {
8699 	if (userPath == NULL)
8700 		return dir_open(fd, NULL, false);
8701 
8702 	KPath path(B_PATH_NAME_LENGTH + 1);
8703 	if (path.InitCheck() != B_OK)
8704 		return B_NO_MEMORY;
8705 
8706 	char* buffer = path.LockBuffer();
8707 
8708 	if (!IS_USER_ADDRESS(userPath)
8709 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8710 		return B_BAD_ADDRESS;
8711 
8712 	return dir_open(fd, buffer, false);
8713 }
8714 
8715 
8716 /*!	\brief Opens a directory's parent directory and returns the entry name
8717 		   of the former.
8718 
8719 	Aside from returning the directory's entry name, this method is
8720 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8721 	equivalent, if \a userName is \c NULL.
8722 
8723 	If a name buffer is supplied and the name does not fit the buffer, the
8724 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8725 
8726 	\param fd A FD referring to a directory.
8727 	\param userName Buffer the directory's entry name shall be written into.
8728 		   May be \c NULL.
8729 	\param nameLength Size of the name buffer.
8730 	\return The file descriptor of the opened parent directory, if everything
8731 			went fine, an error code otherwise.
8732 */
8733 int
8734 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8735 {
8736 	bool kernel = false;
8737 
8738 	if (userName && !IS_USER_ADDRESS(userName))
8739 		return B_BAD_ADDRESS;
8740 
8741 	// open the parent dir
8742 	int parentFD = dir_open(fd, (char*)"..", kernel);
8743 	if (parentFD < 0)
8744 		return parentFD;
8745 	FDCloser fdCloser(parentFD, kernel);
8746 
8747 	if (userName) {
8748 		// get the vnodes
8749 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8750 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8751 		VNodePutter parentVNodePutter(parentVNode);
8752 		VNodePutter dirVNodePutter(dirVNode);
8753 		if (!parentVNode || !dirVNode)
8754 			return B_FILE_ERROR;
8755 
8756 		// get the vnode name
8757 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8758 		struct dirent* buffer = (struct dirent*)_buffer;
8759 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8760 			sizeof(_buffer), get_current_io_context(false));
8761 		if (status != B_OK)
8762 			return status;
8763 
8764 		// copy the name to the userland buffer
8765 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8766 		if (len < 0)
8767 			return len;
8768 		if (len >= (int)nameLength)
8769 			return B_BUFFER_OVERFLOW;
8770 	}
8771 
8772 	return fdCloser.Detach();
8773 }
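
/*	A sketch of the call above as userland would issue it through the
	syscall layer: starting from a directory FD, repeated calls walk
	towards the root while collecting entry names, e.g. to reconstruct a
	path ("dirFD" is hypothetical):

		char name[B_FILE_NAME_LENGTH];
		int parent = _user_open_parent_dir(dirFD, name, sizeof(name));
		// on success, "parent" refers to the parent directory and "name"
		// holds the entry name of the directory dirFD refers to
*/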
8774 
8775 
8776 status_t
8777 _user_fcntl(int fd, int op, uint32 argument)
8778 {
8779 	status_t status = common_fcntl(fd, op, argument, false);
8780 	if (op == F_SETLKW)
8781 		syscall_restart_handle_post(status);
8782 
8783 	return status;
8784 }
8785 
8786 
8787 status_t
8788 _user_fsync(int fd)
8789 {
8790 	return common_sync(fd, false);
8791 }
8792 
8793 
8794 status_t
8795 _user_flock(int fd, int operation)
8796 {
8797 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8798 
8799 	// Check if the operation is valid
8800 	switch (operation & ~LOCK_NB) {
8801 		case LOCK_UN:
8802 		case LOCK_SH:
8803 		case LOCK_EX:
8804 			break;
8805 
8806 		default:
8807 			return B_BAD_VALUE;
8808 	}
8809 
8810 	struct file_descriptor* descriptor;
8811 	struct vnode* vnode;
8812 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8813 	if (descriptor == NULL)
8814 		return B_FILE_ERROR;
8815 
8816 	if (descriptor->type != FDTYPE_FILE) {
8817 		put_fd(descriptor);
8818 		return B_BAD_VALUE;
8819 	}
8820 
8821 	struct flock flock;
8822 	flock.l_start = 0;
8823 	flock.l_len = OFF_MAX;
8824 	flock.l_whence = 0;
8825 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8826 
8827 	status_t status;
8828 	if ((operation & LOCK_UN) != 0)
8829 		status = release_advisory_lock(vnode, &flock);
8830 	else {
8831 		status = acquire_advisory_lock(vnode,
8832 			thread_get_current_thread()->team->session_id, &flock,
8833 			(operation & LOCK_NB) == 0);
8834 	}
8835 
8836 	syscall_restart_handle_post(status);
8837 
8838 	put_fd(descriptor);
8839 	return status;
8840 }
8841 
8842 
8843 status_t
8844 _user_lock_node(int fd)
8845 {
8846 	return common_lock_node(fd, false);
8847 }
8848 
8849 
8850 status_t
8851 _user_unlock_node(int fd)
8852 {
8853 	return common_unlock_node(fd, false);
8854 }
8855 
8856 
8857 status_t
8858 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8859 	int perms)
8860 {
8861 	char name[B_FILE_NAME_LENGTH];
8862 	status_t status;
8863 
8864 	if (!IS_USER_ADDRESS(userName))
8865 		return B_BAD_ADDRESS;
8866 
8867 	status = user_strlcpy(name, userName, sizeof(name));
8868 	if (status < 0)
8869 		return status;
8870 
8871 	return dir_create_entry_ref(device, inode, name, perms, false);
8872 }
8873 
8874 
8875 status_t
8876 _user_create_dir(int fd, const char* userPath, int perms)
8877 {
8878 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8879 	if (pathBuffer.InitCheck() != B_OK)
8880 		return B_NO_MEMORY;
8881 
8882 	char* path = pathBuffer.LockBuffer();
8883 
8884 	if (!IS_USER_ADDRESS(userPath)
8885 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8886 		return B_BAD_ADDRESS;
8887 
8888 	return dir_create(fd, path, perms, false);
8889 }
8890 
8891 
8892 status_t
8893 _user_remove_dir(int fd, const char* userPath)
8894 {
8895 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8896 	if (pathBuffer.InitCheck() != B_OK)
8897 		return B_NO_MEMORY;
8898 
8899 	char* path = pathBuffer.LockBuffer();
8900 
8901 	if (userPath != NULL) {
8902 		if (!IS_USER_ADDRESS(userPath)
8903 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8904 			return B_BAD_ADDRESS;
8905 	}
8906 
8907 	return dir_remove(fd, userPath ? path : NULL, false);
8908 }
8909 
8910 
8911 status_t
8912 _user_read_link(int fd, const char* userPath, char* userBuffer,
8913 	size_t* userBufferSize)
8914 {
8915 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8916 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8917 		return B_NO_MEMORY;
8918 
8919 	size_t bufferSize;
8920 
8921 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8922 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8923 		return B_BAD_ADDRESS;
8924 
8925 	char* path = pathBuffer.LockBuffer();
8926 	char* buffer = linkBuffer.LockBuffer();
8927 
8928 	if (userPath) {
8929 		if (!IS_USER_ADDRESS(userPath)
8930 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8931 			return B_BAD_ADDRESS;
8932 
8933 		if (bufferSize > B_PATH_NAME_LENGTH)
8934 			bufferSize = B_PATH_NAME_LENGTH;
8935 	}
8936 
8937 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8938 		&bufferSize, false);
8939 
8940 	// we also update the bufferSize in case of errors
8941 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8942 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8943 		return B_BAD_ADDRESS;
8944 
8945 	if (status != B_OK)
8946 		return status;
8947 
8948 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8949 		return B_BAD_ADDRESS;
8950 
8951 	return B_OK;
8952 }
8953 
8954 
8955 status_t
8956 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8957 	int mode)
8958 {
8959 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8960 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8961 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8962 		return B_NO_MEMORY;
8963 
8964 	char* path = pathBuffer.LockBuffer();
8965 	char* toPath = toPathBuffer.LockBuffer();
8966 
8967 	if (!IS_USER_ADDRESS(userPath)
8968 		|| !IS_USER_ADDRESS(userToPath)
8969 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8970 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8971 		return B_BAD_ADDRESS;
8972 
8973 	return common_create_symlink(fd, path, toPath, mode, false);
8974 }
8975 
8976 
8977 status_t
8978 _user_create_link(int pathFD, const char* userPath, int toFD,
8979 	const char* userToPath, bool traverseLeafLink)
8980 {
8981 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8982 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8983 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8984 		return B_NO_MEMORY;
8985 
8986 	char* path = pathBuffer.LockBuffer();
8987 	char* toPath = toPathBuffer.LockBuffer();
8988 
8989 	if (!IS_USER_ADDRESS(userPath)
8990 		|| !IS_USER_ADDRESS(userToPath)
8991 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8992 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8993 		return B_BAD_ADDRESS;
8994 
8995 	status_t status = check_path(toPath);
8996 	if (status != B_OK)
8997 		return status;
8998 
8999 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9000 		false);
9001 }
9002 
9003 
9004 status_t
9005 _user_unlink(int fd, const char* userPath)
9006 {
9007 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9008 	if (pathBuffer.InitCheck() != B_OK)
9009 		return B_NO_MEMORY;
9010 
9011 	char* path = pathBuffer.LockBuffer();
9012 
9013 	if (!IS_USER_ADDRESS(userPath)
9014 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9015 		return B_BAD_ADDRESS;
9016 
9017 	return common_unlink(fd, path, false);
9018 }
9019 
9020 
9021 status_t
9022 _user_rename(int oldFD, const char* userOldPath, int newFD,
9023 	const char* userNewPath)
9024 {
9025 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9026 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9027 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9028 		return B_NO_MEMORY;
9029 
9030 	char* oldPath = oldPathBuffer.LockBuffer();
9031 	char* newPath = newPathBuffer.LockBuffer();
9032 
9033 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9034 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9035 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9036 		return B_BAD_ADDRESS;
9037 
9038 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9039 }
9040 
9041 
9042 status_t
9043 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9044 {
9045 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9046 	if (pathBuffer.InitCheck() != B_OK)
9047 		return B_NO_MEMORY;
9048 
9049 	char* path = pathBuffer.LockBuffer();
9050 
9051 	if (!IS_USER_ADDRESS(userPath)
9052 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9053 		return B_BAD_ADDRESS;
9054 	}
9055 
9056 	// split into directory vnode and filename path
9057 	char filename[B_FILE_NAME_LENGTH];
9058 	struct vnode* dir;
9059 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9060 	if (status != B_OK)
9061 		return status;
9062 
9063 	VNodePutter _(dir);
9064 
9065 	// the underlying FS needs to support creating FIFOs
9066 	if (!HAS_FS_CALL(dir, create_special_node))
9067 		return B_UNSUPPORTED;
9068 
9069 	// create the entry	-- the FIFO sub node is set up automatically
9070 	fs_vnode superVnode;
9071 	ino_t nodeID;
9072 	status = FS_CALL(dir, create_special_node, filename, NULL,
9073 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9074 
9075 	// create_special_node() acquired a reference for us that we don't need.
9076 	if (status == B_OK)
9077 		put_vnode(dir->mount->volume, nodeID);
9078 
9079 	return status;
9080 }
9081 
9082 
9083 status_t
9084 _user_create_pipe(int* userFDs)
9085 {
9086 	// rootfs should support creating FIFOs, but let's be sure
9087 	if (!HAS_FS_CALL(sRoot, create_special_node))
9088 		return B_UNSUPPORTED;
9089 
9090 	// create the node	-- the FIFO sub node is set up automatically
9091 	fs_vnode superVnode;
9092 	ino_t nodeID;
9093 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9094 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9095 	if (status != B_OK)
9096 		return status;
9097 
9098 	// We've got one reference to the node and need another one.
9099 	struct vnode* vnode;
9100 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9101 	if (status != B_OK) {
9102 		// that should not happen
9103 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9104 			sRoot->mount->id, nodeID);
9105 		return status;
9106 	}
9107 
9108 	// Everything looks good so far. Open two FDs, one for reading and one
9109 	// for writing.
9110 	int fds[2];
9111 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9112 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9113 
9114 	FDCloser closer0(fds[0], false);
9115 	FDCloser closer1(fds[1], false);
9116 
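	// pick the first error, if any; B_OK only if both FDs could be opened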
9117 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9118 
9119 	// copy FDs to userland
9120 	if (status == B_OK) {
9121 		if (!IS_USER_ADDRESS(userFDs)
9122 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9123 			status = B_BAD_ADDRESS;
9124 		}
9125 	}
9126 
9127 	// keep FDs, if everything went fine
9128 	if (status == B_OK) {
9129 		closer0.Detach();
9130 		closer1.Detach();
9131 	}
9132 
9133 	return status;
9134 }
9135 
9136 
9137 status_t
9138 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9139 {
9140 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9141 	if (pathBuffer.InitCheck() != B_OK)
9142 		return B_NO_MEMORY;
9143 
9144 	char* path = pathBuffer.LockBuffer();
9145 
9146 	if (!IS_USER_ADDRESS(userPath)
9147 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9148 		return B_BAD_ADDRESS;
9149 
9150 	return common_access(fd, path, mode, effectiveUserGroup, false);
9151 }
9152 
9153 
9154 status_t
9155 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9156 	struct stat* userStat, size_t statSize)
9157 {
9158 	struct stat stat;
9159 	status_t status;
9160 
9161 	if (statSize > sizeof(struct stat))
9162 		return B_BAD_VALUE;
9163 
9164 	if (!IS_USER_ADDRESS(userStat))
9165 		return B_BAD_ADDRESS;
9166 
9167 	if (userPath) {
9168 		// path given: get the stat of the node referred to by (fd, path)
9169 		if (!IS_USER_ADDRESS(userPath))
9170 			return B_BAD_ADDRESS;
9171 
9172 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9173 		if (pathBuffer.InitCheck() != B_OK)
9174 			return B_NO_MEMORY;
9175 
9176 		char* path = pathBuffer.LockBuffer();
9177 
9178 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9179 		if (length < B_OK)
9180 			return length;
9181 		if (length >= B_PATH_NAME_LENGTH)
9182 			return B_NAME_TOO_LONG;
9183 
9184 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9185 	} else {
9186 		// no path given: get the FD and use the FD operation
9187 		struct file_descriptor* descriptor
9188 			= get_fd(get_current_io_context(false), fd);
9189 		if (descriptor == NULL)
9190 			return B_FILE_ERROR;
9191 
9192 		if (descriptor->ops->fd_read_stat)
9193 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9194 		else
9195 			status = EOPNOTSUPP;
9196 
9197 		put_fd(descriptor);
9198 	}
9199 
9200 	if (status != B_OK)
9201 		return status;
9202 
9203 	return user_memcpy(userStat, &stat, statSize);
9204 }
9205 
9206 
9207 status_t
9208 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9209 	const struct stat* userStat, size_t statSize, int statMask)
9210 {
9211 	if (statSize > sizeof(struct stat))
9212 		return B_BAD_VALUE;
9213 
9214 	struct stat stat;
9215 
9216 	if (!IS_USER_ADDRESS(userStat)
9217 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9218 		return B_BAD_ADDRESS;
9219 
9220 	// clear additional stat fields
9221 	if (statSize < sizeof(struct stat))
9222 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9223 
9224 	status_t status;
9225 
9226 	if (userPath) {
9227 		// path given: write the stat of the node referred to by (fd, path)
9228 		if (!IS_USER_ADDRESS(userPath))
9229 			return B_BAD_ADDRESS;
9230 
9231 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9232 		if (pathBuffer.InitCheck() != B_OK)
9233 			return B_NO_MEMORY;
9234 
9235 		char* path = pathBuffer.LockBuffer();
9236 
9237 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9238 		if (length < B_OK)
9239 			return length;
9240 		if (length >= B_PATH_NAME_LENGTH)
9241 			return B_NAME_TOO_LONG;
9242 
9243 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9244 			statMask, false);
9245 	} else {
9246 		// no path given: get the FD and use the FD operation
9247 		struct file_descriptor* descriptor
9248 			= get_fd(get_current_io_context(false), fd);
9249 		if (descriptor == NULL)
9250 			return B_FILE_ERROR;
9251 
9252 		if (descriptor->ops->fd_write_stat) {
9253 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9254 				statMask);
9255 		} else
9256 			status = EOPNOTSUPP;
9257 
9258 		put_fd(descriptor);
9259 	}
9260 
9261 	return status;
9262 }
9263 
9264 
9265 int
9266 _user_open_attr_dir(int fd, const char* userPath)
9267 {
9268 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9269 	if (pathBuffer.InitCheck() != B_OK)
9270 		return B_NO_MEMORY;
9271 
9272 	char* path = pathBuffer.LockBuffer();
9273 
9274 	if (userPath != NULL) {
9275 		if (!IS_USER_ADDRESS(userPath)
9276 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9277 			return B_BAD_ADDRESS;
9278 	}
9279 
9280 	return attr_dir_open(fd, userPath ? path : NULL, false);
9281 }
9282 
9283 
9284 ssize_t
9285 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9286 	size_t readBytes)
9287 {
9288 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9289 	if (attr < 0)
9290 		return attr;
9291 
9292 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9293 	_user_close(attr);
9294 
9295 	return bytes;
9296 }
9297 
9298 
9299 ssize_t
9300 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9301 	const void* buffer, size_t writeBytes)
9302 {
9303 	// Try to support the BeOS-typical truncation semantics as well as the
9304 	// position argument
9305 	int attr = attr_create(fd, NULL, attribute, type,
9306 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9307 	if (attr < 0)
9308 		return attr;
9309 
9310 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9311 	_user_close(attr);
9312 
9313 	return bytes;
9314 }
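
/*	A sketch of the truncation rule above: a write at position 0 replaces
	the attribute's contents entirely (O_TRUNC), while a non-zero position
	updates in place (hypothetical userland call; B_STRING_TYPE is the
	usual type code for string attributes):

		const char* text = "important";
		_user_write_attr(fd, "note", B_STRING_TYPE, 0, text,
			strlen(text) + 1);
*/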
9315 
9316 
9317 status_t
9318 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9319 {
9320 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9321 	if (attr < 0)
9322 		return attr;
9323 
9324 	struct file_descriptor* descriptor
9325 		= get_fd(get_current_io_context(false), attr);
9326 	if (descriptor == NULL) {
9327 		_user_close(attr);
9328 		return B_FILE_ERROR;
9329 	}
9330 
9331 	struct stat stat;
9332 	status_t status;
9333 	if (descriptor->ops->fd_read_stat)
9334 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9335 	else
9336 		status = EOPNOTSUPP;
9337 
9338 	put_fd(descriptor);
9339 	_user_close(attr);
9340 
9341 	if (status == B_OK) {
9342 		attr_info info;
9343 		info.type = stat.st_type;
9344 		info.size = stat.st_size;
9345 
9346 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9347 			return B_BAD_ADDRESS;
9348 	}
9349 
9350 	return status;
9351 }
9352 
9353 
9354 int
9355 _user_open_attr(int fd, const char* userPath, const char* userName,
9356 	uint32 type, int openMode)
9357 {
9358 	char name[B_FILE_NAME_LENGTH];
9359 
9360 	if (!IS_USER_ADDRESS(userName)
9361 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9362 		return B_BAD_ADDRESS;
9363 
9364 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9365 	if (pathBuffer.InitCheck() != B_OK)
9366 		return B_NO_MEMORY;
9367 
9368 	char* path = pathBuffer.LockBuffer();
9369 
9370 	if (userPath != NULL) {
9371 		if (!IS_USER_ADDRESS(userPath)
9372 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9373 			return B_BAD_ADDRESS;
9374 	}
9375 
9376 	if ((openMode & O_CREAT) != 0) {
9377 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9378 			false);
9379 	}
9380 
9381 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9382 }
9383 
9384 
9385 status_t
9386 _user_remove_attr(int fd, const char* userName)
9387 {
9388 	char name[B_FILE_NAME_LENGTH];
9389 
9390 	if (!IS_USER_ADDRESS(userName)
9391 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9392 		return B_BAD_ADDRESS;
9393 
9394 	return attr_remove(fd, name, false);
9395 }
9396 
9397 
9398 status_t
9399 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9400 	const char* userToName)
9401 {
9402 	if (!IS_USER_ADDRESS(userFromName)
9403 		|| !IS_USER_ADDRESS(userToName))
9404 		return B_BAD_ADDRESS;
9405 
9406 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9407 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9408 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9409 		return B_NO_MEMORY;
9410 
9411 	char* fromName = fromNameBuffer.LockBuffer();
9412 	char* toName = toNameBuffer.LockBuffer();
9413 
9414 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9415 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9416 		return B_BAD_ADDRESS;
9417 
9418 	return attr_rename(fromFile, fromName, toFile, toName, false);
9419 }
9420 
9421 
9422 int
9423 _user_open_index_dir(dev_t device)
9424 {
9425 	return index_dir_open(device, false);
9426 }
9427 
9428 
9429 status_t
9430 _user_create_index(dev_t device, const char* userName, uint32 type,
9431 	uint32 flags)
9432 {
9433 	char name[B_FILE_NAME_LENGTH];
9434 
9435 	if (!IS_USER_ADDRESS(userName)
9436 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9437 		return B_BAD_ADDRESS;
9438 
9439 	return index_create(device, name, type, flags, false);
9440 }
9441 
9442 
9443 status_t
9444 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9445 {
9446 	char name[B_FILE_NAME_LENGTH];
9447 	struct stat stat;
9448 	status_t status;
9449 
9450 	if (!IS_USER_ADDRESS(userName)
9451 		|| !IS_USER_ADDRESS(userStat)
9452 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9453 		return B_BAD_ADDRESS;
9454 
9455 	status = index_name_read_stat(device, name, &stat, false);
9456 	if (status == B_OK) {
9457 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9458 			return B_BAD_ADDRESS;
9459 	}
9460 
9461 	return status;
9462 }
9463 
9464 
9465 status_t
9466 _user_remove_index(dev_t device, const char* userName)
9467 {
9468 	char name[B_FILE_NAME_LENGTH];
9469 
9470 	if (!IS_USER_ADDRESS(userName)
9471 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9472 		return B_BAD_ADDRESS;
9473 
9474 	return index_remove(device, name, false);
9475 }
9476 
9477 
9478 status_t
9479 _user_getcwd(char* userBuffer, size_t size)
9480 {
9481 	if (size == 0)
9482 		return B_BAD_VALUE;
9483 	if (!IS_USER_ADDRESS(userBuffer))
9484 		return B_BAD_ADDRESS;
9485 
9486 	if (size > kMaxPathLength)
9487 		size = kMaxPathLength;
9488 
9489 	KPath pathBuffer(size);
9490 	if (pathBuffer.InitCheck() != B_OK)
9491 		return B_NO_MEMORY;
9492 
9493 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9494 
9495 	char* path = pathBuffer.LockBuffer();
9496 
9497 	status_t status = get_cwd(path, size, false);
9498 	if (status != B_OK)
9499 		return status;
9500 
9501 	// Copy back the result
9502 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9503 		return B_BAD_ADDRESS;
9504 
9505 	return status;
9506 }
9507 
9508 
9509 status_t
9510 _user_setcwd(int fd, const char* userPath)
9511 {
9512 	TRACE(("user_setcwd: path = %p\n", userPath));
9513 
9514 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9515 	if (pathBuffer.InitCheck() != B_OK)
9516 		return B_NO_MEMORY;
9517 
9518 	char* path = pathBuffer.LockBuffer();
9519 
9520 	if (userPath != NULL) {
9521 		if (!IS_USER_ADDRESS(userPath)
9522 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9523 			return B_BAD_ADDRESS;
9524 	}
9525 
9526 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9527 }
9528 
9529 
9530 status_t
9531 _user_change_root(const char* userPath)
9532 {
9533 	// only root is allowed to chroot()
9534 	if (geteuid() != 0)
9535 		return EPERM;
9536 
9537 	// alloc path buffer
9538 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9539 	if (pathBuffer.InitCheck() != B_OK)
9540 		return B_NO_MEMORY;
9541 
9542 	// copy userland path to kernel
9543 	char* path = pathBuffer.LockBuffer();
9544 	if (userPath != NULL) {
9545 		if (!IS_USER_ADDRESS(userPath)
9546 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9547 			return B_BAD_ADDRESS;
9548 	}
9549 
9550 	// get the vnode
9551 	struct vnode* vnode;
9552 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9553 	if (status != B_OK)
9554 		return status;
9555 
9556 	// set the new root
9557 	struct io_context* context = get_current_io_context(false);
9558 	mutex_lock(&sIOContextRootLock);
9559 	struct vnode* oldRoot = context->root;
9560 	context->root = vnode;
9561 	mutex_unlock(&sIOContextRootLock);
9562 
9563 	put_vnode(oldRoot);
9564 
9565 	return B_OK;
9566 }
9567 
9568 
9569 int
9570 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9571 	uint32 flags, port_id port, int32 token)
9572 {
9573 	char* query;
9574 
9575 	if (device < 0 || userQuery == NULL || queryLength == 0)
9576 		return B_BAD_VALUE;
9577 
9578 	// this is a safety restriction
9579 	if (queryLength >= 65536)
9580 		return B_NAME_TOO_LONG;
9581 
9582 	query = (char*)malloc(queryLength + 1);
9583 	if (query == NULL)
9584 		return B_NO_MEMORY;
9585 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9586 		free(query);
9587 		return B_BAD_ADDRESS;
9588 	}
9589 
9590 	int fd = query_open(device, query, flags, port, token, false);
9591 
9592 	free(query);
9593 	return fd;
9594 }
9595 
9596 
9597 #include "vfs_request_io.cpp"
9598