xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 9760dcae2038d47442f4658c2575844c6cf92c40)
/*
 * Copyright 2005-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <khash.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
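
// Illustrative note (not part of the original source): FS_CALL dispatches
// through the vnode's fs_vnode_ops table, passing the volume and node along
// with any extra arguments. A hypothetical caller that stats a node could
// guard against hooks the FS doesn't implement like this:
//
//	struct stat st;
//	status_t error = HAS_FS_CALL(vnode, read_stat)
//		? FS_CALL(vnode, read_stat, &st)
//		: EOPNOTSUPP;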


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()) -- this does not
	// depend on PATH_MAX.


struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and covers_vnode fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the covers_vnode, thus
	making the access path vnode->mount->covers_vnode->mount->... safe if a
	reference to vnode is held (note that for the root mount covers_vnode
	is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		recursive_lock_init(&rlock, "mount rlock");
	}

	~fs_mount()
	{
		recursive_lock_destroy(&rlock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	recursive_lock	rlock;	// guards the vnodes list
		// TODO: Make this a mutex! It is never used recursively.
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields immutable after initialization of the fs_mount structures in
	  sMountsTable will not be modified,
	- vnode::covered_by of any vnode in sVnodeTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type can also
	be written when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by requires write locking sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not have this lock held when calling create_sem(), as this
	might call vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


#define VNODE_HASH_TABLE_SIZE 1024
static hash_table* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


// VNodePutter
class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};
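
// Illustrative usage (not in the original source): VNodePutter is a simple
// RAII guard around put_vnode(). A sketch of a typical pattern:
//
//	struct vnode* vnode;
//	if (get_vnode(mountID, nodeID, &vnode, true, false) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... use vnode; the reference is released when `putter` goes
//		// out of scope, unless Detach() transfers ownership first ...
//	}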


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
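
// Illustrative usage (not in the original source): FDCloser plays the same
// RAII role for file descriptors, picking the kernel or userland close()
// variant depending on the context the descriptor belongs to. Sketch:
//
//	FDCloser fdCloser(fd, kernel);
//	// ... on the success path, hand ownership back to the caller:
//	return fdCloser.Detach();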


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
			false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
			fCookie, fPos, fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
			}
		}

		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
			fFlags, fStatus, fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	iovec*			fVecs;
	uint32			fCount;
	uint32			fFlags;
	size_t			fBytesRequested;
	status_t		fStatus;
	size_t			fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
		status_t status, size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


static int
mount_compare(void* _m, const void* _key)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount->id == *id)
		return 0;

	return -1;
}


static uint32
mount_hash(void* _m, const void* _key, uint32 range)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount)
		return mount->id % range;

	return (uint32)*id % range;
}


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(mount->root_vnode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
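
// Example mappings (illustrative): get_file_system_name("bfs") returns a
// copy of "bfs"; get_file_system_name("file_systems/bfs/v1") strips the
// "file_systems/" prefix and the "/v1" suffix and likewise returns "bfs".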


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
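
// Example (illustrative, with a hypothetical layered name "bfs:overlay"):
// layer 0 yields "bfs", layer 1 yields "overlay", and layer 2 yields NULL,
// since there is no third colon-separated component.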


static int
vnode_compare(void* _vnode, const void* _key)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

	if (vnode->device == key->device && vnode->id == key->vnode)
		return 0;

	return -1;
}


static uint32
vnode_hash(void* _vnode, const void* _key, uint32 range)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	if (vnode != NULL)
		return VHASH(vnode->device, vnode->id) % range;

	return VHASH(key->device, key->vnode) % range;

#undef VHASH
}
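
// Worked example (illustrative): VHASH folds the 64-bit vnode ID into 32
// bits by adding its high and low halves, then XORs in the mount ID. For
// device 3 and vnode ID 0x100000002: (0x1 + 0x2) ^ 0x3 == 0.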


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode*)hash_lookup(sVnodeTable, &key);
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count had the chance to
	// drop to 0 at all. Deleting the file cache now will cause the next to
	// last cache reference to be released, which will also release a (no
	// longer existing) vnode reference. To avoid problems, we set the vnode's
	// ref count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	hash_remove(sVnodeTable, vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
		_vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = 2000;
		// try for 10 secs
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
				vnodeID);
			return B_BUSY;
		}
		snooze(5000); // 5 ms
		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			hash_remove(sVnodeTable, vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to
		// the tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the meantime; you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*!	Retrieves the first lock that has been set by the current team.
*/
static status_t
get_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team == team) {
			flock->l_start = lock->start;
			flock->l_len = lock->end - lock->start + 1;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
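
// Worked example (illustrative): a held lock spanning [10, 19] and an flock
// with l_start 15, l_len 10 (i.e. [15, 24]) intersect, since both
// 10 <= 15 - 1 + 10 == 24 and 19 >= 15 hold.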


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				// (allocated with malloc() to match both the free() below and
				// the allocation in acquire_advisory_lock())
				struct advisory_lock* secondLock
					= (struct advisory_lock*)malloc(
						sizeof(struct advisory_lock));
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// copy the original end before it is truncated below
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete_sem(locking->lock);
				delete_sem(locking->wait_sem);
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;
	sem_id waitForLock;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
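
// Worked example (illustrative): with the descriptor's pos at 100, an flock
// of { l_whence = SEEK_CUR, l_start = -20, l_len = 0 } normalizes to
// l_start = 80 and l_len = OFF_MAX - 80, i.e. "from offset 80 to the end of
// the file".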


static void
replace_vnode_if_disconnected(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
	struct vnode* fallBack, bool lockRootLock)
{
	if (lockRootLock)
		mutex_lock(&sIOContextRootLock);

	struct vnode* obsoleteVnode = NULL;

	if (vnode != NULL && vnode->mount == mount
		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
		obsoleteVnode = vnode;

		if (vnode == mount->root_vnode) {
			// redirect the vnode to the covered vnode
			vnode = mount->covers_vnode;
		} else
			vnode = fallBack;

		if (vnode != NULL)
			inc_vnode_ref_count(vnode);
	}

	if (lockRootLock)
		mutex_unlock(&sIOContextRootLock);

	if (obsoleteVnode != NULL)
		put_vnode(obsoleteVnode);
}


/*!	Disconnects all file descriptors that are associated with the
	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
	\a mount object.

	Note, after you've called this function, there might still be ongoing
	accesses - they won't be interrupted if they already happened before.
	However, any subsequent access will fail.

	This is not a cheap function and should be used with care and rarely.
	TODO: there is currently no means to stop a blocking read/write!
*/
void
disconnect_mount_or_vnode_fds(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	int32 nextTeamID = 0;

	while (true) {
		struct io_context* context = NULL;
		bool contextLocked = false;
		struct team* team = NULL;
		team_id lastTeamID;

		cpu_status state = disable_interrupts();
		SpinLocker teamsLock(gTeamSpinlock);

		lastTeamID = peek_next_thread_id();
		if (nextTeamID < lastTeamID) {
			// get next valid team
			while (nextTeamID < lastTeamID
				&& !(team = team_get_team_struct_locked(nextTeamID))) {
				nextTeamID++;
			}

			if (team) {
				context = (io_context*)team->io_context;

				// Some acrobatics to lock the context in a safe way
				// (cf. _kern_get_next_fd_info() for details).
				GRAB_THREAD_LOCK();
				teamsLock.Unlock();
				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
					== B_OK;
				RELEASE_THREAD_LOCK();

				nextTeamID++;
			}
		}

		teamsLock.Unlock();
		restore_interrupts(state);

		if (context == NULL)
			break;

		// we now have a context - since we couldn't lock it while having
		// safe access to the team structure, we now need to lock the mutex
		// manually

		if (!contextLocked) {
			// team seems to be gone, go over to the next team
			continue;
		}

		// the team cannot be deleted completely while we're owning its
		// io_context mutex, so we can safely play with it now

		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
			sRoot, true);
		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
			sRoot, false);

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor* descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode* vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if ((vnode != NULL && vnode->mount == mount)
					|| (vnode == NULL && descriptor->u.mount == mount))
					disconnect_fd(descriptor);

				put_fd(descriptor);
			}
		}

		mutex_unlock(&context->io_mutex);
	}
}
1852 
1853 
1854 /*!	\brief Gets the root node of the current IO context.
1855 	If \a kernel is \c true, the kernel IO context will be used.
1856 	The caller obtains a reference to the returned node.
1857 */
1858 struct vnode*
1859 get_root_vnode(bool kernel)
1860 {
1861 	if (!kernel) {
1862 		// Get current working directory from io context
1863 		struct io_context* context = get_current_io_context(kernel);
1864 
1865 		mutex_lock(&sIOContextRootLock);
1866 
1867 		struct vnode* root = context->root;
1868 		if (root != NULL)
1869 			inc_vnode_ref_count(root);
1870 
1871 		mutex_unlock(&sIOContextRootLock);
1872 
1873 		if (root != NULL)
1874 			return root;
1875 
1876 		// That should never happen.
1877 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1878 			"root\n", team_get_current_team_id());
1879 	}
1880 
1881 	inc_vnode_ref_count(sRoot);
1882 	return sRoot;
1883 }
1884 
1885 
1886 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1887 		   by.
1888 
	Given an arbitrary vnode, the function checks whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
	volume root node and returns it.
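
	For example, if a volume is mounted at "/boot", the "/boot" vnode of the
	parent file system is covered by the root vnode of the mounted volume;
	passing the former to this function yields the latter.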
1892 
1893 	\param vnode The vnode in question.
	\return The volume root vnode the given vnode is covered by, if it is
			indeed a mount point, or \c NULL otherwise.
1896 */
1897 static struct vnode*
1898 resolve_mount_point_to_volume_root(struct vnode* vnode)
1899 {
1900 	if (!vnode)
1901 		return NULL;
1902 
1903 	struct vnode* volumeRoot = NULL;
1904 
1905 	rw_lock_read_lock(&sVnodeLock);
1906 
1907 	if (vnode->covered_by) {
1908 		volumeRoot = vnode->covered_by;
1909 		inc_vnode_ref_count(volumeRoot);
1910 	}
1911 
1912 	rw_lock_read_unlock(&sVnodeLock);
1913 
1914 	return volumeRoot;
1915 }
1916 
1917 
1918 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1919 		   by.
1920 
	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
	it simply returns the supplied mount and node ID.
1925 
1926 	In case of error (e.g. the supplied node could not be found) the variables
1927 	for storing the resolved mount and node ID remain untouched and an error
1928 	code is returned.
1929 
1930 	\param mountID The mount ID of the vnode in question.
1931 	\param nodeID The node ID of the vnode in question.
1932 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1933 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1934 	\return
1935 	- \c B_OK, if everything went fine,
1936 	- another error code, if something went wrong.
1937 */
1938 status_t
1939 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1940 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1941 {
1942 	// get the node
1943 	struct vnode* node;
1944 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1945 	if (error != B_OK)
1946 		return error;
1947 
1948 	// resolve the node
1949 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1950 	if (resolvedNode) {
1951 		put_vnode(node);
1952 		node = resolvedNode;
1953 	}
1954 
1955 	// set the return values
1956 	*resolvedMountID = node->device;
1957 	*resolvedNodeID = node->id;
1958 
1959 	put_vnode(node);
1960 
1961 	return B_OK;
1962 }
1963 
1964 
1965 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1966 
	Given an arbitrary vnode, the function checks whether the node is the
	root of a volume. If it is (and if it is not "/"), the function obtains
1969 	a reference to the underlying mount point node and returns it.
1970 
1971 	\param vnode The vnode in question (caller must have a reference).
1972 	\return The mount point vnode the vnode covers, if it is indeed a volume
1973 			root and not "/", or \c NULL otherwise.
1974 */
1975 static struct vnode*
1976 resolve_volume_root_to_mount_point(struct vnode* vnode)
1977 {
1978 	if (!vnode)
1979 		return NULL;
1980 
1981 	struct vnode* mountPoint = NULL;
1982 
1983 	struct fs_mount* mount = vnode->mount;
1984 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1985 		mountPoint = mount->covers_vnode;
1986 		inc_vnode_ref_count(mountPoint);
1987 	}
1988 
1989 	return mountPoint;
1990 }
1991 
1992 
1993 /*!	\brief Gets the directory path and leaf name for a given path.
1994 
1995 	The supplied \a path is transformed to refer to the directory part of
1996 	the entry identified by the original path, and into the buffer \a filename
1997 	the leaf name of the original entry is written.
1998 	Neither the returned path nor the leaf name can be expected to be
1999 	canonical.
2000 
2001 	\param path The path to be analyzed. Must be able to store at least one
2002 		   additional character.
2003 	\param filename The buffer into which the leaf name will be written.
2004 		   Must be of size B_FILE_NAME_LENGTH at least.
2005 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2006 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2007 		   if the given path name is empty.
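
	For example: "/boot/home/Desktop" is transformed to "/boot/home/." with
	leaf name "Desktop", "/boot/home/" to "/boot/." with leaf name "home",
	and "foo" to "." with leaf name "foo".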
2008 */
2009 static status_t
2010 get_dir_path_and_leaf(char* path, char* filename)
2011 {
2012 	if (*path == '\0')
2013 		return B_ENTRY_NOT_FOUND;
2014 
2015 	char* last = strrchr(path, '/');
2016 		// '/' are not allowed in file names!
2017 
2018 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2019 
2020 	if (last == NULL) {
2021 		// this path is single segment with no '/' in it
2022 		// ex. "foo"
2023 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2024 			return B_NAME_TOO_LONG;
2025 
2026 		strcpy(path, ".");
2027 	} else {
2028 		last++;
2029 		if (last[0] == '\0') {
2030 			// special case: the path ends in one or more '/' - remove them
2031 			while (*--last == '/' && last != path);
2032 			last[1] = '\0';
2033 
2034 			if (last == path && last[0] == '/') {
2035 				// This path points to the root of the file system
2036 				strcpy(filename, ".");
2037 				return B_OK;
2038 			}
2039 			for (; last != path && *(last - 1) != '/'; last--);
2040 				// rewind to the start of the leaf before the '/'
2041 		}
2042 
2043 		// normal leaf: replace the leaf portion of the path with a '.'
2044 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2045 			return B_NAME_TOO_LONG;
2046 
2047 		last[0] = '.';
2048 		last[1] = '\0';
2049 	}
2050 	return B_OK;
2051 }
2052 
2053 
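/*!	Resolves an entry ref - consisting of \a mountID, \a directoryID, and
	\a name - to a vnode, letting vnode_path_to_vnode() do the actual work.
	If \a traverse is \c true, a leaf symlink is followed. On success the
	caller receives a reference to the vnode returned in \a _vnode.
*/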
2054 static status_t
2055 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2056 	bool traverse, bool kernel, struct vnode** _vnode)
2057 {
2058 	char clonedName[B_FILE_NAME_LENGTH + 1];
2059 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2060 		return B_NAME_TOO_LONG;
2061 
2062 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2063 	struct vnode* directory;
2064 
2065 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2066 	if (status < 0)
2067 		return status;
2068 
2069 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2070 		_vnode, NULL);
2071 }
2072 
2073 
2074 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2075 	and returns the respective vnode.
2076 	On success a reference to the vnode is acquired for the caller.
2077 */
2078 static status_t
2079 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2080 {
2081 	ino_t id;
2082 
2083 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2084 		return get_vnode(dir->device, id, _vnode, true, false);
2085 
2086 	status_t status = FS_CALL(dir, lookup, name, &id);
2087 	if (status != B_OK)
2088 		return status;
2089 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2092 	rw_lock_read_lock(&sVnodeLock);
2093 	*_vnode = lookup_vnode(dir->device, id);
2094 	rw_lock_read_unlock(&sVnodeLock);
2095 
2096 	if (*_vnode == NULL) {
2097 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2098 			"0x%Lx)\n", dir->device, id);
2099 		return B_ENTRY_NOT_FOUND;
2100 	}
2101 
2102 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2103 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2104 //		(*_vnode)->mount->id, (*_vnode)->id);
2105 
2106 	return B_OK;
2107 }
2108 
2109 
2110 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2111 	\a path must not be NULL.
	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this function decrements the ref count of the starting \a vnode,
	whether it succeeds or fails!
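
	A minimal usage sketch (illustrative only):
	\code
	// 'dir' is a directory vnode the caller holds a reference to; that
	// reference is consumed by the call
	inc_vnode_ref_count(dir);
	char pathBuffer[B_PATH_NAME_LENGTH];
	strlcpy(pathBuffer, "foo/bar", sizeof(pathBuffer));
	struct vnode* result;
	status_t error = vnode_path_to_vnode(dir, pathBuffer, true, 0,
		get_current_io_context(false), &result, NULL);
	if (error == B_OK)
		put_vnode(result);
	\endcode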
2117 */
2118 static status_t
2119 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2120 	int count, struct io_context* ioContext, struct vnode** _vnode,
2121 	ino_t* _parentID)
2122 {
2123 	status_t status = B_OK;
2124 	ino_t lastParentID = vnode->id;
2125 
2126 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2127 
2128 	if (path == NULL) {
2129 		put_vnode(vnode);
2130 		return B_BAD_VALUE;
2131 	}
2132 
2133 	if (*path == '\0') {
2134 		put_vnode(vnode);
2135 		return B_ENTRY_NOT_FOUND;
2136 	}
2137 
2138 	while (true) {
2139 		struct vnode* nextVnode;
2140 		char* nextPath;
2141 
2142 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2143 			path));
2144 
2145 		// done?
2146 		if (path[0] == '\0')
2147 			break;
2148 
2149 		// walk to find the next path component ("path" will point to a single
2150 		// path component), and filter out multiple slashes
2151 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2152 				nextPath++);
2153 
2154 		if (*nextPath == '/') {
2155 			*nextPath = '\0';
2156 			do
2157 				nextPath++;
2158 			while (*nextPath == '/');
2159 		}
2160 
		// See if the '..' is at the root of a mount and move to the covered
		// vnode, so we pass the '..' path to the underlying file system.
		// Also prevent escaping the root of the IO context.
2164 		if (strcmp("..", path) == 0) {
2165 			if (vnode == ioContext->root) {
2166 				// Attempted prison break! Keep it contained.
2167 				path = nextPath;
2168 				continue;
2169 			} else if (vnode->mount->root_vnode == vnode
2170 				&& vnode->mount->covers_vnode) {
2171 				nextVnode = vnode->mount->covers_vnode;
2172 				inc_vnode_ref_count(nextVnode);
2173 				put_vnode(vnode);
2174 				vnode = nextVnode;
2175 			}
2176 		}
2177 
2178 		// check if vnode is really a directory
2179 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2180 			status = B_NOT_A_DIRECTORY;
2181 
2182 		// Check if we have the right to search the current directory vnode.
2183 		// If a file system doesn't have the access() function, we assume that
2184 		// searching a directory is always allowed
2185 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2186 			status = FS_CALL(vnode, access, X_OK);
2187 
2188 		// Tell the filesystem to get the vnode of this path component (if we
2189 		// got the permission from the call above)
2190 		if (status == B_OK)
2191 			status = lookup_dir_entry(vnode, path, &nextVnode);
2192 
2193 		if (status != B_OK) {
2194 			put_vnode(vnode);
2195 			return status;
2196 		}
2197 
2198 		// If the new node is a symbolic link, resolve it (if we've been told
2199 		// to do it)
2200 		if (S_ISLNK(nextVnode->Type())
2201 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2202 			size_t bufferSize;
2203 			char* buffer;
2204 
2205 			TRACE(("traverse link\n"));
2206 
2207 			// it's not exactly nice style using goto in this way, but hey,
2208 			// it works :-/
2209 			if (count + 1 > B_MAX_SYMLINKS) {
2210 				status = B_LINK_LIMIT;
2211 				goto resolve_link_error;
2212 			}
2213 
2214 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2215 			if (buffer == NULL) {
2216 				status = B_NO_MEMORY;
2217 				goto resolve_link_error;
2218 			}
2219 
2220 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2221 				bufferSize--;
2222 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2223 				// null-terminate
2224 				if (status >= 0)
2225 					buffer[bufferSize] = '\0';
2226 			} else
2227 				status = B_BAD_VALUE;
2228 
2229 			if (status != B_OK) {
2230 				free(buffer);
2231 
2232 		resolve_link_error:
2233 				put_vnode(vnode);
2234 				put_vnode(nextVnode);
2235 
2236 				return status;
2237 			}
2238 			put_vnode(nextVnode);
2239 
2240 			// Check if we start from the root directory or the current
2241 			// directory ("vnode" still points to that one).
2242 			// Cut off all leading slashes if it's the root directory
2243 			path = buffer;
2244 			bool absoluteSymlink = false;
2245 			if (path[0] == '/') {
2246 				// we don't need the old directory anymore
2247 				put_vnode(vnode);
2248 
2249 				while (*++path == '/')
2250 					;
2251 
2252 				mutex_lock(&sIOContextRootLock);
2253 				vnode = ioContext->root;
2254 				inc_vnode_ref_count(vnode);
2255 				mutex_unlock(&sIOContextRootLock);
2256 
2257 				absoluteSymlink = true;
2258 			}
2259 
2260 			inc_vnode_ref_count(vnode);
2261 				// balance the next recursion - we will decrement the
2262 				// ref_count of the vnode, no matter if we succeeded or not
2263 
2264 			if (absoluteSymlink && *path == '\0') {
2265 				// symlink was just "/"
2266 				nextVnode = vnode;
2267 			} else {
2268 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2269 					ioContext, &nextVnode, &lastParentID);
2270 			}
2271 
2272 			free(buffer);
2273 
2274 			if (status != B_OK) {
2275 				put_vnode(vnode);
2276 				return status;
2277 			}
2278 		} else
2279 			lastParentID = vnode->id;
2280 
2281 		// decrease the ref count on the old dir we just looked up into
2282 		put_vnode(vnode);
2283 
2284 		path = nextPath;
2285 		vnode = nextVnode;
2286 
2287 		// see if we hit a mount point
2288 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2289 		if (mountPoint) {
2290 			put_vnode(vnode);
2291 			vnode = mountPoint;
2292 		}
2293 	}
2294 
2295 	*_vnode = vnode;
2296 	if (_parentID)
2297 		*_parentID = lastParentID;
2298 
2299 	return B_OK;
2300 }
2301 
2302 
2303 static status_t
2304 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2305 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2306 {
2307 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2308 		get_current_io_context(kernel), _vnode, _parentID);
2309 }
2310 
2311 
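/*!	Resolves \a path - either absolute or relative to the current IO
	context's working directory - to a vnode. On success the caller receives
	a reference to the vnode returned in \a _vnode; if \a _parentID is not
	\c NULL, it is set to the ID of the last visited directory.
*/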
2312 static status_t
2313 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2314 	ino_t* _parentID, bool kernel)
2315 {
2316 	struct vnode* start = NULL;
2317 
2318 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2319 
2320 	if (!path)
2321 		return B_BAD_VALUE;
2322 
2323 	if (*path == '\0')
2324 		return B_ENTRY_NOT_FOUND;
2325 
2326 	// figure out if we need to start at root or at cwd
2327 	if (*path == '/') {
2328 		if (sRoot == NULL) {
2329 			// we're a bit early, aren't we?
2330 			return B_ERROR;
2331 		}
2332 
2333 		while (*++path == '/')
2334 			;
2335 		start = get_root_vnode(kernel);
2336 
2337 		if (*path == '\0') {
2338 			*_vnode = start;
2339 			return B_OK;
2340 		}
2341 
2342 	} else {
2343 		struct io_context* context = get_current_io_context(kernel);
2344 
2345 		mutex_lock(&context->io_mutex);
2346 		start = context->cwd;
2347 		if (start != NULL)
2348 			inc_vnode_ref_count(start);
2349 		mutex_unlock(&context->io_mutex);
2350 
2351 		if (start == NULL)
2352 			return B_ERROR;
2353 	}
2354 
2355 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2356 		_parentID);
2357 }
2358 
2359 
/*!	Returns the vnode of the next-to-last segment of the path, and returns
	the last portion in \a filename.
2362 	The path buffer must be able to store at least one additional character.
2363 */
2364 static status_t
2365 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2366 	bool kernel)
2367 {
2368 	status_t status = get_dir_path_and_leaf(path, filename);
2369 	if (status != B_OK)
2370 		return status;
2371 
2372 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2373 }
2374 
2375 
2376 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2377 		   to by a FD + path pair.
2378 
	\a path must be given in either case. \a fd might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2384 
2385 	The caller has the responsibility to call put_vnode() on the returned
2386 	directory vnode.
2387 
2388 	\param fd The FD. May be < 0.
2389 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2390 	       is modified by this function. It must have at least room for a
2391 	       string one character longer than the path it contains.
2392 	\param _vnode A pointer to a variable the directory vnode shall be written
2393 		   into.
2394 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2395 		   the leaf name of the specified entry will be written.
2396 	\param kernel \c true, if invoked from inside the kernel, \c false if
2397 		   invoked from userland.
2398 	\return \c B_OK, if everything went fine, another error code otherwise.
2399 */
2400 static status_t
2401 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2402 	char* filename, bool kernel)
2403 {
2404 	if (!path)
2405 		return B_BAD_VALUE;
2406 	if (*path == '\0')
2407 		return B_ENTRY_NOT_FOUND;
2408 	if (fd < 0)
2409 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2410 
2411 	status_t status = get_dir_path_and_leaf(path, filename);
2412 	if (status != B_OK)
2413 		return status;
2414 
2415 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2416 }
2417 
2418 
2419 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2420 		   to by a vnode + path pair.
2421 
	\a path must be given in either case. \a vnode might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2427 
2428 	The caller has the responsibility to call put_vnode() on the returned
2429 	directory vnode.
2430 
2431 	\param vnode The vnode. May be \c NULL.
2432 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2433 	       is modified by this function. It must have at least room for a
2434 	       string one character longer than the path it contains.
2435 	\param _vnode A pointer to a variable the directory vnode shall be written
2436 		   into.
2437 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2438 		   the leaf name of the specified entry will be written.
2439 	\param kernel \c true, if invoked from inside the kernel, \c false if
2440 		   invoked from userland.
2441 	\return \c B_OK, if everything went fine, another error code otherwise.
2442 */
2443 static status_t
2444 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2445 	struct vnode** _vnode, char* filename, bool kernel)
2446 {
2447 	if (!path)
2448 		return B_BAD_VALUE;
2449 	if (*path == '\0')
2450 		return B_ENTRY_NOT_FOUND;
2451 	if (vnode == NULL || path[0] == '/')
2452 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2453 
2454 	status_t status = get_dir_path_and_leaf(path, filename);
2455 	if (status != B_OK)
2456 		return status;
2457 
2458 	inc_vnode_ref_count(vnode);
2459 		// vnode_path_to_vnode() always decrements the ref count
2460 
2461 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2462 }
2463 
2464 
2465 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2466 */
2467 static status_t
2468 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2469 	size_t bufferSize, struct io_context* ioContext)
2470 {
2471 	if (bufferSize < sizeof(struct dirent))
2472 		return B_BAD_VALUE;
2473 
2474 	// See if vnode is the root of a mount and move to the covered
2475 	// vnode so we get the underlying file system
2476 	VNodePutter vnodePutter;
2477 	if (vnode->mount->root_vnode == vnode
2478 		&& vnode->mount->covers_vnode != NULL) {
2479 		vnode = vnode->mount->covers_vnode;
2480 		inc_vnode_ref_count(vnode);
2481 		vnodePutter.SetTo(vnode);
2482 	}
2483 
2484 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2485 		// The FS supports getting the name of a vnode.
2486 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2487 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2488 			return B_OK;
2489 	}
2490 
2491 	// The FS doesn't support getting the name of a vnode. So we search the
2492 	// parent directory for the vnode, if the caller let us.
2493 
2494 	if (parent == NULL)
2495 		return EOPNOTSUPP;
2496 
2497 	void* cookie;
2498 
2499 	status_t status = FS_CALL(parent, open_dir, &cookie);
2500 	if (status >= B_OK) {
2501 		while (true) {
2502 			uint32 num = 1;
2503 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2504 				&num);
2505 			if (status != B_OK)
2506 				break;
2507 			if (num == 0) {
2508 				status = B_ENTRY_NOT_FOUND;
2509 				break;
2510 			}
2511 
2512 			if (vnode->id == buffer->d_ino) {
2513 				// found correct entry!
2514 				break;
2515 			}
2516 		}
2517 
		// the cookie was opened on the parent, so close it there as well
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2520 	}
2521 	return status;
2522 }
2523 
2524 
2525 static status_t
2526 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2527 	size_t nameSize, bool kernel)
2528 {
2529 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2530 	struct dirent* dirent = (struct dirent*)buffer;
2531 
2532 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2533 		get_current_io_context(kernel));
2534 	if (status != B_OK)
2535 		return status;
2536 
2537 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2538 		return B_BUFFER_OVERFLOW;
2539 
2540 	return B_OK;
2541 }
2542 
2543 
2544 /*!	Gets the full path to a given directory vnode.
2545 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2546 	file system doesn't support this call, it will fall back to iterating
2547 	through the parent directory to get the name of the child.
2548 
	To protect against loops, it supports a maximum tree depth of 256 levels.
2551 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path
	remains valid, as paths aren't safe anyway: the path to a file can change
	at any time.

	It might be a good idea, though, for the calling function to check whether
	the returned path exists (it's not done here for efficiency reasons).
2558 */
2559 static status_t
2560 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2561 	bool kernel)
2562 {
2563 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2564 
2565 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2566 		return B_BAD_VALUE;
2567 
2568 	if (!S_ISDIR(vnode->Type()))
2569 		return B_NOT_A_DIRECTORY;
2570 
2571 	char* path = buffer;
2572 	int32 insert = bufferSize;
2573 	int32 maxLevel = 256;
2574 	int32 length;
2575 	status_t status;
2576 	struct io_context* ioContext = get_current_io_context(kernel);
2577 
2578 	// we don't use get_vnode() here because this call is more
2579 	// efficient and does all we need from get_vnode()
2580 	inc_vnode_ref_count(vnode);
2581 
2582 	if (vnode != ioContext->root) {
2583 		// we don't hit the IO context root
2584 		// resolve a volume root to its mount point
2585 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2586 		if (mountPoint) {
2587 			put_vnode(vnode);
2588 			vnode = mountPoint;
2589 		}
2590 	}
2591 
2592 	path[--insert] = '\0';
2593 		// the path is filled right to left
2594 
2595 	while (true) {
2596 		// the name buffer is also used for fs_read_dir()
2597 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2598 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2599 		struct vnode* parentVnode;
2600 		ino_t parentID;
2601 
2602 		// lookup the parent vnode
2603 		if (vnode == ioContext->root) {
2604 			// we hit the IO context root
2605 			parentVnode = vnode;
2606 			inc_vnode_ref_count(vnode);
2607 		} else {
2608 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2609 			if (status != B_OK)
2610 				goto out;
2611 		}
2612 
2613 		// get the node's name
2614 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2615 			sizeof(nameBuffer), ioContext);
2616 
2617 		if (vnode != ioContext->root) {
2618 			// we don't hit the IO context root
2619 			// resolve a volume root to its mount point
2620 			struct vnode* mountPoint
2621 				= resolve_volume_root_to_mount_point(parentVnode);
2622 			if (mountPoint) {
2623 				put_vnode(parentVnode);
2624 				parentVnode = mountPoint;
2625 				parentID = parentVnode->id;
2626 			}
2627 		}
2628 
2629 		bool hitRoot = (parentVnode == vnode);
2630 
2631 		// release the current vnode, we only need its parent from now on
2632 		put_vnode(vnode);
2633 		vnode = parentVnode;
2634 
2635 		if (status != B_OK)
2636 			goto out;
2637 
2638 		if (hitRoot) {
2639 			// we have reached "/", which means we have constructed the full
2640 			// path
2641 			break;
2642 		}
2643 
2644 		// TODO: add an explicit check for loops in about 10 levels to do
2645 		// real loop detection
2646 
		// don't go deeper than 'maxLevel' to prevent loops
2648 		if (maxLevel-- < 0) {
2649 			status = B_LINK_LIMIT;
2650 			goto out;
2651 		}
2652 
2653 		// add the name in front of the current path
2654 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2655 		length = strlen(name);
2656 		insert -= length;
2657 		if (insert <= 0) {
2658 			status = B_RESULT_NOT_REPRESENTABLE;
2659 			goto out;
2660 		}
2661 		memcpy(path + insert, name, length);
2662 		path[--insert] = '/';
2663 	}
2664 
2665 	// the root dir will result in an empty path: fix it
2666 	if (path[insert] == '\0')
2667 		path[--insert] = '/';
2668 
2669 	TRACE(("  path is: %s\n", path + insert));
2670 
2671 	// move the path to the start of the buffer
2672 	length = bufferSize - insert;
2673 	memmove(buffer, path + insert, length);
2674 
2675 out:
2676 	put_vnode(vnode);
2677 	return status;
2678 }
2679 
2680 
2681 /*!	Checks the length of every path component, and adds a '.'
2682 	if the path ends in a slash.
2683 	The given path buffer must be able to store at least one
2684 	additional character.
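
	For example, "/boot/home/" is completed to "/boot/home/.".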
2685 */
2686 static status_t
2687 check_path(char* to)
2688 {
2689 	int32 length = 0;
2690 
2691 	// check length of every path component
2692 
2693 	while (*to) {
2694 		char* begin;
2695 		if (*to == '/')
2696 			to++, length++;
2697 
2698 		begin = to;
2699 		while (*to != '/' && *to)
2700 			to++, length++;
2701 
2702 		if (to - begin > B_FILE_NAME_LENGTH)
2703 			return B_NAME_TOO_LONG;
2704 	}
2705 
2706 	if (length == 0)
2707 		return B_ENTRY_NOT_FOUND;
2708 
2709 	// complete path if there is a slash at the end
2710 
2711 	if (*(to - 1) == '/') {
2712 		if (length > B_PATH_NAME_LENGTH - 2)
2713 			return B_NAME_TOO_LONG;
2714 
2715 		to[0] = '.';
2716 		to[1] = '\0';
2717 	}
2718 
2719 	return B_OK;
2720 }
2721 
2722 
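/*!	Gets the file descriptor \a fd of the current IO context together with
	the vnode it refers to. The caller receives a reference to the descriptor
	(to be released via put_fd()), but no additional reference to the vnode.
*/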
2723 static struct file_descriptor*
2724 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2725 {
2726 	struct file_descriptor* descriptor
2727 		= get_fd(get_current_io_context(kernel), fd);
2728 	if (descriptor == NULL)
2729 		return NULL;
2730 
2731 	struct vnode* vnode = fd_vnode(descriptor);
2732 	if (vnode == NULL) {
2733 		put_fd(descriptor);
2734 		return NULL;
2735 	}
2736 
2737 	// ToDo: when we can close a file descriptor at any point, investigate
2738 	//	if this is still valid to do (accessing the vnode without ref_count
2739 	//	or locking)
2740 	*_vnode = vnode;
2741 	return descriptor;
2742 }
2743 
2744 
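/*!	Returns the vnode the file descriptor \a fd of the current IO context
	refers to, or \c NULL if it doesn't refer to one. On success the caller
	receives a reference to the vnode.
*/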
2745 static struct vnode*
2746 get_vnode_from_fd(int fd, bool kernel)
2747 {
2748 	struct file_descriptor* descriptor;
2749 	struct vnode* vnode;
2750 
2751 	descriptor = get_fd(get_current_io_context(kernel), fd);
2752 	if (descriptor == NULL)
2753 		return NULL;
2754 
2755 	vnode = fd_vnode(descriptor);
2756 	if (vnode != NULL)
2757 		inc_vnode_ref_count(vnode);
2758 
2759 	put_fd(descriptor);
2760 	return vnode;
2761 }
2762 
2763 
2764 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2765 	only the path will be considered. In this case, the \a path must not be
2766 	NULL.
2767 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2768 	and should be NULL for files.
2769 */
2770 static status_t
2771 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2772 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2773 {
2774 	if (fd < 0 && !path)
2775 		return B_BAD_VALUE;
2776 
2777 	if (path != NULL && *path == '\0')
2778 		return B_ENTRY_NOT_FOUND;
2779 
2780 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2781 		// no FD or absolute path
2782 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2783 	}
2784 
2785 	// FD only, or FD + relative path
2786 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2787 	if (!vnode)
2788 		return B_FILE_ERROR;
2789 
2790 	if (path != NULL) {
2791 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2792 			_vnode, _parentID);
2793 	}
2794 
2795 	// there is no relative path to take into account
2796 
2797 	*_vnode = vnode;
2798 	if (_parentID)
2799 		*_parentID = -1;
2800 
2801 	return B_OK;
2802 }
2803 
2804 
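/*!	Wraps the given \a vnode (or, for the mount FD types FDTYPE_INDEX_DIR and
	FDTYPE_QUERY, the given \a mount) and \a cookie in a new file descriptor
	of the given \a type, and installs it in the current IO context.
	Returns the FD number on success, or an error code (< 0) on failure.
*/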
2805 static int
2806 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2807 	void* cookie, int openMode, bool kernel)
2808 {
2809 	struct file_descriptor* descriptor;
2810 	int fd;
2811 
2812 	// If the vnode is locked, we don't allow creating a new file/directory
2813 	// file_descriptor for it
2814 	if (vnode && vnode->mandatory_locked_by != NULL
2815 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2816 		return B_BUSY;
2817 
2818 	descriptor = alloc_fd();
2819 	if (!descriptor)
2820 		return B_NO_MEMORY;
2821 
2822 	if (vnode)
2823 		descriptor->u.vnode = vnode;
2824 	else
2825 		descriptor->u.mount = mount;
2826 	descriptor->cookie = cookie;
2827 
2828 	switch (type) {
2829 		// vnode types
2830 		case FDTYPE_FILE:
2831 			descriptor->ops = &sFileOps;
2832 			break;
2833 		case FDTYPE_DIR:
2834 			descriptor->ops = &sDirectoryOps;
2835 			break;
2836 		case FDTYPE_ATTR:
2837 			descriptor->ops = &sAttributeOps;
2838 			break;
2839 		case FDTYPE_ATTR_DIR:
2840 			descriptor->ops = &sAttributeDirectoryOps;
2841 			break;
2842 
2843 		// mount types
2844 		case FDTYPE_INDEX_DIR:
2845 			descriptor->ops = &sIndexDirectoryOps;
2846 			break;
2847 		case FDTYPE_QUERY:
2848 			descriptor->ops = &sQueryOps;
2849 			break;
2850 
2851 		default:
2852 			panic("get_new_fd() called with unknown type %d\n", type);
2853 			break;
2854 	}
2855 	descriptor->type = type;
2856 	descriptor->open_mode = openMode;
2857 
2858 	fd = new_fd(get_current_io_context(kernel), descriptor);
2859 	if (fd < 0) {
2860 		free(descriptor);
2861 		return B_NO_MORE_FDS;
2862 	}
2863 
2864 	return fd;
2865 }
2866 
2867 
/*!	Normalizes \a path in place. It's otherwise semantically equivalent to
	vfs_normalize_path(). See there for more documentation.
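
	For example (illustrative), "/boot//home/../home/./Desktop" would
	normally be normalized to "/boot/home/Desktop", with a leaf symlink
	resolved if \a traverseLink is \c true.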
2870 */
2871 static status_t
2872 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2873 {
2874 	VNodePutter dirPutter;
2875 	struct vnode* dir = NULL;
2876 	status_t error;
2877 
2878 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2879 		// get dir vnode + leaf name
2880 		struct vnode* nextDir;
2881 		char leaf[B_FILE_NAME_LENGTH];
2882 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2883 		if (error != B_OK)
2884 			return error;
2885 
2886 		dir = nextDir;
2887 		strcpy(path, leaf);
2888 		dirPutter.SetTo(dir);
2889 
2890 		// get file vnode, if we shall resolve links
2891 		bool fileExists = false;
2892 		struct vnode* fileVnode;
2893 		VNodePutter fileVnodePutter;
2894 		if (traverseLink) {
2895 			inc_vnode_ref_count(dir);
2896 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2897 					NULL) == B_OK) {
2898 				fileVnodePutter.SetTo(fileVnode);
2899 				fileExists = true;
2900 			}
2901 		}
2902 
2903 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2904 			// we're done -- construct the path
2905 			bool hasLeaf = true;
2906 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2907 				// special cases "." and ".." -- get the dir, forget the leaf
2908 				inc_vnode_ref_count(dir);
2909 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2910 					&nextDir, NULL);
2911 				if (error != B_OK)
2912 					return error;
2913 				dir = nextDir;
2914 				dirPutter.SetTo(dir);
2915 				hasLeaf = false;
2916 			}
2917 
2918 			// get the directory path
2919 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2920 			if (error != B_OK)
2921 				return error;
2922 
2923 			// append the leaf name
2924 			if (hasLeaf) {
2925 				// insert a directory separator if this is not the file system
2926 				// root
2927 				if ((strcmp(path, "/") != 0
2928 					&& strlcat(path, "/", pathSize) >= pathSize)
2929 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2930 					return B_NAME_TOO_LONG;
2931 				}
2932 			}
2933 
2934 			return B_OK;
2935 		}
2936 
2937 		// read link
2938 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2939 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2940 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2941 			if (error != B_OK)
2942 				return error;
2943 			path[bufferSize] = '\0';
2944 		} else
2945 			return B_BAD_VALUE;
2946 	}
2947 
2948 	return B_LINK_LIMIT;
2949 }
2950 
2951 
2952 #ifdef ADD_DEBUGGER_COMMANDS
2953 
2954 
2955 static void
2956 _dump_advisory_locking(advisory_locking* locking)
2957 {
2958 	if (locking == NULL)
2959 		return;
2960 
2961 	kprintf("   lock:        %ld", locking->lock);
2962 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2963 
2964 	int32 index = 0;
2965 	LockList::Iterator iterator = locking->locks.GetIterator();
2966 	while (iterator.HasNext()) {
2967 		struct advisory_lock* lock = iterator.Next();
2968 
2969 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2970 		kprintf("        start:  %Ld\n", lock->start);
2971 		kprintf("        end:    %Ld\n", lock->end);
2972 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2973 	}
2974 }
2975 
2976 
2977 static void
2978 _dump_mount(struct fs_mount* mount)
2979 {
2980 	kprintf("MOUNT: %p\n", mount);
2981 	kprintf(" id:            %ld\n", mount->id);
2982 	kprintf(" device_name:   %s\n", mount->device_name);
2983 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2984 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2985 	kprintf(" partition:     %p\n", mount->partition);
2986 	kprintf(" lock:          %p\n", &mount->rlock);
2987 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2988 		mount->owns_file_device ? " owns_file_device" : "");
2989 
2990 	fs_volume* volume = mount->volume;
2991 	while (volume != NULL) {
2992 		kprintf(" volume %p:\n", volume);
2993 		kprintf("  layer:            %ld\n", volume->layer);
2994 		kprintf("  private_volume:   %p\n", volume->private_volume);
2995 		kprintf("  ops:              %p\n", volume->ops);
2996 		kprintf("  file_system:      %p\n", volume->file_system);
2997 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2998 		volume = volume->super_volume;
2999 	}
3000 
3001 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3002 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3003 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3004 	set_debug_variable("_partition", (addr_t)mount->partition);
3005 }
3006 
3007 
3008 static void
3009 _dump_vnode(struct vnode* vnode)
3010 {
3011 	kprintf("VNODE: %p\n", vnode);
3012 	kprintf(" device:        %ld\n", vnode->device);
3013 	kprintf(" id:            %Ld\n", vnode->id);
3014 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3015 	kprintf(" private_node:  %p\n", vnode->private_node);
3016 	kprintf(" mount:         %p\n", vnode->mount);
3017 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3018 	kprintf(" cache:         %p\n", vnode->cache);
3019 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3020 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3021 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3022 
3023 	_dump_advisory_locking(vnode->advisory_locking);
3024 
3025 	set_debug_variable("_node", (addr_t)vnode->private_node);
3026 	set_debug_variable("_mount", (addr_t)vnode->mount);
3027 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3028 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3029 }
3030 
3031 
3032 static int
3033 dump_mount(int argc, char** argv)
3034 {
3035 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3036 		kprintf("usage: %s [id|address]\n", argv[0]);
3037 		return 0;
3038 	}
3039 
3040 	uint32 id = parse_expression(argv[1]);
3041 	struct fs_mount* mount = NULL;
3042 
3043 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3044 	if (mount == NULL) {
3045 		if (IS_USER_ADDRESS(id)) {
3046 			kprintf("fs_mount not found\n");
3047 			return 0;
3048 		}
3049 		mount = (fs_mount*)id;
3050 	}
3051 
3052 	_dump_mount(mount);
3053 	return 0;
3054 }
3055 
3056 
3057 static int
3058 dump_mounts(int argc, char** argv)
3059 {
3060 	if (argc != 1) {
3061 		kprintf("usage: %s\n", argv[0]);
3062 		return 0;
3063 	}
3064 
3065 	kprintf("address     id root       covers     cookie     fs_name\n");
3066 
3067 	struct hash_iterator iterator;
3068 	struct fs_mount* mount;
3069 
3070 	hash_open(sMountsTable, &iterator);
3071 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3072 			!= NULL) {
3073 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3074 			mount->covers_vnode, mount->volume->private_volume,
3075 			mount->volume->file_system_name);
3076 
3077 		fs_volume* volume = mount->volume;
3078 		while (volume->super_volume != NULL) {
3079 			volume = volume->super_volume;
3080 			kprintf("                                     %p %s\n",
3081 				volume->private_volume, volume->file_system_name);
3082 		}
3083 	}
3084 
3085 	hash_close(sMountsTable, &iterator, false);
3086 	return 0;
3087 }
3088 
3089 
3090 static int
3091 dump_vnode(int argc, char** argv)
3092 {
3093 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
3094 		kprintf("usage: %s <device> <id>\n"
3095 			"   or: %s <address>\n", argv[0], argv[0]);
3096 		return 0;
3097 	}
3098 
3099 	struct vnode* vnode = NULL;
3100 
3101 	if (argc == 2) {
3102 		vnode = (struct vnode*)parse_expression(argv[1]);
3103 		if (IS_USER_ADDRESS(vnode)) {
3104 			kprintf("invalid vnode address\n");
3105 			return 0;
3106 		}
3107 		_dump_vnode(vnode);
3108 		return 0;
3109 	}
3110 
3111 	struct hash_iterator iterator;
3112 	dev_t device = parse_expression(argv[1]);
3113 	ino_t id = parse_expression(argv[2]);
3114 
3115 	hash_open(sVnodeTable, &iterator);
3116 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3117 		if (vnode->id != id || vnode->device != device)
3118 			continue;
3119 
3120 		_dump_vnode(vnode);
3121 	}
3122 
3123 	hash_close(sVnodeTable, &iterator, false);
3124 	return 0;
3125 }
3126 
3127 
3128 static int
3129 dump_vnodes(int argc, char** argv)
3130 {
3131 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3132 		kprintf("usage: %s [device]\n", argv[0]);
3133 		return 0;
3134 	}
3135 
3136 	// restrict dumped nodes to a certain device if requested
3137 	dev_t device = parse_expression(argv[1]);
3138 
3139 	struct hash_iterator iterator;
3140 	struct vnode* vnode;
3141 
3142 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3143 		"flags\n");
3144 
3145 	hash_open(sVnodeTable, &iterator);
3146 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3147 		if (vnode->device != device)
3148 			continue;
3149 
3150 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3151 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3152 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3153 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3154 	}
3155 
3156 	hash_close(sVnodeTable, &iterator, false);
3157 	return 0;
3158 }
3159 
3160 
3161 static int
3162 dump_vnode_caches(int argc, char** argv)
3163 {
3164 	struct hash_iterator iterator;
3165 	struct vnode* vnode;
3166 
3167 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3168 		kprintf("usage: %s [device]\n", argv[0]);
3169 		return 0;
3170 	}
3171 
3172 	// restrict dumped nodes to a certain device if requested
3173 	dev_t device = -1;
3174 	if (argc > 1)
3175 		device = parse_expression(argv[1]);
3176 
3177 	kprintf("address    dev     inode cache          size   pages\n");
3178 
3179 	hash_open(sVnodeTable, &iterator);
3180 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3181 		if (vnode->cache == NULL)
3182 			continue;
3183 		if (device != -1 && vnode->device != device)
3184 			continue;
3185 
3186 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3187 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3188 				/ B_PAGE_SIZE, vnode->cache->page_count);
3189 	}
3190 
3191 	hash_close(sVnodeTable, &iterator, false);
3192 	return 0;
3193 }
3194 
3195 
3196 int
3197 dump_io_context(int argc, char** argv)
3198 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3200 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3201 		return 0;
3202 	}
3203 
3204 	struct io_context* context = NULL;
3205 
3206 	if (argc > 1) {
3207 		uint32 num = parse_expression(argv[1]);
3208 		if (IS_KERNEL_ADDRESS(num))
3209 			context = (struct io_context*)num;
3210 		else {
3211 			struct team* team = team_get_team_struct_locked(num);
3212 			if (team == NULL) {
3213 				kprintf("could not find team with ID %ld\n", num);
3214 				return 0;
3215 			}
3216 			context = (struct io_context*)team->io_context;
3217 		}
3218 	} else
3219 		context = get_current_io_context(true);
3220 
3221 	kprintf("I/O CONTEXT: %p\n", context);
3222 	kprintf(" root vnode:\t%p\n", context->root);
3223 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3224 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3225 	kprintf(" max fds:\t%lu\n", context->table_size);
3226 
3227 	if (context->num_used_fds)
3228 		kprintf("   no. type     ops ref open mode        pos cookie\n");
3229 
3230 	for (uint32 i = 0; i < context->table_size; i++) {
3231 		struct file_descriptor* fd = context->fds[i];
3232 		if (fd == NULL)
3233 			continue;
3234 
3235 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type,
3236 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode, fd->pos,
3237 			fd->cookie, fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3238 				? "mount" : "vnode",
3239 			fd->u.vnode);
3240 	}
3241 
3242 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3243 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3244 
3245 	set_debug_variable("_cwd", (addr_t)context->cwd);
3246 
3247 	return 0;
3248 }
3249 
3250 
3251 int
3252 dump_vnode_usage(int argc, char** argv)
3253 {
3254 	if (argc != 1) {
3255 		kprintf("usage: %s\n", argv[0]);
3256 		return 0;
3257 	}
3258 
3259 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3260 		kMaxUnusedVnodes);
3261 
3262 	struct hash_iterator iterator;
3263 	hash_open(sVnodeTable, &iterator);
3264 
3265 	uint32 count = 0;
3266 	struct vnode* vnode;
3267 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3268 		count++;
3269 	}
3270 
3271 	hash_close(sVnodeTable, &iterator, false);
3272 
3273 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3274 	return 0;
3275 }
3276 
3277 #endif	// ADD_DEBUGGER_COMMANDS
3278 
3279 /*!	Clears an iovec array of physical pages.
3280 	Returns in \a _bytes the number of bytes successfully cleared.
3281 */
3282 static status_t
3283 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3284 {
3285 	size_t bytes = *_bytes;
3286 	size_t index = 0;
3287 
3288 	while (bytes > 0) {
3289 		size_t length = min_c(vecs[index].iov_len, bytes);
3290 
3291 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3292 			length);
3293 		if (status != B_OK) {
3294 			*_bytes -= bytes;
3295 			return status;
3296 		}
3297 
		bytes -= length;
		index++;
	}
3300 
3301 	return B_OK;
3302 }
3303 
3304 
3305 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3306 	and calls the file system hooks to read/write the request to disk.
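
	To illustrate: the \a fileVecs describe where the request lives on disk
	(offset/length extents, an offset of -1 denoting a sparse extent), while
	the \a vecs describe the memory side; this function walks both lists in
	lockstep, carving them into matching chunks that are handed to the
	read_pages()/write_pages() hooks.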
3307 */
3308 static status_t
3309 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3310 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3311 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3312 	bool doWrite)
3313 {
3314 	if (fileVecCount == 0) {
3315 		// There are no file vecs at this offset, so we're obviously trying
3316 		// to access the file outside of its bounds
3317 		return B_BAD_VALUE;
3318 	}
3319 
3320 	size_t numBytes = *_numBytes;
3321 	uint32 fileVecIndex;
3322 	size_t vecOffset = *_vecOffset;
3323 	uint32 vecIndex = *_vecIndex;
3324 	status_t status;
3325 	size_t size;
3326 
3327 	if (!doWrite && vecOffset == 0) {
3328 		// now directly read the data from the device
3329 		// the first file_io_vec can be read directly
3330 
3331 		if (fileVecs[0].length < numBytes)
3332 			size = fileVecs[0].length;
3333 		else
3334 			size = numBytes;
3335 
3336 		if (fileVecs[0].offset >= 0) {
3337 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3338 				&vecs[vecIndex], vecCount - vecIndex, &size);
3339 		} else {
3340 			// sparse read
3341 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3342 		}
3343 		if (status != B_OK)
3344 			return status;
3345 
3346 		// TODO: this is a work-around for buggy device drivers!
3347 		//	When our own drivers honour the length, we can:
3348 		//	a) also use this direct I/O for writes (otherwise, it would
3349 		//	   overwrite precious data)
3350 		//	b) panic if the term below is true (at least for writes)
3351 		if (size > fileVecs[0].length) {
3352 			//dprintf("warning: device driver %p doesn't respect total length "
3353 			//	"in read_pages() call!\n", ref->device);
3354 			size = fileVecs[0].length;
3355 		}
3356 
3357 		ASSERT(size <= fileVecs[0].length);
3358 
3359 		// If the file portion was contiguous, we're already done now
3360 		if (size == numBytes)
3361 			return B_OK;
3362 
3363 		// if we reached the end of the file, we can return as well
3364 		if (size != fileVecs[0].length) {
3365 			*_numBytes = size;
3366 			return B_OK;
3367 		}
3368 
3369 		fileVecIndex = 1;
3370 
3371 		// first, find out where we have to continue in our iovecs
3372 		for (; vecIndex < vecCount; vecIndex++) {
3373 			if (size < vecs[vecIndex].iov_len)
3374 				break;
3375 
3376 			size -= vecs[vecIndex].iov_len;
3377 		}
3378 
3379 		vecOffset = size;
3380 	} else {
3381 		fileVecIndex = 0;
3382 		size = 0;
3383 	}
3384 
3385 	// Too bad, let's process the rest of the file_io_vecs
3386 
3387 	size_t totalSize = size;
3388 	size_t bytesLeft = numBytes - size;
3389 
3390 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3391 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3392 		off_t fileOffset = fileVec.offset;
3393 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3394 
3395 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3396 
3397 		// process the complete fileVec
3398 		while (fileLeft > 0) {
3399 			iovec tempVecs[MAX_TEMP_IO_VECS];
3400 			uint32 tempCount = 0;
3401 
3402 			// size tracks how much of what is left of the current fileVec
3403 			// (fileLeft) has been assigned to tempVecs
3405 
3406 			// assign what is left of the current fileVec to the tempVecs
3407 			for (size = 0; size < fileLeft && vecIndex < vecCount
3408 					&& tempCount < MAX_TEMP_IO_VECS;) {
3409 				// try to satisfy one iovec per iteration (or as much as
3410 				// possible)
3411 
3412 				// bytes left of the current iovec
3413 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3414 				if (vecLeft == 0) {
3415 					vecOffset = 0;
3416 					vecIndex++;
3417 					continue;
3418 				}
3419 
3420 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3421 					vecIndex, vecOffset, size));
3422 
3423 				// actually available bytes
3424 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3425 
3426 				tempVecs[tempCount].iov_base
3427 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3428 				tempVecs[tempCount].iov_len = tempVecSize;
3429 				tempCount++;
3430 
3431 				size += tempVecSize;
3432 				vecOffset += tempVecSize;
3433 			}
3434 
3435 			size_t bytes = size;
3436 
3437 			if (fileOffset == -1) {
3438 				if (doWrite) {
3439 					panic("sparse write attempt: vnode %p", vnode);
3440 					status = B_IO_ERROR;
3441 				} else {
3442 					// sparse read
3443 					status = zero_pages(tempVecs, tempCount, &bytes);
3444 				}
3445 			} else if (doWrite) {
3446 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3447 					tempVecs, tempCount, &bytes);
3448 			} else {
3449 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3450 					tempVecs, tempCount, &bytes);
3451 			}
3452 			if (status != B_OK)
3453 				return status;
3454 
3455 			totalSize += bytes;
3456 			bytesLeft -= size;
3457 			if (fileOffset >= 0)
3458 				fileOffset += size;
3459 			fileLeft -= size;
3460 			//dprintf("-> file left = %Lu\n", fileLeft);
3461 
3462 			if (size != bytes || vecIndex >= vecCount) {
3463 				// there are no more bytes or iovecs, let's bail out
3464 				*_numBytes = totalSize;
3465 				return B_OK;
3466 			}
3467 		}
3468 	}
3469 
3470 	*_vecIndex = vecIndex;
3471 	*_vecOffset = vecOffset;
3472 	*_numBytes = totalSize;
3473 	return B_OK;
3474 }
3475 
3476 
3477 //	#pragma mark - public API for file systems
3478 
3479 
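/*!	Creates a new vnode with the given \a vnodeID for \a volume, but does not
	publish it yet. The new node is marked busy and unpublished; the file
	system is expected to call publish_vnode() (or remove_vnode()) for it
	later on.
*/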
3480 extern "C" status_t
3481 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3482 	fs_vnode_ops* ops)
3483 {
3484 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3485 		volume, volume->id, vnodeID, privateNode));
3486 
3487 	if (privateNode == NULL)
3488 		return B_BAD_VALUE;
3489 
3490 	// create the node
3491 	bool nodeCreated;
3492 	struct vnode* vnode;
3493 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3494 		nodeCreated);
3495 	if (status != B_OK)
3496 		return status;
3497 
3498 	WriteLocker nodeLocker(sVnodeLock, true);
3499 		// create_new_vnode_and_lock() has locked for us
3500 
3501 	// file system integrity check:
3502 	// test if the vnode already exists and bail out if this is the case!
3503 	if (!nodeCreated) {
3504 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3505 			volume->id, vnodeID, privateNode, vnode->private_node);
3506 		return B_ERROR;
3507 	}
3508 
3509 	vnode->private_node = privateNode;
3510 	vnode->ops = ops;
3511 	vnode->SetUnpublished(true);
3512 
3513 	TRACE(("returns: %s\n", strerror(status)));
3514 
3515 	return status;
3516 }
3517 
3518 
3519 extern "C" status_t
3520 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3521 	fs_vnode_ops* ops, int type, uint32 flags)
3522 {
3523 	FUNCTION(("publish_vnode()\n"));
3524 
3525 	WriteLocker locker(sVnodeLock);
3526 
3527 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3528 
3529 	bool nodeCreated = false;
3530 	if (vnode == NULL) {
3531 		if (privateNode == NULL)
3532 			return B_BAD_VALUE;
3533 
3534 		// create the node
3535 		locker.Unlock();
3536 			// create_new_vnode_and_lock() will re-lock for us on success
3537 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3538 			nodeCreated);
3539 		if (status != B_OK)
3540 			return status;
3541 
3542 		locker.SetTo(sVnodeLock, true);
3543 	}
3544 
3545 	if (nodeCreated) {
3546 		vnode->private_node = privateNode;
3547 		vnode->ops = ops;
3548 		vnode->SetUnpublished(true);
3549 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3550 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3551 		// already known, but not published
3552 	} else
3553 		return B_BAD_VALUE;
3554 
3555 	bool publishSpecialSubNode = false;
3556 
3557 	vnode->SetType(type);
3558 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3559 	publishSpecialSubNode = is_special_node_type(type)
3560 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3561 
3562 	status_t status = B_OK;
3563 
3564 	// create sub vnodes, if necessary
3565 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3566 		locker.Unlock();
3567 
3568 		fs_volume* subVolume = volume;
3569 		if (volume->sub_volume != NULL) {
3570 			while (status == B_OK && subVolume->sub_volume != NULL) {
3571 				subVolume = subVolume->sub_volume;
3572 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3573 					vnode);
3574 			}
3575 		}
3576 
3577 		if (status == B_OK && publishSpecialSubNode)
3578 			status = create_special_sub_node(vnode, flags);
3579 
3580 		if (status != B_OK) {
3581 			// error -- clean up the created sub vnodes
3582 			while (subVolume->super_volume != volume) {
3583 				subVolume = subVolume->super_volume;
3584 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3585 			}
3586 		}
3587 
3588 		if (status == B_OK) {
3589 			ReadLocker vnodesReadLocker(sVnodeLock);
3590 			AutoLocker<Vnode> nodeLocker(vnode);
3591 			vnode->SetBusy(false);
3592 			vnode->SetUnpublished(false);
3593 		} else {
3594 			locker.Lock();
3595 			hash_remove(sVnodeTable, vnode);
3596 			remove_vnode_from_mount_list(vnode, vnode->mount);
3597 			free(vnode);
3598 		}
3599 	} else {
3600 		// we still hold the write lock -- mark the node unbusy and published
3601 		vnode->SetBusy(false);
3602 		vnode->SetUnpublished(false);
3603 	}
3604 
3605 	TRACE(("returns: %s\n", strerror(status)));
3606 
3607 	return status;
3608 }
3609 
3610 
3611 extern "C" status_t
3612 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3613 {
3614 	struct vnode* vnode;
3615 
3616 	if (volume == NULL)
3617 		return B_BAD_VALUE;
3618 
3619 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3620 	if (status != B_OK)
3621 		return status;
3622 
3623 	// If this is a layered FS, we need to get the node cookie for the requested
3624 	// layer.
3625 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3626 		fs_vnode resolvedNode;
3627 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3628 			&resolvedNode);
3629 		if (status != B_OK) {
3630 			panic("get_vnode(): Failed to get super node for vnode %p, "
3631 				"volume: %p", vnode, volume);
3632 			put_vnode(vnode);
3633 			return status;
3634 		}
3635 
3636 		if (_privateNode != NULL)
3637 			*_privateNode = resolvedNode.private_node;
3638 	} else if (_privateNode != NULL)
3639 		*_privateNode = vnode->private_node;
3640 
3641 	return B_OK;
3642 }
3643 
3644 
3645 extern "C" status_t
3646 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3647 {
3648 	struct vnode* vnode;
3649 
3650 	rw_lock_read_lock(&sVnodeLock);
3651 	vnode = lookup_vnode(volume->id, vnodeID);
3652 	rw_lock_read_unlock(&sVnodeLock);
3653 
3654 	if (vnode == NULL)
3655 		return B_BAD_VALUE;
3656 
3657 	inc_vnode_ref_count(vnode);
3658 	return B_OK;
3659 }
3660 
3661 
3662 extern "C" status_t
3663 put_vnode(fs_volume* volume, ino_t vnodeID)
3664 {
3665 	struct vnode* vnode;
3666 
3667 	rw_lock_read_lock(&sVnodeLock);
3668 	vnode = lookup_vnode(volume->id, vnodeID);
3669 	rw_lock_read_unlock(&sVnodeLock);
3670 
3671 	if (vnode == NULL)
3672 		return B_BAD_VALUE;
3673 
3674 	dec_vnode_ref_count(vnode, false, true);
3675 	return B_OK;
3676 }
3677 
3678 
3679 extern "C" status_t
3680 remove_vnode(fs_volume* volume, ino_t vnodeID)
3681 {
3682 	ReadLocker locker(sVnodeLock);
3683 
3684 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3685 	if (vnode == NULL)
3686 		return B_ENTRY_NOT_FOUND;
3687 
3688 	if (vnode->covered_by != NULL) {
3689 		// this vnode is in use
3690 		return B_BUSY;
3691 	}
3692 
3693 	vnode->Lock();
3694 
3695 	vnode->SetRemoved(true);
3696 	bool removeUnpublished = false;
3697 
3698 	if (vnode->IsUnpublished()) {
3699 		// prepare the vnode for deletion
3700 		removeUnpublished = true;
3701 		vnode->SetBusy(true);
3702 	}
3703 
3704 	vnode->Unlock();
3705 	locker.Unlock();
3706 
3707 	if (removeUnpublished) {
3708 		// If the vnode hasn't been published yet, we delete it here
3709 		atomic_add(&vnode->ref_count, -1);
3710 		free_vnode(vnode, true);
3711 	}
3712 
3713 	return B_OK;
3714 }
3715 
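
// Editorial sketch: remove_vnode() only marks the node; unless it was still
// unpublished, the actual deletion is deferred until the last reference is
// put. An unlink hook might look like this (my_remove_entry() is a
// hypothetical on-disk directory entry removal).
#if 0
static status_t
example_unlink(fs_volume* volume, MyInode* dir, const char* name)
{
	ino_t id;
	status_t status = my_remove_entry(dir, name, &id);
	if (status != B_OK)
		return status;

	return remove_vnode(volume, id);
}
#endif
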
3716 
3717 extern "C" status_t
3718 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3719 {
3720 	struct vnode* vnode;
3721 
3722 	rw_lock_read_lock(&sVnodeLock);
3723 
3724 	vnode = lookup_vnode(volume->id, vnodeID);
3725 	if (vnode) {
3726 		AutoLocker<Vnode> nodeLocker(vnode);
3727 		vnode->SetRemoved(false);
3728 	}
3729 
3730 	rw_lock_read_unlock(&sVnodeLock);
3731 	return B_OK;
3732 }
3733 
3734 
3735 extern "C" status_t
3736 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3737 {
3738 	ReadLocker _(sVnodeLock);
3739 
3740 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3741 		if (_removed != NULL)
3742 			*_removed = vnode->IsRemoved();
3743 		return B_OK;
3744 	}
3745 
3746 	return B_BAD_VALUE;
3747 }
3748 
3749 
3750 extern "C" fs_volume*
3751 volume_for_vnode(fs_vnode* _vnode)
3752 {
3753 	if (_vnode == NULL)
3754 		return NULL;
3755 
3756 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3757 	return vnode->mount->volume;
3758 }
3759 
3760 
3761 extern "C" status_t
3762 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3763 	size_t* _numBytes)
3764 {
3765 	struct file_descriptor* descriptor;
3766 	struct vnode* vnode;
3767 
3768 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3769 	if (descriptor == NULL)
3770 		return B_FILE_ERROR;
3771 
3772 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3773 		count, 0, _numBytes);
3774 
3775 	put_fd(descriptor);
3776 	return status;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3782 	size_t* _numBytes)
3783 {
3784 	struct file_descriptor* descriptor;
3785 	struct vnode* vnode;
3786 
3787 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3788 	if (descriptor == NULL)
3789 		return B_FILE_ERROR;
3790 
3791 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3792 		count, 0, _numBytes);
3793 
3794 	put_fd(descriptor);
3795 	return status;
3796 }
3797 
3798 
3799 extern "C" status_t
3800 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3801 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3802 	size_t* _bytes)
3803 {
3804 	struct file_descriptor* descriptor;
3805 	struct vnode* vnode;
3806 
3807 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3808 	if (descriptor == NULL)
3809 		return B_FILE_ERROR;
3810 
3811 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3812 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3813 		false);
3814 
3815 	put_fd(descriptor);
3816 	return status;
3817 }
3818 
3819 
3820 extern "C" status_t
3821 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3822 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3823 	size_t* _bytes)
3824 {
3825 	struct file_descriptor* descriptor;
3826 	struct vnode* vnode;
3827 
3828 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3829 	if (descriptor == NULL)
3830 		return B_FILE_ERROR;
3831 
3832 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3833 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3834 		true);
3835 
3836 	put_fd(descriptor);
3837 	return status;
3838 }
3839 
3840 
3841 extern "C" status_t
3842 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3843 {
3844 	// lookup mount -- the caller is required to make sure that the mount
3845 	// won't go away
3846 	MutexLocker locker(sMountMutex);
3847 	struct fs_mount* mount = find_mount(mountID);
3848 	if (mount == NULL)
3849 		return B_BAD_VALUE;
3850 	locker.Unlock();
3851 
3852 	return mount->entry_cache.Add(dirID, name, nodeID);
3853 }
3854 
3855 
3856 extern "C" status_t
3857 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3858 {
3859 	// lookup mount -- the caller is required to make sure that the mount
3860 	// won't go away
3861 	MutexLocker locker(sMountMutex);
3862 	struct fs_mount* mount = find_mount(mountID);
3863 	if (mount == NULL)
3864 		return B_BAD_VALUE;
3865 	locker.Unlock();
3866 
3867 	return mount->entry_cache.Remove(dirID, name);
3868 }
3869 
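
// Editorial sketch: a file system would typically feed the entry cache from
// its lookup hook once a name has been resolved (my_resolve_name() and the
// "MyInode" fields are hypothetical).
#if 0
static status_t
example_lookup(fs_volume* volume, MyInode* dir, const char* name, ino_t* _id)
{
	status_t status = my_resolve_name(dir, name, _id);
	if (status != B_OK)
		return status;

	// remember the mapping, so that later lookups can skip the FS
	entry_cache_add(volume->id, dir->id, name, *_id);
	return B_OK;
}
#endif
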
3870 
3871 //	#pragma mark - private VFS API
3872 //	Functions the VFS exports for other parts of the kernel
3873 
3874 
3875 /*! Acquires another reference to the vnode that has to be released
3876 	by calling vfs_put_vnode().
3877 */
3878 void
3879 vfs_acquire_vnode(struct vnode* vnode)
3880 {
3881 	inc_vnode_ref_count(vnode);
3882 }
3883 
3884 
3885 /*! This is currently called from file_cache_create() only.
3886 	It's probably a temporary solution as long as devfs requires that
3887 	fs_read_pages()/fs_write_pages() are called with the standard
3888 	open cookie and not with a device cookie.
3889 	If that's done differently, remove this call; it has no other
3890 	purpose.
3891 */
3892 extern "C" status_t
3893 vfs_get_cookie_from_fd(int fd, void** _cookie)
3894 {
3895 	struct file_descriptor* descriptor;
3896 
3897 	descriptor = get_fd(get_current_io_context(true), fd);
3898 	if (descriptor == NULL)
3899 		return B_FILE_ERROR;
3900 
3901 	*_cookie = descriptor->cookie;
3902 	return B_OK;
3903 }
3904 
3905 
3906 extern "C" status_t
3907 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3908 {
3909 	*vnode = get_vnode_from_fd(fd, kernel);
3910 
3911 	if (*vnode == NULL)
3912 		return B_FILE_ERROR;
3913 
3914 	return B_NO_ERROR;
3915 }
3916 
3917 
3918 extern "C" status_t
3919 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
3920 {
3921 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3922 		path, kernel));
3923 
3924 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3925 	if (pathBuffer.InitCheck() != B_OK)
3926 		return B_NO_MEMORY;
3927 
3928 	char* buffer = pathBuffer.LockBuffer();
3929 	strlcpy(buffer, path, pathBuffer.BufferSize());
3930 
3931 	struct vnode* vnode;
3932 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3933 	if (status != B_OK)
3934 		return status;
3935 
3936 	*_vnode = vnode;
3937 	return B_OK;
3938 }
3939 
3940 
3941 extern "C" status_t
3942 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
3943 {
3944 	struct vnode* vnode;
3945 
3946 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3947 	if (status != B_OK)
3948 		return status;
3949 
3950 	*_vnode = vnode;
3951 	return B_OK;
3952 }
3953 
3954 
3955 extern "C" status_t
3956 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3957 	const char* name, struct vnode** _vnode)
3958 {
3959 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
3960 }
3961 
3962 
3963 extern "C" void
3964 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
3965 {
3966 	*_mountID = vnode->device;
3967 	*_vnodeID = vnode->id;
3968 }
3969 
3970 
3971 /*!
3972 	Calls fs_open() on the given vnode and returns a new
3973 	file descriptor for it
3974 */
3975 int
3976 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
3977 {
3978 	return open_vnode(vnode, openMode, kernel);
3979 }
3980 
3981 
3982 /*!	Looks up a vnode with the given mount and vnode ID.
3983 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3984 	to the node.
3985 	It's currently only used by file_cache_create().
3986 */
3987 extern "C" status_t
3988 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
3989 {
3990 	rw_lock_read_lock(&sVnodeLock);
3991 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
3992 	rw_lock_read_unlock(&sVnodeLock);
3993 
3994 	if (vnode == NULL)
3995 		return B_ERROR;
3996 
3997 	*_vnode = vnode;
3998 	return B_OK;
3999 }
4000 
4001 
4002 extern "C" status_t
4003 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4004 	bool traverseLeafLink, bool kernel, void** _node)
4005 {
4006 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4007 		volume, path, kernel));
4008 
4009 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4010 	if (pathBuffer.InitCheck() != B_OK)
4011 		return B_NO_MEMORY;
4012 
4013 	fs_mount* mount;
4014 	status_t status = get_mount(volume->id, &mount);
4015 	if (status != B_OK)
4016 		return status;
4017 
4018 	char* buffer = pathBuffer.LockBuffer();
4019 	strlcpy(buffer, path, pathBuffer.BufferSize());
4020 
4021 	struct vnode* vnode = mount->root_vnode;
4022 
4023 	if (buffer[0] == '/')
4024 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4025 	else {
4026 		inc_vnode_ref_count(vnode);
4027 			// vnode_path_to_vnode() releases a reference to the starting vnode
4028 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4029 			kernel, &vnode, NULL);
4030 	}
4031 
4032 	put_mount(mount);
4033 
4034 	if (status != B_OK)
4035 		return status;
4036 
4037 	if (vnode->device != volume->id) {
4038 		// wrong mount ID - must not gain access on foreign file system nodes
4039 		put_vnode(vnode);
4040 		return B_BAD_VALUE;
4041 	}
4042 
4043 	// Use get_vnode() to resolve the cookie for the right layer.
4044 	status = get_vnode(volume, vnode->id, _node);
4045 	put_vnode(vnode);
4046 
4047 	return status;
4048 }
4049 
4050 
4051 status_t
4052 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4053 	struct stat* stat, bool kernel)
4054 {
4055 	status_t status;
4056 
4057 	if (path) {
4058 		// path given: get the stat of the node referred to by (fd, path)
4059 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4060 		if (pathBuffer.InitCheck() != B_OK)
4061 			return B_NO_MEMORY;
4062 
4063 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4064 			traverseLeafLink, stat, kernel);
4065 	} else {
4066 		// no path given: get the FD and use the FD operation
4067 		struct file_descriptor* descriptor
4068 			= get_fd(get_current_io_context(kernel), fd);
4069 		if (descriptor == NULL)
4070 			return B_FILE_ERROR;
4071 
4072 		if (descriptor->ops->fd_read_stat)
4073 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4074 		else
4075 			status = EOPNOTSUPP;
4076 
4077 		put_fd(descriptor);
4078 	}
4079 
4080 	return status;
4081 }
4082 
4083 
4084 /*!	Finds the full path to the file that contains the module \a moduleName,
4085 	puts it into \a pathBuffer, and returns B_OK for success.
4086 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4087 	or \c B_ENTRY_NOT_FOUND if no file could be found.
4088 	\a pathBuffer is clobbered in any case and must not be relied on if this
4089 	function returns unsuccessfully.
4090 	\a basePath and \a pathBuffer must not point to the same space.
4091 */
4092 status_t
4093 vfs_get_module_path(const char* basePath, const char* moduleName,
4094 	char* pathBuffer, size_t bufferSize)
4095 {
4096 	struct vnode* dir;
4097 	struct vnode* file;
4098 	status_t status;
4099 	size_t length;
4100 	char* path;
4101 
4102 	if (bufferSize == 0
4103 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4104 		return B_BUFFER_OVERFLOW;
4105 
4106 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4107 	if (status != B_OK)
4108 		return status;
4109 
4110 	// the path buffer had been clobbered by the above call
4111 	length = strlcpy(pathBuffer, basePath, bufferSize);
4112 	if (pathBuffer[length - 1] != '/')
4113 		pathBuffer[length++] = '/';
4114 
4115 	path = pathBuffer + length;
4116 	bufferSize -= length;
4117 
4118 	while (moduleName) {
4119 		char* nextPath = strchr(moduleName, '/');
4120 		if (nextPath == NULL)
4121 			length = strlen(moduleName);
4122 		else {
4123 			length = nextPath - moduleName;
4124 			nextPath++;
4125 		}
4126 
4127 		if (length + 1 >= bufferSize) {
4128 			status = B_BUFFER_OVERFLOW;
4129 			goto err;
4130 		}
4131 
4132 		memcpy(path, moduleName, length);
4133 		path[length] = '\0';
4134 		moduleName = nextPath;
4135 
4136 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4137 		if (status != B_OK) {
4138 			// vnode_path_to_vnode() has already released the reference to dir
4139 			return status;
4140 		}
4141 
4142 		if (S_ISDIR(file->Type())) {
4143 			// go to the next directory
4144 			path[length] = '/';
4145 			path[length + 1] = '\0';
4146 			path += length + 1;
4147 			bufferSize -= length + 1;
4148 
4149 			dir = file;
4150 		} else if (S_ISREG(file->Type())) {
4151 			// it's a file so it should be what we've searched for
4152 			// it's a file, so it should be what we've been searching for
4153 
4154 			return B_OK;
4155 		} else {
4156 			TRACE(("vfs_get_module_path(): something is strange here: "
4157 				"0x%08lx...\n", file->Type()));
4158 			status = B_ERROR;
4159 			dir = file;
4160 			goto err;
4161 		}
4162 	}
4163 
4164 	// if we got here, the moduleName just pointed to a directory, not to
4165 	// a real module - what should we do in this case?
4166 	status = B_ENTRY_NOT_FOUND;
4167 
4168 err:
4169 	put_vnode(dir);
4170 	return status;
4171 }
4172 
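
// Editorial sketch: the module name is resolved component by component
// relative to the base path; the walk succeeds at the first regular file it
// reaches. The paths below are only illustrative.
#if 0
static void
example_find_module(void)
{
	char modulePath[B_PATH_NAME_LENGTH];
	if (vfs_get_module_path("/boot/system/add-ons/kernel",
			"bus_managers/usb/v1", modulePath, sizeof(modulePath)) == B_OK) {
		// modulePath now names the file containing the module, e.g.
		// ".../kernel/bus_managers/usb", if "usb" is a regular file
	}
}
#endif
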
4173 
4174 /*!	\brief Normalizes a given path.
4175 
4176 	The path must refer to an existing or non-existing entry in an existing
4177 	directory; that is, after chopping off the leaf component, the remaining
4178 	path must refer to an existing directory.
4179 
4180 	The returned path will be canonical in that it will be absolute, will not
4181 	contain any "." or ".." components or duplicate occurrences of '/'s,
4182 	and none of the directory components will be symbolic links.
4183 
4184 	Any two paths referring to the same entry will result in the same
4185 	normalized path (well, that is pretty much the definition of `normalized',
4186 	isn't it :-).
4187 
4188 	\param path The path to be normalized.
4189 	\param buffer The buffer into which the normalized path will be written.
4190 		   May be the same one as \a path.
4191 	\param bufferSize The size of \a buffer.
4192 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4193 	\param kernel \c true, if the IO context of the kernel shall be used,
4194 		   otherwise that of the team this thread belongs to. Only relevant,
4195 		   if the path is relative (to get the CWD).
4196 	\return \c B_OK if everything went fine, another error code otherwise.
4197 */
4198 status_t
4199 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4200 	bool traverseLink, bool kernel)
4201 {
4202 	if (!path || !buffer || bufferSize < 1)
4203 		return B_BAD_VALUE;
4204 
4205 	if (path != buffer) {
4206 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4207 			return B_BUFFER_OVERFLOW;
4208 	}
4209 
4210 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4211 }
4212 
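
// Editorial sketch: normalization may be done in place, i.e. \a path and
// \a buffer may be the same buffer.
#if 0
static void
example_normalize(void)
{
	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/./system//lib/../bin", sizeof(path));
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
		// path is now canonical, i.e. "/boot/system/bin"
	}
}
#endif
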
4213 
4214 /*!	\brief Creates a special node in the file system.
4215 
4216 	The caller gets a reference to the newly created node (which is passed
4217 	back through \a _createdVnode) and is responsible for releasing it.
4218 
4219 	\param path The path where to create the entry for the node. Can be \c NULL,
4220 		in which case the node is created without an entry in the root FS -- it
4221 		will automatically be deleted when the last reference has been released.
4222 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4223 		the target file system will just create the node with its standard
4224 		operations. Depending on the type of the node a subnode might be created
4225 		automatically, though.
4226 	\param mode The type and permissions for the node to be created.
4227 	\param flags Flags to be passed to the creating FS.
4228 	\param kernel \c true, if called in the kernel context (relevant only if
4229 		\a path is not \c NULL and not absolute).
4230 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4231 		file system creating the node, with the private data pointer and
4232 		operations for the super node. Can be \c NULL.
4233 	\param _createdVnode Pointer to pre-allocated storage where to store the
4234 		pointer to the newly created node.
4235 	\return \c B_OK, if everything went fine, another error code otherwise.
4236 */
4237 status_t
4238 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4239 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4240 	struct vnode** _createdVnode)
4241 {
4242 	struct vnode* dirNode;
4243 	char _leaf[B_FILE_NAME_LENGTH];
4244 	char* leaf = NULL;
4245 
4246 	if (path) {
4247 		// We've got a path. Get the dir vnode and the leaf name.
4248 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4249 		if (tmpPathBuffer.InitCheck() != B_OK)
4250 			return B_NO_MEMORY;
4251 
4252 		char* tmpPath = tmpPathBuffer.LockBuffer();
4253 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4254 			return B_NAME_TOO_LONG;
4255 
4256 		// get the dir vnode and the leaf name
4257 		leaf = _leaf;
4258 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4259 		if (error != B_OK)
4260 			return error;
4261 	} else {
4262 		// No path. Create the node in the root FS.
4263 		dirNode = sRoot;
4264 		inc_vnode_ref_count(dirNode);
4265 	}
4266 
4267 	VNodePutter _(dirNode);
4268 
4269 	// check support for creating special nodes
4270 	if (!HAS_FS_CALL(dirNode, create_special_node))
4271 		return B_UNSUPPORTED;
4272 
4273 	// create the node
4274 	fs_vnode superVnode;
4275 	ino_t nodeID;
4276 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4277 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4278 	if (status != B_OK)
4279 		return status;
4280 
4281 	// lookup the node
4282 	rw_lock_read_lock(&sVnodeLock);
4283 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4284 	rw_lock_read_unlock(&sVnodeLock);
4285 
4286 	if (*_createdVnode == NULL) {
4287 		panic("vfs_create_special_node(): lookup of node failed");
4288 		return B_ERROR;
4289 	}
4290 
4291 	return B_OK;
4292 }
4293 
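
// Editorial sketch: creating a FIFO with an entry in the root FS; the path
// is only illustrative. The created node comes back with a reference that
// the caller has to release again.
#if 0
static status_t
example_create_fifo(void)
{
	fs_vnode superVnode;
	struct vnode* createdVnode;
	status_t status = vfs_create_special_node("/var/example_fifo", NULL,
		S_IFIFO | 0666, 0, true, &superVnode, &createdVnode);
	if (status == B_OK)
		vfs_put_vnode(createdVnode);
	return status;
}
#endif
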
4294 
4295 extern "C" void
4296 vfs_put_vnode(struct vnode* vnode)
4297 {
4298 	put_vnode(vnode);
4299 }
4300 
4301 
4302 extern "C" status_t
4303 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4304 {
4305 	// Get current working directory from io context
4306 	struct io_context* context = get_current_io_context(false);
4307 	status_t status = B_OK;
4308 
4309 	mutex_lock(&context->io_mutex);
4310 
4311 	if (context->cwd != NULL) {
4312 		*_mountID = context->cwd->device;
4313 		*_vnodeID = context->cwd->id;
4314 	} else
4315 		status = B_ERROR;
4316 
4317 	mutex_unlock(&context->io_mutex);
4318 	return status;
4319 }
4320 
4321 
4322 status_t
4323 vfs_unmount(dev_t mountID, uint32 flags)
4324 {
4325 	return fs_unmount(NULL, mountID, flags, true);
4326 }
4327 
4328 
4329 extern "C" status_t
4330 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4331 {
4332 	struct vnode* vnode;
4333 
4334 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4335 	if (status != B_OK)
4336 		return status;
4337 
4338 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4339 	put_vnode(vnode);
4340 	return B_OK;
4341 }
4342 
4343 
4344 extern "C" void
4345 vfs_free_unused_vnodes(int32 level)
4346 {
4347 	vnode_low_resource_handler(NULL,
4348 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4349 }
4350 
4351 
4352 extern "C" bool
4353 vfs_can_page(struct vnode* vnode, void* cookie)
4354 {
4355 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4356 
4357 	if (HAS_FS_CALL(vnode, can_page))
4358 		return FS_CALL(vnode, can_page, cookie);
4359 	return false;
4360 }
4361 
4362 
4363 extern "C" status_t
4364 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4365 	size_t count, uint32 flags, size_t* _numBytes)
4366 {
4367 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4368 		pos));
4369 
4370 #if VFS_PAGES_IO_TRACING
4371 	size_t bytesRequested = *_numBytes;
4372 #endif
4373 
4374 	IORequest request;
4375 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4376 	if (status == B_OK) {
4377 		status = vfs_vnode_io(vnode, cookie, &request);
4378 		if (status == B_OK)
4379 			status = request.Wait();
4380 		*_numBytes = request.TransferredBytes();
4381 	}
4382 
4383 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4384 		status, *_numBytes));
4385 
4386 	return status;
4387 }
4388 
4389 
4390 extern "C" status_t
4391 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4392 	size_t count, uint32 flags, size_t* _numBytes)
4393 {
4394 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4395 		pos));
4396 
4397 #if VFS_PAGES_IO_TRACING
4398 	size_t bytesRequested = *_numBytes;
4399 #endif
4400 
4401 	IORequest request;
4402 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4403 	if (status == B_OK) {
4404 		status = vfs_vnode_io(vnode, cookie, &request);
4405 		if (status == B_OK)
4406 			status = request.Wait();
4407 		*_numBytes = request.TransferredBytes();
4408 	}
4409 
4410 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4411 		status, *_numBytes));
4412 
4413 	return status;
4414 }
4415 
4416 
4417 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet,
4418 	it will be created if \a allocate is \c true.
4419 	On success, the function also grabs a reference to the cache it
4420 	returns.
4421 */
4422 extern "C" status_t
4423 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4424 {
4425 	if (vnode->cache != NULL) {
4426 		vnode->cache->AcquireRef();
4427 		*_cache = vnode->cache;
4428 		return B_OK;
4429 	}
4430 
4431 	rw_lock_read_lock(&sVnodeLock);
4432 	vnode->Lock();
4433 
4434 	status_t status = B_OK;
4435 
4436 	// The cache could have been created in the meantime
4437 	if (vnode->cache == NULL) {
4438 		if (allocate) {
4439 			// TODO: actually the vnode needs to be busy already here, or
4440 			//	else this won't work...
4441 			bool wasBusy = vnode->IsBusy();
4442 			vnode->SetBusy(true);
4443 
4444 			vnode->Unlock();
4445 			rw_lock_read_unlock(&sVnodeLock);
4446 
4447 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4448 
4449 			rw_lock_read_lock(&sVnodeLock);
4450 			vnode->Lock();
4451 			vnode->SetBusy(wasBusy);
4452 		} else
4453 			status = B_BAD_VALUE;
4454 	}
4455 
4456 	vnode->Unlock();
4457 	rw_lock_read_unlock(&sVnodeLock);
4458 
4459 	if (status == B_OK) {
4460 		vnode->cache->AcquireRef();
4461 		*_cache = vnode->cache;
4462 	}
4463 
4464 	return status;
4465 }
4466 
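
// Editorial sketch: the returned cache is referenced; callers release it
// again via VMCache::ReleaseRef() when done (assuming the usual
// AcquireRef()/ReleaseRef() pairing).
#if 0
static void
example_use_cache(struct vnode* vnode)
{
	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
		// ... work with the cache ...
		cache->ReleaseRef();
	}
}
#endif
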
4467 
4468 status_t
4469 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4470 	file_io_vec* vecs, size_t* _count)
4471 {
4472 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4473 		vnode, vecs, offset, size));
4474 
4475 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4476 }
4477 
4478 
4479 status_t
4480 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4481 {
4482 	status_t status = FS_CALL(vnode, read_stat, stat);
4483 
4484 	// fill in the st_dev and st_ino fields
4485 	if (status == B_OK) {
4486 		stat->st_dev = vnode->device;
4487 		stat->st_ino = vnode->id;
4488 		stat->st_rdev = -1;
4489 	}
4490 
4491 	return status;
4492 }
4493 
4494 
4495 status_t
4496 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4497 {
4498 	struct vnode* vnode;
4499 	status_t status = get_vnode(device, inode, &vnode, true, false);
4500 	if (status != B_OK)
4501 		return status;
4502 
4503 	status = FS_CALL(vnode, read_stat, stat);
4504 
4505 	// fill in the st_dev and st_ino fields
4506 	if (status == B_OK) {
4507 		stat->st_dev = vnode->device;
4508 		stat->st_ino = vnode->id;
4509 		stat->st_rdev = -1;
4510 	}
4511 
4512 	put_vnode(vnode);
4513 	return status;
4514 }
4515 
4516 
4517 status_t
4518 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4519 {
4520 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4521 }
4522 
4523 
4524 status_t
4525 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4526 	char* path, size_t pathLength)
4527 {
4528 	struct vnode* vnode;
4529 	status_t status;
4530 
4531 	// filter invalid leaf names
4532 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4533 		return B_BAD_VALUE;
4534 
4535 	// get the vnode matching the dir's node_ref
4536 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4537 		// special cases "." and "..": we can directly get the vnode of the
4538 		// referenced directory
4539 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4540 		leaf = NULL;
4541 	} else
4542 		status = get_vnode(device, inode, &vnode, true, false);
4543 	if (status != B_OK)
4544 		return status;
4545 
4546 	// get the directory path
4547 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4548 	put_vnode(vnode);
4549 		// we don't need the vnode anymore
4550 	if (status != B_OK)
4551 		return status;
4552 
4553 	// append the leaf name
4554 	if (leaf) {
4555 		// insert a directory separator if this is not the file system root
4556 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4557 				>= pathLength)
4558 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4559 			return B_NAME_TOO_LONG;
4560 		}
4561 	}
4562 
4563 	return B_OK;
4564 }
4565 
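
// Editorial sketch: turning an entry_ref style (device, directory, name)
// triple back into an absolute path; the leaf name is only illustrative.
#if 0
static void
example_print_entry(dev_t device, ino_t directory)
{
	char path[B_PATH_NAME_LENGTH];
	if (vfs_entry_ref_to_path(device, directory, "example.txt", path,
			sizeof(path)) == B_OK)
		dprintf("entry lives at %s\n", path);
}
#endif
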
4566 
4567 /*!	If the given descriptor locked its vnode, that lock will be released. */
4568 void
4569 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4570 {
4571 	struct vnode* vnode = fd_vnode(descriptor);
4572 
4573 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4574 		vnode->mandatory_locked_by = NULL;
4575 }
4576 
4577 
4578 /*!	Closes all file descriptors of the specified I/O context that
4579 	have the O_CLOEXEC flag set.
4580 */
4581 void
4582 vfs_exec_io_context(io_context* context)
4583 {
4584 	uint32 i;
4585 
4586 	for (i = 0; i < context->table_size; i++) {
4587 		mutex_lock(&context->io_mutex);
4588 
4589 		struct file_descriptor* descriptor = context->fds[i];
4590 		bool remove = false;
4591 
4592 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4593 			context->fds[i] = NULL;
4594 			context->num_used_fds--;
4595 
4596 			remove = true;
4597 		}
4598 
4599 		mutex_unlock(&context->io_mutex);
4600 
4601 		if (remove) {
4602 			close_fd(descriptor);
4603 			put_fd(descriptor);
4604 		}
4605 	}
4606 }
4607 
4608 
4609 /*! Sets up a new io_context structure, and inherits the properties
4610 	of the parent io_context if one is given.
4611 */
4612 io_context*
4613 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4614 {
4615 	size_t tableSize;
4616 	struct io_context* context;
4617 
4618 	context = (io_context*)malloc(sizeof(struct io_context));
4619 	if (context == NULL)
4620 		return NULL;
4621 
4622 	memset(context, 0, sizeof(struct io_context));
4623 	context->ref_count = 1;
4624 
4625 	MutexLocker parentLocker;
4626 	if (parentContext) {
4627 		parentLocker.SetTo(parentContext->io_mutex, false);
4628 		tableSize = parentContext->table_size;
4629 	} else
4630 		tableSize = DEFAULT_FD_TABLE_SIZE;
4631 
4632 	// allocate space for FDs and their close-on-exec flag
4633 	context->fds = (file_descriptor**)malloc(
4634 		sizeof(struct file_descriptor*) * tableSize
4635 		+ sizeof(struct select_sync*) * tableSize
4636 		+ (tableSize + 7) / 8);
4637 	if (context->fds == NULL) {
4638 		free(context);
4639 		return NULL;
4640 	}
4641 
4642 	context->select_infos = (select_info**)(context->fds + tableSize);
4643 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4644 
4645 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4646 		+ sizeof(struct select_sync*) * tableSize
4647 		+ (tableSize + 7) / 8);
4648 
4649 	mutex_init(&context->io_mutex, "I/O context");
4650 
4651 	// Copy all parent file descriptors
4652 
4653 	if (parentContext) {
4654 		size_t i;
4655 
4656 		mutex_lock(&sIOContextRootLock);
4657 		context->root = parentContext->root;
4658 		if (context->root)
4659 			inc_vnode_ref_count(context->root);
4660 		mutex_unlock(&sIOContextRootLock);
4661 
4662 		context->cwd = parentContext->cwd;
4663 		if (context->cwd)
4664 			inc_vnode_ref_count(context->cwd);
4665 
4666 		for (i = 0; i < tableSize; i++) {
4667 			struct file_descriptor* descriptor = parentContext->fds[i];
4668 
4669 			if (descriptor != NULL) {
4670 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4671 				if (closeOnExec && purgeCloseOnExec)
4672 					continue;
4673 
4674 				context->fds[i] = descriptor;
4675 				context->num_used_fds++;
4676 				atomic_add(&descriptor->ref_count, 1);
4677 				atomic_add(&descriptor->open_count, 1);
4678 
4679 				if (closeOnExec)
4680 					fd_set_close_on_exec(context, i, true);
4681 			}
4682 		}
4683 
4684 		parentLocker.Unlock();
4685 	} else {
4686 		context->root = sRoot;
4687 		context->cwd = sRoot;
4688 
4689 		if (context->root)
4690 			inc_vnode_ref_count(context->root);
4691 
4692 		if (context->cwd)
4693 			inc_vnode_ref_count(context->cwd);
4694 	}
4695 
4696 	context->table_size = tableSize;
4697 
4698 	list_init(&context->node_monitors);
4699 	context->max_monitors = DEFAULT_NODE_MONITORS;
4700 
4701 	return context;
4702 }
4703 
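
// Editorial sketch: the FD table, the select_info table, and the
// close-on-exec bitmap allocated above share a single allocation. The bitmap
// is indexed as a conventional bit array, roughly like this:
#if 0
static inline bool
example_close_on_exec(io_context* context, int fd)
{
	return (context->fds_close_on_exec[fd / 8] & (1 << (fd % 8))) != 0;
}
#endif
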
4704 
4705 static status_t
4706 vfs_free_io_context(io_context* context)
4707 {
4708 	uint32 i;
4709 
4710 	if (context->root)
4711 		put_vnode(context->root);
4712 
4713 	if (context->cwd)
4714 		put_vnode(context->cwd);
4715 
4716 	mutex_lock(&context->io_mutex);
4717 
4718 	for (i = 0; i < context->table_size; i++) {
4719 		if (struct file_descriptor* descriptor = context->fds[i]) {
4720 			close_fd(descriptor);
4721 			put_fd(descriptor);
4722 		}
4723 	}
4724 
4725 	mutex_destroy(&context->io_mutex);
4726 
4727 	remove_node_monitors(context);
4728 	free(context->fds);
4729 	free(context);
4730 
4731 	return B_OK;
4732 }
4733 
4734 
4735 void
4736 vfs_get_io_context(io_context* context)
4737 {
4738 	atomic_add(&context->ref_count, 1);
4739 }
4740 
4741 
4742 void
4743 vfs_put_io_context(io_context* context)
4744 {
4745 	if (atomic_add(&context->ref_count, -1) == 1)
4746 		vfs_free_io_context(context);
4747 }
4748 
4749 
4750 static status_t
4751 vfs_resize_fd_table(struct io_context* context, const int newSize)
4752 {
4753 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4754 		return EINVAL;
4755 
4756 	MutexLocker _(context->io_mutex);
4757 
4758 	int oldSize = context->table_size;
4759 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4760 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4761 
4762 	// If the tables shrink, make sure none of the fds being dropped are in use.
4763 	if (newSize < oldSize) {
4764 		for (int i = oldSize; i-- > newSize;) {
4765 			if (context->fds[i])
4766 				return EBUSY;
4767 		}
4768 	}
4769 
4770 	// store pointers to the old tables
4771 	file_descriptor** oldFDs = context->fds;
4772 	select_info** oldSelectInfos = context->select_infos;
4773 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4774 
4775 	// allocate new tables
4776 	file_descriptor** newFDs = (file_descriptor**)malloc(
4777 		sizeof(struct file_descriptor*) * newSize
4778 		+ sizeof(struct select_sync*) * newSize
4779 		+ newCloseOnExitBitmapSize);
4780 	if (newFDs == NULL)
4781 		return ENOMEM;
4782 
4783 	context->fds = newFDs;
4784 	context->select_infos = (select_info**)(context->fds + newSize);
4785 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4786 	context->table_size = newSize;
4787 
4788 	// copy entries from old tables
4789 	int toCopy = min_c(oldSize, newSize);
4790 
4791 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4792 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4793 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4794 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4795 
4796 	// clear additional entries, if the tables grow
4797 	if (newSize > oldSize) {
4798 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4799 		memset(context->select_infos + oldSize, 0,
4800 			sizeof(void*) * (newSize - oldSize));
4801 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4802 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4803 	}
4804 
4805 	free(oldFDs);
4806 
4807 	return B_OK;
4808 }
4809 
4810 
4811 static status_t
4812 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4813 {
4814 	int	status = B_OK;
4815 
4816 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4817 		return EINVAL;
4818 
4819 	mutex_lock(&context->io_mutex);
4820 
4821 	if ((size_t)newSize < context->num_monitors) {
4822 		status = EBUSY;
4823 		goto out;
4824 	}
4825 	context->max_monitors = newSize;
4826 
4827 out:
4828 	mutex_unlock(&context->io_mutex);
4829 	return status;
4830 }
4831 
4832 
4833 int
4834 vfs_getrlimit(int resource, struct rlimit* rlp)
4835 {
4836 	if (!rlp)
4837 		return B_BAD_ADDRESS;
4838 
4839 	switch (resource) {
4840 		case RLIMIT_NOFILE:
4841 		{
4842 			struct io_context* context = get_current_io_context(false);
4843 			MutexLocker _(context->io_mutex);
4844 
4845 			rlp->rlim_cur = context->table_size;
4846 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4847 			return 0;
4848 		}
4849 
4850 		case RLIMIT_NOVMON:
4851 		{
4852 			struct io_context* context = get_current_io_context(false);
4853 			MutexLocker _(context->io_mutex);
4854 
4855 			rlp->rlim_cur = context->max_monitors;
4856 			rlp->rlim_max = MAX_NODE_MONITORS;
4857 			return 0;
4858 		}
4859 
4860 		default:
4861 			return B_BAD_VALUE;
4862 	}
4863 }
4864 
4865 
4866 int
4867 vfs_setrlimit(int resource, const struct rlimit* rlp)
4868 {
4869 	if (!rlp)
4870 		return B_BAD_ADDRESS;
4871 
4872 	switch (resource) {
4873 		case RLIMIT_NOFILE:
4874 			/* TODO: check getuid() */
4875 			if (rlp->rlim_max != RLIM_SAVED_MAX
4876 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4877 				return B_NOT_ALLOWED;
4878 
4879 			return vfs_resize_fd_table(get_current_io_context(false),
4880 				rlp->rlim_cur);
4881 
4882 		case RLIMIT_NOVMON:
4883 			/* TODO: check getuid() */
4884 			if (rlp->rlim_max != RLIM_SAVED_MAX
4885 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4886 				return B_NOT_ALLOWED;
4887 
4888 			return vfs_resize_monitor_table(get_current_io_context(false),
4889 				rlp->rlim_cur);
4890 
4891 		default:
4892 			return B_BAD_VALUE;
4893 	}
4894 }
4895 
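
// Editorial sketch: growing the FD table through the rlimit interface. Note
// that vfs_setrlimit() insists on the maximum reported by vfs_getrlimit().
#if 0
static void
example_raise_fd_limit(void)
{
	struct rlimit rl;
	if (vfs_getrlimit(RLIMIT_NOFILE, &rl) == 0) {
		rl.rlim_cur = 512;
			// request a larger table; applied via vfs_resize_fd_table()
		vfs_setrlimit(RLIMIT_NOFILE, &rl);
	}
}
#endif
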
4896 
4897 status_t
4898 vfs_init(kernel_args* args)
4899 {
4900 	vnode::StaticInit();
4901 
4902 	struct vnode dummyVnode;
4903 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
4904 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
4905 	if (sVnodeTable == NULL)
4906 		panic("vfs_init: error creating vnode hash table\n");
4907 
4908 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
4909 
4910 	struct fs_mount dummyMount;
4911 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
4912 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
4913 	if (sMountsTable == NULL)
4914 		panic("vfs_init: error creating mounts hash table\n");
4915 
4916 	node_monitor_init();
4917 
4918 	sRoot = NULL;
4919 
4920 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
4921 
4922 	if (block_cache_init() != B_OK)
4923 		return B_ERROR;
4924 
4925 #ifdef ADD_DEBUGGER_COMMANDS
4926 	// add some debugger commands
4927 	add_debugger_command("vnode", &dump_vnode,
4928 		"info about the specified vnode");
4929 	add_debugger_command("vnodes", &dump_vnodes,
4930 		"list all vnodes (from the specified device)");
4931 	add_debugger_command("vnode_caches", &dump_vnode_caches,
4932 		"list all vnode caches");
4933 	add_debugger_command("mount", &dump_mount,
4934 		"info about the specified fs_mount");
4935 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4936 	add_debugger_command("io_context", &dump_io_context,
4937 		"info about the I/O context");
4938 	add_debugger_command("vnode_usage", &dump_vnode_usage,
4939 		"info about vnode usage");
4940 #endif
4941 
4942 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
4943 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
4944 
4945 	file_map_init();
4946 
4947 	return file_cache_init();
4948 }
4949 
4950 
4951 //	#pragma mark - fd_ops implementations
4952 
4953 
4954 /*!
4955 	Calls fs_open() on the given vnode and returns a new
4956 	file descriptor for it
4957 */
4958 static int
4959 open_vnode(struct vnode* vnode, int openMode, bool kernel)
4960 {
4961 	void* cookie;
4962 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
4963 	if (status != B_OK)
4964 		return status;
4965 
4966 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4967 	if (fd < 0) {
4968 		FS_CALL(vnode, close, cookie);
4969 		FS_CALL(vnode, free_cookie, cookie);
4970 	}
4971 	return fd;
4972 }
4973 
4974 
4975 /*!
4976 	Creates the entry \a name in \a directory (or opens the existing node,
4977 	unless O_EXCL is given), and returns a new file descriptor for it
4978 */
4979 static int
4980 create_vnode(struct vnode* directory, const char* name, int openMode,
4981 	int perms, bool kernel)
4982 {
4983 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
4984 	status_t status = B_ERROR;
4985 	struct vnode* vnode;
4986 	void* cookie;
4987 	ino_t newID;
4988 
4989 	// This is somewhat tricky: If the entry already exists, the FS responsible
4990 	// for the directory might not necessarily also be the one responsible for
4991 	// the node the entry refers to. So we can actually never call the create()
4992 	// hook without O_EXCL. Instead we try to look the entry up first. If it
4993 	// already exists, we just open the node (unless O_EXCL), otherwise we call
4994 	// create() with O_EXCL. This introduces a race condition, since someone
4995 	// else might have created the entry in the meantime. We hope the
4996 	// respective FS returns the correct error code and retry up to 3 times.
4997 
4998 	for (int i = 0; i < 3 && status != B_OK; i++) {
4999 		// look the node up
5000 		status = lookup_dir_entry(directory, name, &vnode);
5001 		if (status == B_OK) {
5002 			VNodePutter putter(vnode);
5003 
5004 			if ((openMode & O_EXCL) != 0)
5005 				return B_FILE_EXISTS;
5006 
5007 			// If the node is a symlink, we have to follow it, unless
5008 			// O_NOTRAVERSE is set.
5009 			if (S_ISLNK(vnode->Type()) && traverse) {
5010 				putter.Put();
5011 				char clonedName[B_FILE_NAME_LENGTH + 1];
5012 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5013 						>= B_FILE_NAME_LENGTH) {
5014 					return B_NAME_TOO_LONG;
5015 				}
5016 
5017 				inc_vnode_ref_count(directory);
5018 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5019 					kernel, &vnode, NULL);
5020 				if (status != B_OK)
5021 					return status;
5022 
5023 				putter.SetTo(vnode);
5024 			}
5025 
5026 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5027 				put_vnode(vnode);
5028 				return B_LINK_LIMIT;
5029 			}
5030 
5031 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5032 			// on success keep the vnode reference for the FD
5033 			if (fd >= 0)
5034 				putter.Detach();
5035 
5036 			return fd;
5037 		}
5038 
5039 		// it doesn't exist yet -- try to create it
5040 
5041 		if (!HAS_FS_CALL(directory, create))
5042 			return EROFS;
5043 
5044 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5045 			&cookie, &newID);
5046 		if (status != B_OK
5047 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5048 			return status;
5049 		}
5050 	}
5051 
5052 	if (status != B_OK)
5053 		return status;
5054 
5055 	// the node has been created successfully
5056 
5057 	rw_lock_read_lock(&sVnodeLock);
5058 	vnode = lookup_vnode(directory->device, newID);
5059 	rw_lock_read_unlock(&sVnodeLock);
5060 
5061 	if (vnode == NULL) {
5062 		panic("vfs: fs_create() returned success but there is no vnode, "
5063 			"mount ID %ld!\n", directory->device);
5064 		return B_BAD_VALUE;
5065 	}
5066 
5067 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5068 	if (fd >= 0)
5069 		return fd;
5070 
5071 	status = fd;
5072 
5073 	// something went wrong, clean up
5074 
5075 	FS_CALL(vnode, close, cookie);
5076 	FS_CALL(vnode, free_cookie, cookie);
5077 	put_vnode(vnode);
5078 
5079 	FS_CALL(directory, unlink, name);
5080 
5081 	return status;
5082 }
5083 
5084 
5085 /*! Calls fs open_dir() on the given vnode and returns a new
5086 	file descriptor for it
5087 */
5088 static int
5089 open_dir_vnode(struct vnode* vnode, bool kernel)
5090 {
5091 	void* cookie;
5092 	int status;
5093 
5094 	status = FS_CALL(vnode, open_dir, &cookie);
5095 	if (status != B_OK)
5096 		return status;
5097 
5098 	// directory is opened, create a fd
5099 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5100 	if (status >= 0)
5101 		return status;
5102 
5103 	FS_CALL(vnode, close_dir, cookie);
5104 	FS_CALL(vnode, free_dir_cookie, cookie);
5105 
5106 	return status;
5107 }
5108 
5109 
5110 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5111 	file descriptor for it.
5112 	Used by attr_dir_open() and attr_dir_open_fd().
5113 */
5114 static int
5115 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5116 {
5117 	void* cookie;
5118 	int status;
5119 
5120 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5121 		return EOPNOTSUPP;
5122 
5123 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5124 	if (status != B_OK)
5125 		return status;
5126 
5127 	// directory is opened, create a fd
5128 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5129 	if (status >= 0)
5130 		return status;
5131 
5132 	FS_CALL(vnode, close_attr_dir, cookie);
5133 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5134 
5135 	return status;
5136 }
5137 
5138 
5139 static int
5140 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5141 	int openMode, int perms, bool kernel)
5142 {
5143 	struct vnode* directory;
5144 	int status;
5145 
5146 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5147 		"kernel %d\n", name, openMode, perms, kernel));
5148 
5149 	// get directory to put the new file in
5150 	status = get_vnode(mountID, directoryID, &directory, true, false);
5151 	if (status != B_OK)
5152 		return status;
5153 
5154 	status = create_vnode(directory, name, openMode, perms, kernel);
5155 	put_vnode(directory);
5156 
5157 	return status;
5158 }
5159 
5160 
5161 static int
5162 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5163 {
5164 	char name[B_FILE_NAME_LENGTH];
5165 	struct vnode* directory;
5166 	int status;
5167 
5168 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5169 		openMode, perms, kernel));
5170 
5171 	// get directory to put the new file in
5172 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5173 	if (status < 0)
5174 		return status;
5175 
5176 	status = create_vnode(directory, name, openMode, perms, kernel);
5177 
5178 	put_vnode(directory);
5179 	return status;
5180 }
5181 
5182 
5183 static int
5184 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5185 	int openMode, bool kernel)
5186 {
5187 	if (name == NULL || *name == '\0')
5188 		return B_BAD_VALUE;
5189 
5190 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5191 		mountID, directoryID, name, openMode));
5192 
5193 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5194 
5195 	// get the vnode matching the entry_ref
5196 	struct vnode* vnode;
5197 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5198 		kernel, &vnode);
5199 	if (status != B_OK)
5200 		return status;
5201 
5202 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5203 		put_vnode(vnode);
5204 		return B_LINK_LIMIT;
5205 	}
5206 
5207 	int fd = open_vnode(vnode, openMode, kernel);
5208 	if (fd < 0)
5209 		put_vnode(vnode);
5210 
5211 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5212 		vnode->id, name);
5213 	return fd;
5214 }
5215 
5216 
5217 static int
5218 file_open(int fd, char* path, int openMode, bool kernel)
5219 {
5220 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5221 
5222 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5223 		fd, path, openMode, kernel));
5224 
5225 	// get the vnode matching the vnode + path combination
5226 	struct vnode* vnode;
5227 	ino_t parentID;
5228 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5229 		&parentID, kernel);
5230 	if (status != B_OK)
5231 		return status;
5232 
5233 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5234 		put_vnode(vnode);
5235 		return B_LINK_LIMIT;
5236 	}
5237 
5238 	// open the vnode
5239 	int newFD = open_vnode(vnode, openMode, kernel);
5240 	// put only on error -- otherwise our reference was transferred to the FD
5241 	if (newFD < 0)
5242 		put_vnode(vnode);
5243 
5244 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5245 		vnode->device, parentID, vnode->id, NULL);
5246 
5247 	return newFD;
5248 }
5249 
5250 
5251 static status_t
5252 file_close(struct file_descriptor* descriptor)
5253 {
5254 	struct vnode* vnode = descriptor->u.vnode;
5255 	status_t status = B_OK;
5256 
5257 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5258 
5259 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5260 		vnode->id);
5261 	if (HAS_FS_CALL(vnode, close)) {
5262 		status = FS_CALL(vnode, close, descriptor->cookie);
5263 	}
5264 
5265 	if (status == B_OK) {
5266 		// remove all outstanding locks for this team
5267 		release_advisory_lock(vnode, NULL);
5268 	}
5269 	return status;
5270 }
5271 
5272 
5273 static void
5274 file_free_fd(struct file_descriptor* descriptor)
5275 {
5276 	struct vnode* vnode = descriptor->u.vnode;
5277 
5278 	if (vnode != NULL) {
5279 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5280 		put_vnode(vnode);
5281 	}
5282 }
5283 
5284 
5285 static status_t
5286 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5287 	size_t* length)
5288 {
5289 	struct vnode* vnode = descriptor->u.vnode;
5290 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5291 		*length));
5292 
5293 	if (S_ISDIR(vnode->Type()))
5294 		return B_IS_A_DIRECTORY;
5295 
5296 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5297 }
5298 
5299 
5300 static status_t
5301 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5302 	size_t* length)
5303 {
5304 	struct vnode* vnode = descriptor->u.vnode;
5305 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5306 
5307 	if (S_ISDIR(vnode->Type()))
5308 		return B_IS_A_DIRECTORY;
5309 	if (!HAS_FS_CALL(vnode, write))
5310 		return EROFS;
5311 
5312 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5313 }
5314 
5315 
5316 static off_t
5317 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5318 {
5319 	struct vnode* vnode = descriptor->u.vnode;
5320 	off_t offset;
5321 
5322 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5323 
5324 	// some kinds of files are not seekable
5325 	switch (vnode->Type() & S_IFMT) {
5326 		case S_IFIFO:
5327 		case S_IFSOCK:
5328 			return ESPIPE;
5329 
5330 		// The Open Group Base Specs don't single out any file types other than
5331 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5332 		case S_IFREG:
5333 		case S_IFBLK:
5334 		case S_IFDIR:
5335 		case S_IFLNK:
5336 		case S_IFCHR:
5337 			break;
5338 	}
5339 
5340 	switch (seekType) {
5341 		case SEEK_SET:
5342 			offset = 0;
5343 			break;
5344 		case SEEK_CUR:
5345 			offset = descriptor->pos;
5346 			break;
5347 		case SEEK_END:
5348 		{
5349 			// stat() the node
5350 			if (!HAS_FS_CALL(vnode, read_stat))
5351 				return EOPNOTSUPP;
5352 
5353 			struct stat stat;
5354 			status_t status = FS_CALL(vnode, read_stat, &stat);
5355 			if (status != B_OK)
5356 				return status;
5357 
5358 			offset = stat.st_size;
5359 			break;
5360 		}
5361 		default:
5362 			return B_BAD_VALUE;
5363 	}
5364 
5365 	// assumes off_t is 64 bits wide
5366 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5367 		return EOVERFLOW;
5368 
5369 	pos += offset;
5370 	if (pos < 0)
5371 		return B_BAD_VALUE;
5372 
5373 	return descriptor->pos = pos;
5374 }
5375 
5376 
5377 static status_t
5378 file_select(struct file_descriptor* descriptor, uint8 event,
5379 	struct selectsync* sync)
5380 {
5381 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5382 
5383 	struct vnode* vnode = descriptor->u.vnode;
5384 
5385 	// If the FS has no select() hook, notify select() now.
5386 	if (!HAS_FS_CALL(vnode, select))
5387 		return notify_select_event(sync, event);
5388 
5389 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5390 }
5391 
5392 
5393 static status_t
5394 file_deselect(struct file_descriptor* descriptor, uint8 event,
5395 	struct selectsync* sync)
5396 {
5397 	struct vnode* vnode = descriptor->u.vnode;
5398 
5399 	if (!HAS_FS_CALL(vnode, deselect))
5400 		return B_OK;
5401 
5402 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5403 }
5404 
5405 
5406 static status_t
5407 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5408 	bool kernel)
5409 {
5410 	struct vnode* vnode;
5411 	status_t status;
5412 
5413 	if (name == NULL || *name == '\0')
5414 		return B_BAD_VALUE;
5415 
5416 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5417 		"perms = %d)\n", mountID, parentID, name, perms));
5418 
5419 	status = get_vnode(mountID, parentID, &vnode, true, false);
5420 	if (status != B_OK)
5421 		return status;
5422 
5423 	if (HAS_FS_CALL(vnode, create_dir))
5424 		status = FS_CALL(vnode, create_dir, name, perms);
5425 	else
5426 		status = EROFS;
5427 
5428 	put_vnode(vnode);
5429 	return status;
5430 }
5431 
5432 
5433 static status_t
5434 dir_create(int fd, char* path, int perms, bool kernel)
5435 {
5436 	char filename[B_FILE_NAME_LENGTH];
5437 	struct vnode* vnode;
5438 	status_t status;
5439 
5440 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5441 		kernel));
5442 
5443 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5444 	if (status < 0)
5445 		return status;
5446 
5447 	if (HAS_FS_CALL(vnode, create_dir)) {
5448 		status = FS_CALL(vnode, create_dir, filename, perms);
5449 	} else
5450 		status = EROFS;
5451 
5452 	put_vnode(vnode);
5453 	return status;
5454 }
5455 
5456 
5457 static int
5458 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5459 {
5460 	struct vnode* vnode;
5461 	int status;
5462 
5463 	FUNCTION(("dir_open_entry_ref()\n"));
5464 
5465 	if (name && *name == '\0')
5466 		return B_BAD_VALUE;
5467 
5468 	// get the vnode matching the entry_ref/node_ref
5469 	if (name) {
5470 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5471 			&vnode);
5472 	} else
5473 		status = get_vnode(mountID, parentID, &vnode, true, false);
5474 	if (status != B_OK)
5475 		return status;
5476 
5477 	int fd = open_dir_vnode(vnode, kernel);
5478 	if (fd < 0)
5479 		put_vnode(vnode);
5480 
5481 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5482 		vnode->id, name);
5483 	return fd;
5484 }
5485 
5486 
5487 static int
5488 dir_open(int fd, char* path, bool kernel)
5489 {
5490 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5491 		kernel));
5492 
5493 	// get the vnode matching the vnode + path combination
5494 	struct vnode* vnode = NULL;
5495 	ino_t parentID;
5496 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5497 		kernel);
5498 	if (status != B_OK)
5499 		return status;
5500 
5501 	// open the dir
5502 	int newFD = open_dir_vnode(vnode, kernel);
5503 	if (newFD < 0)
5504 		put_vnode(vnode);
5505 
5506 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID,
5507 		vnode->id, NULL);
5508 	return newFD;
5509 }
5510 
5511 
5512 static status_t
5513 dir_close(struct file_descriptor* descriptor)
5514 {
5515 	struct vnode* vnode = descriptor->u.vnode;
5516 
5517 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5518 
5519 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5520 		vnode->id);
5521 	if (HAS_FS_CALL(vnode, close_dir))
5522 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5523 
5524 	return B_OK;
5525 }
5526 
5527 
5528 static void
5529 dir_free_fd(struct file_descriptor* descriptor)
5530 {
5531 	struct vnode* vnode = descriptor->u.vnode;
5532 
5533 	if (vnode != NULL) {
5534 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5535 		put_vnode(vnode);
5536 	}
5537 }
5538 
5539 
5540 static status_t
5541 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5542 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5543 {
5544 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5545 		bufferSize, _count);
5546 }
5547 
5548 
5549 static status_t
5550 fix_dirent(struct vnode* parent, struct dirent* entry,
5551 	struct io_context* ioContext)
5552 {
5553 	// set d_pdev and d_pino
5554 	entry->d_pdev = parent->device;
5555 	entry->d_pino = parent->id;
5556 
5557 	// If this is the ".." entry and the directory is the root of a FS,
5558 	// we need to replace d_dev and d_ino with the actual values.
5559 	if (strcmp(entry->d_name, "..") == 0
5560 		&& parent->mount->root_vnode == parent
5561 		&& parent->mount->covers_vnode) {
5562 		inc_vnode_ref_count(parent);
5563 			// vnode_path_to_vnode() puts the node
5564 
5565 		// Make sure the IO context root is not bypassed.
5566 		if (parent == ioContext->root) {
5567 			entry->d_dev = parent->device;
5568 			entry->d_ino = parent->id;
5569 		} else {
5570 			// ".." is guaranteed not to be clobbered by this call
5571 			struct vnode* vnode;
5572 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5573 				ioContext, &vnode, NULL);
5574 
5575 			if (status == B_OK) {
5576 				entry->d_dev = vnode->device;
5577 				entry->d_ino = vnode->id;
5578 			}
5579 		}
5580 	} else {
5581 		// resolve mount points
5582 		ReadLocker _(&sVnodeLock);
5583 
5584 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5585 		if (vnode != NULL) {
5586 			if (vnode->covered_by != NULL) {
5587 				entry->d_dev = vnode->covered_by->device;
5588 				entry->d_ino = vnode->covered_by->id;
5589 			}
5590 		}
5591 	}
5592 
5593 	return B_OK;
5594 }
5595 
5596 
5597 static status_t
5598 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5599 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5600 {
5601 	if (!HAS_FS_CALL(vnode, read_dir))
5602 		return EOPNOTSUPP;
5603 
5604 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5605 		_count);
5606 	if (error != B_OK)
5607 		return error;
5608 
5609 	// we need to adjust the read dirents
5610 	uint32 count = *_count;
5611 	for (uint32 i = 0; i < count; i++) {
5612 		error = fix_dirent(vnode, buffer, ioContext);
5613 		if (error != B_OK)
5614 			return error;
5615 
5616 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5617 	}
5618 
5619 	return error;
5620 }
5621 
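
// Editorial sketch: consumers walk the packed dirent buffer the same way the
// fixup loop above does, advancing by d_reclen per entry.
#if 0
static void
example_iterate(struct dirent* buffer, uint32 count)
{
	for (uint32 i = 0; i < count; i++) {
		dprintf("entry: %s (dev %ld, ino %Ld)\n", buffer->d_name,
			buffer->d_dev, buffer->d_ino);
		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
	}
}
#endif
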
5622 
5623 static status_t
5624 dir_rewind(struct file_descriptor* descriptor)
5625 {
5626 	struct vnode* vnode = descriptor->u.vnode;
5627 
5628 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5629 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5630 	}
5631 
5632 	return EOPNOTSUPP;
5633 }
5634 
5635 
5636 static status_t
5637 dir_remove(int fd, char* path, bool kernel)
5638 {
5639 	char name[B_FILE_NAME_LENGTH];
5640 	struct vnode* directory;
5641 	status_t status;
5642 
5643 	if (path != NULL) {
5644 		// we need to make sure our path name doesn't end in "/", ".",
5645 		// or ".."
5646 		char* lastSlash = strrchr(path, '/');
5647 		if (lastSlash != NULL) {
5648 			char* leaf = lastSlash + 1;
5649 			if (!strcmp(leaf, ".."))
5650 				return B_NOT_ALLOWED;
5651 
5652 			// omit multiple slashes
5653 			while (lastSlash > path && lastSlash[-1] == '/') {
5654 				lastSlash--;
5655 			}
5656 
5657 			if (!leaf[0]
5658 				|| !strcmp(leaf, ".")) {
5659 				// "name/" -> "name", or "name/." -> "name"
5660 				lastSlash[0] = '\0';
5661 			}
5662 		}
5663 
5664 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5665 			return B_NOT_ALLOWED;
5666 	}
5667 
5668 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5669 	if (status != B_OK)
5670 		return status;
5671 
5672 	if (HAS_FS_CALL(directory, remove_dir))
5673 		status = FS_CALL(directory, remove_dir, name);
5674 	else
5675 		status = EROFS;
5676 
5677 	put_vnode(directory);
5678 	return status;
5679 }
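

// Examples (illustrative, not compiled) of the path sanitizing in
// dir_remove() above: trailing slashes and a trailing "." are stripped so
// that the leaf name can be resolved, while "." and ".." are rejected.
// Note that dir_remove() may modify the path in place, so real callers must
// pass a writable buffer; pathBuffer below stands in for one.
#if 0
	dir_remove(fd, pathBuffer /* "foo/bar/" */, kernel);	// like "foo/bar"
	dir_remove(fd, pathBuffer /* "foo/bar/." */, kernel);	// like "foo/bar"
	dir_remove(fd, pathBuffer /* "foo/.." */, kernel);		// B_NOT_ALLOWED
#endif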
5680 
5681 
5682 static status_t
5683 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5684 	size_t length)
5685 {
5686 	struct vnode* vnode = descriptor->u.vnode;
5687 
5688 	if (HAS_FS_CALL(vnode, ioctl))
5689 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5690 
5691 	return EOPNOTSUPP;
5692 }
5693 
5694 
5695 static status_t
5696 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5697 {
5698 	struct flock flock;
5699 
5700 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5701 		fd, op, argument, kernel ? "kernel" : "user"));
5702 
5703 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5704 		fd);
5705 	if (descriptor == NULL)
5706 		return B_FILE_ERROR;
5707 
5708 	struct vnode* vnode = fd_vnode(descriptor);
5709 
5710 	status_t status = B_OK;
5711 
5712 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5713 		if (descriptor->type != FDTYPE_FILE)
5714 			status = B_BAD_VALUE;
5715 		else if (user_memcpy(&flock, (struct flock*)argument,
5716 				sizeof(struct flock)) != B_OK)
5717 			status = B_BAD_ADDRESS;
5718 
5719 		if (status != B_OK) {
5720 			put_fd(descriptor);
5721 			return status;
5722 		}
5723 	}
5724 
5725 	switch (op) {
5726 		case F_SETFD:
5727 		{
5728 			struct io_context* context = get_current_io_context(kernel);
5729 			// Set file descriptor flags
5730 
5731 			// FD_CLOEXEC is the only flag available at this time
5732 			mutex_lock(&context->io_mutex);
5733 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5734 			mutex_unlock(&context->io_mutex);
5735 
5736 			status = B_OK;
5737 			break;
5738 		}
5739 
5740 		case F_GETFD:
5741 		{
5742 			struct io_context* context = get_current_io_context(kernel);
5743 
5744 			// Get file descriptor flags
5745 			mutex_lock(&context->io_mutex);
5746 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5747 			mutex_unlock(&context->io_mutex);
5748 			break;
5749 		}
5750 
5751 		case F_SETFL:
5752 			// Set file descriptor open mode
5753 
5754 			// we only accept changes to O_APPEND and O_NONBLOCK
5755 			argument &= O_APPEND | O_NONBLOCK;
5756 			if (descriptor->ops->fd_set_flags != NULL) {
5757 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5758 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5759 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5760 					(int)argument);
5761 			} else
5762 				status = EOPNOTSUPP;
5763 
5764 			if (status == B_OK) {
5765 				// update this descriptor's open_mode field
5766 				descriptor->open_mode = (descriptor->open_mode
5767 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5768 			}
5769 
5770 			break;
5771 
5772 		case F_GETFL:
5773 			// Get file descriptor open mode
5774 			status = descriptor->open_mode;
5775 			break;
5776 
5777 		case F_DUPFD:
5778 		{
5779 			struct io_context* context = get_current_io_context(kernel);
5780 
5781 			status = new_fd_etc(context, descriptor, (int)argument);
5782 			if (status >= 0) {
5783 				mutex_lock(&context->io_mutex);
5784 				fd_set_close_on_exec(context, fd, false);
5785 				mutex_unlock(&context->io_mutex);
5786 
5787 				atomic_add(&descriptor->ref_count, 1);
5788 			}
5789 			break;
5790 		}
5791 
5792 		case F_GETLK:
5793 			if (vnode != NULL) {
5794 				status = get_advisory_lock(vnode, &flock);
5795 				if (status == B_OK) {
5796 					// copy back flock structure
5797 					status = user_memcpy((struct flock*)argument, &flock,
5798 						sizeof(struct flock));
5799 				}
5800 			} else
5801 				status = B_BAD_VALUE;
5802 			break;
5803 
5804 		case F_SETLK:
5805 		case F_SETLKW:
5806 			status = normalize_flock(descriptor, &flock);
5807 			if (status != B_OK)
5808 				break;
5809 
5810 			if (vnode == NULL) {
5811 				status = B_BAD_VALUE;
5812 			} else if (flock.l_type == F_UNLCK) {
5813 				status = release_advisory_lock(vnode, &flock);
5814 			} else {
5815 				// the open mode must match the lock type
5816 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5817 						&& flock.l_type == F_WRLCK)
5818 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5819 						&& flock.l_type == F_RDLCK))
5820 					status = B_FILE_ERROR;
5821 				else {
5822 					status = acquire_advisory_lock(vnode, -1,
5823 						&flock, op == F_SETLKW);
5824 				}
5825 			}
5826 			break;
5827 
5828 		// ToDo: add support for more ops?
5829 
5830 		default:
5831 			status = B_BAD_VALUE;
5832 	}
5833 
5834 	put_fd(descriptor);
5835 	return status;
5836 }
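

// The F_GETLK/F_SETLK/F_SETLKW cases above implement the POSIX advisory
// locking protocol. A minimal userland sketch (not compiled here; it assumes
// a file FD opened O_RDWR, since the lock type must match the open mode):
#if 0
	struct flock lock;
	lock.l_type = F_WRLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 0;				// 0 means "to the end of the file"
	fcntl(fd, F_SETLKW, &lock);	// blocks via acquire_advisory_lock()
	// ... exclusive access to the locked range ...
	lock.l_type = F_UNLCK;
	fcntl(fd, F_SETLK, &lock);	// releases via release_advisory_lock()
#endif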
5837 
5838 
5839 static status_t
5840 common_sync(int fd, bool kernel)
5841 {
5842 	struct file_descriptor* descriptor;
5843 	struct vnode* vnode;
5844 	status_t status;
5845 
5846 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5847 
5848 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5849 	if (descriptor == NULL)
5850 		return B_FILE_ERROR;
5851 
5852 	if (HAS_FS_CALL(vnode, fsync))
5853 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5854 	else
5855 		status = EOPNOTSUPP;
5856 
5857 	put_fd(descriptor);
5858 	return status;
5859 }
5860 
5861 
5862 static status_t
5863 common_lock_node(int fd, bool kernel)
5864 {
5865 	struct file_descriptor* descriptor;
5866 	struct vnode* vnode;
5867 
5868 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5869 	if (descriptor == NULL)
5870 		return B_FILE_ERROR;
5871 
5872 	status_t status = B_OK;
5873 
5874 	// We need to set the locking atomically - someone
5875 	// else might set one at the same time
5876 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5877 			(file_descriptor*)NULL) != NULL)
5878 		status = B_BUSY;
5879 
5880 	put_fd(descriptor);
5881 	return status;
5882 }
5883 
5884 
5885 static status_t
5886 common_unlock_node(int fd, bool kernel)
5887 {
5888 	struct file_descriptor* descriptor;
5889 	struct vnode* vnode;
5890 
5891 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5892 	if (descriptor == NULL)
5893 		return B_FILE_ERROR;
5894 
5895 	status_t status = B_OK;
5896 
5897 	// We need to clear the lock atomically - someone
5898 	// else might set or clear it at the same time
5899 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
5900 			(file_descriptor*)NULL, descriptor) != descriptor)
5901 		status = B_BAD_VALUE;
5902 
5903 	put_fd(descriptor);
5904 	return status;
5905 }
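

// In both functions above, atomic_pointer_test_and_set(pointer, newValue,
// testAgainst) is a compare-and-swap: it stores newValue only if *pointer
// still equals testAgainst, and always returns the previous value. A sketch
// of the lock side:
#if 0
	file_descriptor* previous = atomic_pointer_test_and_set(
		&vnode->mandatory_locked_by, descriptor, (file_descriptor*)NULL);
	bool gotLock = previous == NULL;
		// otherwise B_BUSY: another descriptor already holds the node lock
#endif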
5906 
5907 
5908 static status_t
5909 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
5910 	bool kernel)
5911 {
5912 	struct vnode* vnode;
5913 	status_t status;
5914 
5915 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5916 	if (status != B_OK)
5917 		return status;
5918 
5919 	if (HAS_FS_CALL(vnode, read_symlink)) {
5920 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5921 	} else
5922 		status = B_BAD_VALUE;
5923 
5924 	put_vnode(vnode);
5925 	return status;
5926 }
5927 
5928 
5929 static status_t
5930 common_create_symlink(int fd, char* path, const char* toPath, int mode,
5931 	bool kernel)
5932 {
5933 	// path validity checks have to be in the calling function!
5934 	char name[B_FILE_NAME_LENGTH];
5935 	struct vnode* vnode;
5936 	status_t status;
5937 
5938 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
5939 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5940 
5941 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5942 	if (status != B_OK)
5943 		return status;
5944 
5945 	if (HAS_FS_CALL(vnode, create_symlink))
5946 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5947 	else {
5948 		status = HAS_FS_CALL(vnode, write)
5949 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
5950 	}
5951 
5952 	put_vnode(vnode);
5953 
5954 	return status;
5955 }
5956 
5957 
5958 static status_t
5959 common_create_link(int pathFD, char* path, int toFD, char* toPath,
5960 	bool traverseLeafLink, bool kernel)
5961 {
5962 	// path validity checks have to be in the calling function!
5963 
5964 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
5965 		toPath, kernel));
5966 
5967 	char name[B_FILE_NAME_LENGTH];
5968 	struct vnode* directory;
5969 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
5970 		kernel);
5971 	if (status != B_OK)
5972 		return status;
5973 
5974 	struct vnode* vnode;
5975 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
5976 		kernel);
5977 	if (status != B_OK)
5978 		goto err;
5979 
5980 	if (directory->mount != vnode->mount) {
5981 		status = B_CROSS_DEVICE_LINK;
5982 		goto err1;
5983 	}
5984 
5985 	if (HAS_FS_CALL(directory, link))
5986 		status = FS_CALL(directory, link, name, vnode);
5987 	else
5988 		status = EROFS;
5989 
5990 err1:
5991 	put_vnode(vnode);
5992 err:
5993 	put_vnode(directory);
5994 
5995 	return status;
5996 }
5997 
5998 
5999 static status_t
6000 common_unlink(int fd, char* path, bool kernel)
6001 {
6002 	char filename[B_FILE_NAME_LENGTH];
6003 	struct vnode* vnode;
6004 	status_t status;
6005 
6006 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6007 		kernel));
6008 
6009 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6010 	if (status < 0)
6011 		return status;
6012 
6013 	if (HAS_FS_CALL(vnode, unlink))
6014 		status = FS_CALL(vnode, unlink, filename);
6015 	else
6016 		status = EROFS;
6017 
6018 	put_vnode(vnode);
6019 
6020 	return status;
6021 }
6022 
6023 
6024 static status_t
6025 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6026 {
6027 	struct vnode* vnode;
6028 	status_t status;
6029 
6030 	// TODO: honor effectiveUserGroup argument
6031 
6032 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6033 	if (status != B_OK)
6034 		return status;
6035 
6036 	if (HAS_FS_CALL(vnode, access))
6037 		status = FS_CALL(vnode, access, mode);
6038 	else
6039 		status = B_OK;
6040 
6041 	put_vnode(vnode);
6042 
6043 	return status;
6044 }
6045 
6046 
6047 static status_t
6048 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6049 {
6050 	struct vnode* fromVnode;
6051 	struct vnode* toVnode;
6052 	char fromName[B_FILE_NAME_LENGTH];
6053 	char toName[B_FILE_NAME_LENGTH];
6054 	status_t status;
6055 
6056 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6057 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6058 
6059 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6060 	if (status != B_OK)
6061 		return status;
6062 
6063 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6064 	if (status != B_OK)
6065 		goto err1;
6066 
6067 	if (fromVnode->device != toVnode->device) {
6068 		status = B_CROSS_DEVICE_LINK;
6069 		goto err2;
6070 	}
6071 
6072 	if (fromName[0] == '\0' || toName[0] == '\0'
6073 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6074 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6075 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6076 		status = B_BAD_VALUE;
6077 		goto err2;
6078 	}
6079 
6080 	if (HAS_FS_CALL(fromVnode, rename))
6081 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6082 	else
6083 		status = EROFS;
6084 
6085 err2:
6086 	put_vnode(toVnode);
6087 err1:
6088 	put_vnode(fromVnode);
6089 
6090 	return status;
6091 }
6092 
6093 
6094 static status_t
6095 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6096 {
6097 	struct vnode* vnode = descriptor->u.vnode;
6098 
6099 	FUNCTION(("common_read_stat: stat %p\n", stat));
6100 
6101 	// TODO: remove this once all file systems properly set them!
6102 	stat->st_crtim.tv_nsec = 0;
6103 	stat->st_ctim.tv_nsec = 0;
6104 	stat->st_mtim.tv_nsec = 0;
6105 	stat->st_atim.tv_nsec = 0;
6106 
6107 	status_t status = FS_CALL(vnode, read_stat, stat);
6108 
6109 	// fill in the st_dev and st_ino fields
6110 	if (status == B_OK) {
6111 		stat->st_dev = vnode->device;
6112 		stat->st_ino = vnode->id;
6113 		stat->st_rdev = -1;
6114 	}
6115 
6116 	return status;
6117 }
6118 
6119 
6120 static status_t
6121 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6122 	int statMask)
6123 {
6124 	struct vnode* vnode = descriptor->u.vnode;
6125 
6126 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6127 		vnode, stat, statMask));
6128 
6129 	if (!HAS_FS_CALL(vnode, write_stat))
6130 		return EROFS;
6131 
6132 	return FS_CALL(vnode, write_stat, stat, statMask);
6133 }
6134 
6135 
6136 static status_t
6137 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6138 	struct stat* stat, bool kernel)
6139 {
6140 	struct vnode* vnode;
6141 	status_t status;
6142 
6143 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6144 		stat));
6145 
6146 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6147 		kernel);
6148 	if (status < 0)
6149 		return status;
6150 
6151 	status = FS_CALL(vnode, read_stat, stat);
6152 
6153 	// fill in the st_dev and st_ino fields
6154 	if (status == B_OK) {
6155 		stat->st_dev = vnode->device;
6156 		stat->st_ino = vnode->id;
6157 		stat->st_rdev = -1;
6158 	}
6159 
6160 	put_vnode(vnode);
6161 	return status;
6162 }
6163 
6164 
6165 static status_t
6166 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6167 	const struct stat* stat, int statMask, bool kernel)
6168 {
6169 	struct vnode* vnode;
6170 	status_t status;
6171 
6172 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6173 		"kernel %d\n", fd, path, stat, statMask, kernel));
6174 
6175 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6176 		kernel);
6177 	if (status < 0)
6178 		return status;
6179 
6180 	if (HAS_FS_CALL(vnode, write_stat))
6181 		status = FS_CALL(vnode, write_stat, stat, statMask);
6182 	else
6183 		status = EROFS;
6184 
6185 	put_vnode(vnode);
6186 
6187 	return status;
6188 }
6189 
6190 
6191 static int
6192 attr_dir_open(int fd, char* path, bool kernel)
6193 {
6194 	struct vnode* vnode;
6195 	int status;
6196 
6197 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6198 		kernel));
6199 
6200 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6201 	if (status != B_OK)
6202 		return status;
6203 
6204 	status = open_attr_dir_vnode(vnode, kernel);
6205 	if (status < 0)
6206 		put_vnode(vnode);
6207 
6208 	return status;
6209 }
6210 
6211 
6212 static status_t
6213 attr_dir_close(struct file_descriptor* descriptor)
6214 {
6215 	struct vnode* vnode = descriptor->u.vnode;
6216 
6217 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6218 
6219 	if (HAS_FS_CALL(vnode, close_attr_dir))
6220 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6221 
6222 	return B_OK;
6223 }
6224 
6225 
6226 static void
6227 attr_dir_free_fd(struct file_descriptor* descriptor)
6228 {
6229 	struct vnode* vnode = descriptor->u.vnode;
6230 
6231 	if (vnode != NULL) {
6232 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6233 		put_vnode(vnode);
6234 	}
6235 }
6236 
6237 
6238 static status_t
6239 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6240 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6241 {
6242 	struct vnode* vnode = descriptor->u.vnode;
6243 
6244 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6245 
6246 	if (HAS_FS_CALL(vnode, read_attr_dir))
6247 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6248 			bufferSize, _count);
6249 
6250 	return EOPNOTSUPP;
6251 }
6252 
6253 
6254 static status_t
6255 attr_dir_rewind(struct file_descriptor* descriptor)
6256 {
6257 	struct vnode* vnode = descriptor->u.vnode;
6258 
6259 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6260 
6261 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6262 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6263 
6264 	return EOPNOTSUPP;
6265 }
6266 
6267 
6268 static int
6269 attr_create(int fd, char* path, const char* name, uint32 type,
6270 	int openMode, bool kernel)
6271 {
6272 	if (name == NULL || *name == '\0')
6273 		return B_BAD_VALUE;
6274 
6275 	struct vnode* vnode;
6276 	status_t status = fd_and_path_to_vnode(fd, path,
6277 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6278 	if (status != B_OK)
6279 		return status;
6280 
6281 	if (!HAS_FS_CALL(vnode, create_attr)) {
6282 		status = EROFS;
6283 		goto err;
6284 	}
6285 
6286 	void* cookie;
6287 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6288 	if (status != B_OK)
6289 		goto err;
6290 
6291 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6292 	if (fd >= 0)
6293 		return fd;
6294 
6295 	status = fd;
6296 
6297 	FS_CALL(vnode, close_attr, cookie);
6298 	FS_CALL(vnode, free_attr_cookie, cookie);
6299 
6300 	FS_CALL(vnode, remove_attr, name);
6301 
6302 err:
6303 	put_vnode(vnode);
6304 
6305 	return status;
6306 }
6307 
6308 
6309 static int
6310 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6311 {
6312 	if (name == NULL || *name == '\0')
6313 		return B_BAD_VALUE;
6314 
6315 	struct vnode* vnode;
6316 	status_t status = fd_and_path_to_vnode(fd, path,
6317 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6318 	if (status != B_OK)
6319 		return status;
6320 
6321 	if (!HAS_FS_CALL(vnode, open_attr)) {
6322 		status = EOPNOTSUPP;
6323 		goto err;
6324 	}
6325 
6326 	void* cookie;
6327 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6328 	if (status != B_OK)
6329 		goto err;
6330 
6331 	// now we only need a file descriptor for this attribute and we're done
6332 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6333 	if (fd >= 0)
6334 		return fd;
6335 
6336 	status = fd;
6337 
6338 	FS_CALL(vnode, close_attr, cookie);
6339 	FS_CALL(vnode, free_attr_cookie, cookie);
6340 
6341 err:
6342 	put_vnode(vnode);
6343 
6344 	return status;
6345 }
6346 
6347 
6348 static status_t
6349 attr_close(struct file_descriptor* descriptor)
6350 {
6351 	struct vnode* vnode = descriptor->u.vnode;
6352 
6353 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6354 
6355 	if (HAS_FS_CALL(vnode, close_attr))
6356 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6357 
6358 	return B_OK;
6359 }
6360 
6361 
6362 static void
6363 attr_free_fd(struct file_descriptor* descriptor)
6364 {
6365 	struct vnode* vnode = descriptor->u.vnode;
6366 
6367 	if (vnode != NULL) {
6368 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6369 		put_vnode(vnode);
6370 	}
6371 }
6372 
6373 
6374 static status_t
6375 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6376 	size_t* length)
6377 {
6378 	struct vnode* vnode = descriptor->u.vnode;
6379 
6380 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6381 		*length));
6382 
6383 	if (!HAS_FS_CALL(vnode, read_attr))
6384 		return EOPNOTSUPP;
6385 
6386 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6387 }
6388 
6389 
6390 static status_t
6391 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6392 	size_t* length)
6393 {
6394 	struct vnode* vnode = descriptor->u.vnode;
6395 
6396 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6397 	if (!HAS_FS_CALL(vnode, write_attr))
6398 		return EOPNOTSUPP;
6399 
6400 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6401 }
6402 
6403 
6404 static off_t
6405 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6406 {
6407 	off_t offset;
6408 
6409 	switch (seekType) {
6410 		case SEEK_SET:
6411 			offset = 0;
6412 			break;
6413 		case SEEK_CUR:
6414 			offset = descriptor->pos;
6415 			break;
6416 		case SEEK_END:
6417 		{
6418 			struct vnode* vnode = descriptor->u.vnode;
6419 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6420 				return EOPNOTSUPP;
6421 
6422 			struct stat stat;
6423 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6424 				&stat);
6425 			if (status != B_OK)
6426 				return status;
6427 
6428 			offset = stat.st_size;
6429 			break;
6430 		}
6431 		default:
6432 			return B_BAD_VALUE;
6433 	}
6434 
6435 	// assumes off_t is 64 bits wide
6436 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6437 		return EOVERFLOW;
6438 
6439 	pos += offset;
6440 	if (pos < 0)
6441 		return B_BAD_VALUE;
6442 
6443 	return descriptor->pos = pos;
6444 }
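

// Illustration of the overflow guard in attr_seek() above, assuming the
// descriptor's position starts at 0:
#if 0
	attr_seek(descriptor, LONGLONG_MAX, SEEK_CUR);	// OK, pos = LONGLONG_MAX
	attr_seek(descriptor, 1, SEEK_CUR);				// EOVERFLOW, would wrap
#endif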
6445 
6446 
6447 static status_t
6448 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6449 {
6450 	struct vnode* vnode = descriptor->u.vnode;
6451 
6452 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6453 
6454 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6455 		return EOPNOTSUPP;
6456 
6457 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6458 }
6459 
6460 
6461 static status_t
6462 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6463 	int statMask)
6464 {
6465 	struct vnode* vnode = descriptor->u.vnode;
6466 
6467 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6468 
6469 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6470 		return EROFS;
6471 
6472 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6473 }
6474 
6475 
6476 static status_t
6477 attr_remove(int fd, const char* name, bool kernel)
6478 {
6479 	struct file_descriptor* descriptor;
6480 	struct vnode* vnode;
6481 	status_t status;
6482 
6483 	if (name == NULL || *name == '\0')
6484 		return B_BAD_VALUE;
6485 
6486 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6487 		kernel));
6488 
6489 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6490 	if (descriptor == NULL)
6491 		return B_FILE_ERROR;
6492 
6493 	if (HAS_FS_CALL(vnode, remove_attr))
6494 		status = FS_CALL(vnode, remove_attr, name);
6495 	else
6496 		status = EROFS;
6497 
6498 	put_fd(descriptor);
6499 
6500 	return status;
6501 }
6502 
6503 
6504 static status_t
6505 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6506 	bool kernel)
6507 {
6508 	struct file_descriptor* fromDescriptor;
6509 	struct file_descriptor* toDescriptor;
6510 	struct vnode* fromVnode;
6511 	struct vnode* toVnode;
6512 	status_t status;
6513 
6514 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6515 		|| *toName == '\0')
6516 		return B_BAD_VALUE;
6517 
6518 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6519 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6520 
6521 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6522 	if (fromDescriptor == NULL)
6523 		return B_FILE_ERROR;
6524 
6525 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6526 	if (toDescriptor == NULL) {
6527 		status = B_FILE_ERROR;
6528 		goto err;
6529 	}
6530 
6531 	// are the files on the same volume?
6532 	if (fromVnode->device != toVnode->device) {
6533 		status = B_CROSS_DEVICE_LINK;
6534 		goto err1;
6535 	}
6536 
6537 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6538 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6539 	} else
6540 		status = EROFS;
6541 
6542 err1:
6543 	put_fd(toDescriptor);
6544 err:
6545 	put_fd(fromDescriptor);
6546 
6547 	return status;
6548 }
6549 
6550 
6551 static int
6552 index_dir_open(dev_t mountID, bool kernel)
6553 {
6554 	struct fs_mount* mount;
6555 	void* cookie;
6556 
6557 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6558 
6559 	status_t status = get_mount(mountID, &mount);
6560 	if (status != B_OK)
6561 		return status;
6562 
6563 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6564 		status = EOPNOTSUPP;
6565 		goto error;
6566 	}
6567 
6568 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6569 	if (status != B_OK)
6570 		goto error;
6571 
6572 	// get fd for the index directory
6573 	int fd;
6574 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6575 	if (fd >= 0)
6576 		return fd;
6577 
6578 	// something went wrong
6579 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6580 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6581 
6582 	status = fd;
6583 
6584 error:
6585 	put_mount(mount);
6586 	return status;
6587 }
6588 
6589 
6590 static status_t
6591 index_dir_close(struct file_descriptor* descriptor)
6592 {
6593 	struct fs_mount* mount = descriptor->u.mount;
6594 
6595 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6596 
6597 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6598 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6599 
6600 	return B_OK;
6601 }
6602 
6603 
6604 static void
6605 index_dir_free_fd(struct file_descriptor* descriptor)
6606 {
6607 	struct fs_mount* mount = descriptor->u.mount;
6608 
6609 	if (mount != NULL) {
6610 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6611 		put_mount(mount);
6612 	}
6613 }
6614 
6615 
6616 static status_t
6617 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6618 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6619 {
6620 	struct fs_mount* mount = descriptor->u.mount;
6621 
6622 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6623 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6624 			bufferSize, _count);
6625 	}
6626 
6627 	return EOPNOTSUPP;
6628 }
6629 
6630 
6631 static status_t
6632 index_dir_rewind(struct file_descriptor* descriptor)
6633 {
6634 	struct fs_mount* mount = descriptor->u.mount;
6635 
6636 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6637 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6638 
6639 	return EOPNOTSUPP;
6640 }
6641 
6642 
6643 static status_t
6644 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6645 	bool kernel)
6646 {
6647 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6648 		name, kernel));
6649 
6650 	struct fs_mount* mount;
6651 	status_t status = get_mount(mountID, &mount);
6652 	if (status != B_OK)
6653 		return status;
6654 
6655 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6656 		status = EROFS;
6657 		goto out;
6658 	}
6659 
6660 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6661 
6662 out:
6663 	put_mount(mount);
6664 	return status;
6665 }
6666 
6667 
6668 #if 0
6669 static status_t
6670 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6671 {
6672 	struct vnode* vnode = descriptor->u.vnode;
6673 
6674 	// ToDo: currently unused!
6675 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6676 	if (!HAS_FS_CALL(vnode, read_index_stat))
6677 		return EOPNOTSUPP;
6678 
6679 	return EOPNOTSUPP;
6680 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6681 }
6682 
6683 
6684 static void
6685 index_free_fd(struct file_descriptor* descriptor)
6686 {
6687 	struct vnode* vnode = descriptor->u.vnode;
6688 
6689 	if (vnode != NULL) {
6690 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6691 		put_vnode(vnode);
6692 	}
6693 }
6694 #endif
6695 
6696 
6697 static status_t
6698 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6699 	bool kernel)
6700 {
6701 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6702 		mountID, name, kernel));
6703 
6704 	struct fs_mount* mount;
6705 	status_t status = get_mount(mountID, &mount);
6706 	if (status != B_OK)
6707 		return status;
6708 
6709 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6710 		status = EOPNOTSUPP;
6711 		goto out;
6712 	}
6713 
6714 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6715 
6716 out:
6717 	put_mount(mount);
6718 	return status;
6719 }
6720 
6721 
6722 static status_t
6723 index_remove(dev_t mountID, const char* name, bool kernel)
6724 {
6725 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6726 		name, kernel));
6727 
6728 	struct fs_mount* mount;
6729 	status_t status = get_mount(mountID, &mount);
6730 	if (status != B_OK)
6731 		return status;
6732 
6733 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6734 		status = EROFS;
6735 		goto out;
6736 	}
6737 
6738 	status = FS_MOUNT_CALL(mount, remove_index, name);
6739 
6740 out:
6741 	put_mount(mount);
6742 	return status;
6743 }
6744 
6745 
6746 /*!	TODO: the query FS API is still pretty much the same as in R5.
6747 		It would be nice if file systems got some more kernel support
6748 		for them.
6749 		For example, query parsing should be moved into the kernel.
6750 */
6751 static int
6752 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6753 	int32 token, bool kernel)
6754 {
6755 	struct fs_mount* mount;
6756 	void* cookie;
6757 
6758 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6759 		query, kernel));
6760 
6761 	status_t status = get_mount(device, &mount);
6762 	if (status != B_OK)
6763 		return status;
6764 
6765 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6766 		status = EOPNOTSUPP;
6767 		goto error;
6768 	}
6769 
6770 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6771 		&cookie);
6772 	if (status != B_OK)
6773 		goto error;
6774 
6775 	// get fd for the query
6776 	int fd;
6777 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6778 	if (fd >= 0)
6779 		return fd;
6780 
6781 	status = fd;
6782 
6783 	// something went wrong
6784 	FS_MOUNT_CALL(mount, close_query, cookie);
6785 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6786 
6787 error:
6788 	put_mount(mount);
6789 	return status;
6790 }
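

// Sketch of the calling convention (illustrative, not compiled): for live
// queries the port/token pair tells the file system where to send entry
// notifications while the query stays open. The query string and the
// notifyPort/queryToken variables are made up for the example; B_LIVE_QUERY
// is the Storage Kit flag for live queries.
#if 0
	int fd = query_open(device, "(name==\"*.cpp\")", B_LIVE_QUERY,
		notifyPort, queryToken, true);
	// matching entries are then read like directory entries, and the fd is
	// closed through the generic descriptor machinery (query_close() above)
#endif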
6791 
6792 
6793 static status_t
6794 query_close(struct file_descriptor* descriptor)
6795 {
6796 	struct fs_mount* mount = descriptor->u.mount;
6797 
6798 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6799 
6800 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6801 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6802 
6803 	return B_OK;
6804 }
6805 
6806 
6807 static void
6808 query_free_fd(struct file_descriptor* descriptor)
6809 {
6810 	struct fs_mount* mount = descriptor->u.mount;
6811 
6812 	if (mount != NULL) {
6813 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6814 		put_mount(mount);
6815 	}
6816 }
6817 
6818 
6819 static status_t
6820 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6821 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6822 {
6823 	struct fs_mount* mount = descriptor->u.mount;
6824 
6825 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6826 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6827 			bufferSize, _count);
6828 	}
6829 
6830 	return EOPNOTSUPP;
6831 }
6832 
6833 
6834 static status_t
6835 query_rewind(struct file_descriptor* descriptor)
6836 {
6837 	struct fs_mount* mount = descriptor->u.mount;
6838 
6839 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6840 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6841 
6842 	return EOPNOTSUPP;
6843 }
6844 
6845 
6846 //	#pragma mark - General File System functions
6847 
6848 
6849 static dev_t
6850 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6851 	const char* args, bool kernel)
6852 {
6853 	struct ::fs_mount* mount;
6854 	status_t status = B_OK;
6855 	fs_volume* volume = NULL;
6856 	int32 layer = 0;
6857 
6858 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6859 
6860 	// The path is always safe; we only have to check that fsName is at
6861 	// least minimally valid - we can't make any assumptions about args.
6862 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
6863 	// We'll get it from the DDM later.
6864 	if (fsName == NULL) {
6865 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6866 			return B_BAD_VALUE;
6867 	} else if (fsName[0] == '\0')
6868 		return B_BAD_VALUE;
6869 
6870 	RecursiveLocker mountOpLocker(sMountOpLock);
6871 
6872 	// Helper to delete a newly created file device on failure.
6873 	// Not exactly beautiful, but helps to keep the code below cleaner.
6874 	struct FileDeviceDeleter {
6875 		FileDeviceDeleter() : id(-1) {}
6876 		~FileDeviceDeleter()
6877 		{
6878 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6879 		}
6880 
6881 		partition_id id;
6882 	} fileDeviceDeleter;
6883 
6884 	// If the file system is not a "virtual" one, the device argument should
6885 	// point to a real file/device (if given at all).
6886 	// get the partition
6887 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6888 	KPartition* partition = NULL;
6889 	KPath normalizedDevice;
6890 	bool newlyCreatedFileDevice = false;
6891 
6892 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
6893 		// normalize the device path
6894 		status = normalizedDevice.SetTo(device, true);
6895 		if (status != B_OK)
6896 			return status;
6897 
6898 		// get a corresponding partition from the DDM
6899 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6900 		if (partition == NULL) {
6901 			// Partition not found: this either means the user supplied an
6902 			// invalid path, or that the path refers to an image file. We try
6903 			// to let the DDM create a file device for the path.
6904 			partition_id deviceID = ddm->CreateFileDevice(
6905 				normalizedDevice.Path(), &newlyCreatedFileDevice);
6906 			if (deviceID >= 0) {
6907 				partition = ddm->RegisterPartition(deviceID);
6908 				if (newlyCreatedFileDevice)
6909 					fileDeviceDeleter.id = deviceID;
6910 			}
6911 		}
6912 
6913 		if (!partition) {
6914 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6915 				normalizedDevice.Path()));
6916 			return B_ENTRY_NOT_FOUND;
6917 		}
6918 
6919 		device = normalizedDevice.Path();
6920 			// correct path to file device
6921 	}
6922 	PartitionRegistrar partitionRegistrar(partition, true);
6923 
6924 	// Write lock the partition's device. For the time being, we keep the lock
6925 	// until we're done mounting -- not nice, but it ensures that no one is
6926 	// interfering.
6927 	// TODO: Just mark the partition busy while mounting!
6928 	KDiskDevice* diskDevice = NULL;
6929 	if (partition) {
6930 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6931 		if (!diskDevice) {
6932 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6933 			return B_ERROR;
6934 		}
6935 	}
6936 
6937 	DeviceWriteLocker writeLocker(diskDevice, true);
6938 		// this takes over the write lock acquired before
6939 
6940 	if (partition != NULL) {
6941 		// make sure that the partition is not busy
6942 		if (partition->IsBusy()) {
6943 			TRACE(("fs_mount(): Partition is busy.\n"));
6944 			return B_BUSY;
6945 		}
6946 
6947 		// if no FS name had been supplied, we get it from the partition
6948 		if (fsName == NULL) {
6949 			KDiskSystem* diskSystem = partition->DiskSystem();
6950 			if (!diskSystem) {
6951 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6952 					"recognize it.\n"));
6953 				return B_BAD_VALUE;
6954 			}
6955 
6956 			if (!diskSystem->IsFileSystem()) {
6957 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6958 					"partitioning system.\n"));
6959 				return B_BAD_VALUE;
6960 			}
6961 
6962 			// The disk system name will not change, and the KDiskSystem
6963 			// object will not go away while the disk device is locked (and
6964 			// the partition has a reference to it), so this is safe.
6965 			fsName = diskSystem->Name();
6966 		}
6967 	}
6968 
6969 	mount = new(std::nothrow) (struct ::fs_mount);
6970 	if (mount == NULL)
6971 		return B_NO_MEMORY;
6972 
6973 	mount->device_name = strdup(device);
6974 		// "device" can be NULL
6975 
6976 	status = mount->entry_cache.Init();
6977 	if (status != B_OK)
6978 		goto err1;
6979 
6980 	// initialize structure
6981 	mount->id = sNextMountID++;
6982 	mount->partition = NULL;
6983 	mount->root_vnode = NULL;
6984 	mount->covers_vnode = NULL;
6985 	mount->unmounting = false;
6986 	mount->owns_file_device = false;
6987 	mount->volume = NULL;
6988 
6989 	// build up the volume(s)
6990 	while (true) {
6991 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
6992 		if (layerFSName == NULL) {
6993 			if (layer == 0) {
6994 				status = B_NO_MEMORY;
6995 				goto err1;
6996 			}
6997 
6998 			break;
6999 		}
7000 
7001 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7002 		if (volume == NULL) {
7003 			status = B_NO_MEMORY;
7004 			free(layerFSName);
7005 			goto err1;
7006 		}
7007 
7008 		volume->id = mount->id;
7009 		volume->partition = partition != NULL ? partition->ID() : -1;
7010 		volume->layer = layer++;
7011 		volume->private_volume = NULL;
7012 		volume->ops = NULL;
7013 		volume->sub_volume = NULL;
7014 		volume->super_volume = NULL;
7015 		volume->file_system = NULL;
7016 		volume->file_system_name = NULL;
7017 
7018 		volume->file_system_name = get_file_system_name(layerFSName);
7019 		if (volume->file_system_name == NULL) {
7020 			status = B_NO_MEMORY;
7021 			free(layerFSName);
7022 			free(volume);
7023 			goto err1;
7024 		}
7025 
7026 		volume->file_system = get_file_system(layerFSName);
7027 		if (volume->file_system == NULL) {
7028 			status = ENODEV;
7029 			free(layerFSName);
7030 			free(volume->file_system_name);
7031 			free(volume);
7032 			goto err1;
7033 		}
7034 
7035 		if (mount->volume == NULL)
7036 			mount->volume = volume;
7037 		else {
7038 			volume->super_volume = mount->volume;
7039 			mount->volume->sub_volume = volume;
7040 			mount->volume = volume;
7041 		}
7042 	}
7043 
7044 	// insert mount struct into list before we call FS's mount() function
7045 	// so that vnodes can be created for this mount
7046 	mutex_lock(&sMountMutex);
7047 	hash_insert(sMountsTable, mount);
7048 	mutex_unlock(&sMountMutex);
7049 
7050 	ino_t rootID;
7051 
7052 	if (!sRoot) {
7053 		// we haven't mounted anything yet
7054 		if (strcmp(path, "/") != 0) {
7055 			status = B_ERROR;
7056 			goto err2;
7057 		}
7058 
7059 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7060 			args, &rootID);
7061 		if (status != 0)
7062 			goto err2;
7063 	} else {
7064 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7065 		if (status != B_OK)
7066 			goto err2;
7067 
7068 		// make sure covers_vnode is a directory
7069 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7070 			status = B_NOT_A_DIRECTORY;
7071 			goto err3;
7072 		}
7073 
7074 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7075 			// this is already a mount point
7076 			status = B_BUSY;
7077 			goto err3;
7078 		}
7079 
7080 		// mount it/them
7081 		fs_volume* volume = mount->volume;
7082 		while (volume) {
7083 			status = volume->file_system->mount(volume, device, flags, args,
7084 				&rootID);
7085 			if (status != B_OK) {
7086 				if (volume->sub_volume)
7087 					goto err4;
7088 				goto err3;
7089 			}
7090 
7091 			volume = volume->super_volume;
7092 		}
7093 
7094 		volume = mount->volume;
7095 		while (volume) {
7096 			if (volume->ops->all_layers_mounted != NULL)
7097 				volume->ops->all_layers_mounted(volume);
7098 			volume = volume->super_volume;
7099 		}
7100 	}
7101 
7102 	// the root node is supposed to be owned by the file system - it must
7103 	// exist at this point
7104 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7105 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7106 		panic("fs_mount: file system does not own its root node!\n");
7107 		status = B_ERROR;
7108 		goto err4;
7109 	}
7110 
7111 	// No race here, since fs_mount() is the only function changing
7112 	// covers_vnode (and holds sMountOpLock at that time).
7113 	rw_lock_write_lock(&sVnodeLock);
7114 	if (mount->covers_vnode)
7115 		mount->covers_vnode->covered_by = mount->root_vnode;
7116 	rw_lock_write_unlock(&sVnodeLock);
7117 
7118 	if (!sRoot) {
7119 		sRoot = mount->root_vnode;
7120 		mutex_lock(&sIOContextRootLock);
7121 		get_current_io_context(true)->root = sRoot;
7122 		mutex_unlock(&sIOContextRootLock);
7123 		inc_vnode_ref_count(sRoot);
7124 	}
7125 
7126 	// supply the partition (if any) with the mount cookie and mark it mounted
7127 	if (partition) {
7128 		partition->SetMountCookie(mount->volume->private_volume);
7129 		partition->SetVolumeID(mount->id);
7130 
7131 		// keep a partition reference as long as the partition is mounted
7132 		partitionRegistrar.Detach();
7133 		mount->partition = partition;
7134 		mount->owns_file_device = newlyCreatedFileDevice;
7135 		fileDeviceDeleter.id = -1;
7136 	}
7137 
7138 	notify_mount(mount->id,
7139 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7140 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7141 
7142 	return mount->id;
7143 
7144 err4:
7145 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7146 err3:
7147 	if (mount->covers_vnode != NULL)
7148 		put_vnode(mount->covers_vnode);
7149 err2:
7150 	mutex_lock(&sMountMutex);
7151 	hash_remove(sMountsTable, mount);
7152 	mutex_unlock(&sMountMutex);
7153 err1:
7154 	delete mount;
7155 
7156 	return status;
7157 }
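

// The "build up the volume(s)" loop above creates one fs_volume per layer
// named in fsName and chains them: mount->volume ends up pointing at the
// most recently created (topmost) layer, and the super_volume links lead
// back to layer 0, the first name encoded in fsName, with sub_volume links
// running the other way. A sketch of the walk fs_read_info() performs:
#if 0
	// walk from the top layer down to layer 0:
	fs_volume* volume = mount->volume;
	while (volume->super_volume != NULL)
		volume = volume->super_volume;
	// volume->file_system_name is now the base (layer 0) file system name
#endif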
7158 
7159 
7160 static status_t
7161 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7162 {
7163 	struct fs_mount* mount;
7164 	status_t err;
7165 
7166 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7167 		kernel));
7168 
7169 	struct vnode* pathVnode = NULL;
7170 	if (path != NULL) {
7171 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7172 		if (err != B_OK)
7173 			return B_ENTRY_NOT_FOUND;
7174 	}
7175 
7176 	RecursiveLocker mountOpLocker(sMountOpLock);
7177 
7178 	// this lock is not strictly necessary, but is here in the KDEBUG case
7179 	// to keep the ASSERT in find_mount() working.
7180 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7181 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7182 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7183 	if (mount == NULL) {
7184 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7185 			pathVnode);
7186 	}
7187 
7188 	if (path != NULL) {
7189 		put_vnode(pathVnode);
7190 
7191 		if (mount->root_vnode != pathVnode) {
7192 			// not mountpoint
7193 			return B_BAD_VALUE;
7194 		}
7195 	}
7196 
7197 	// if the volume is associated with a partition, lock the device of the
7198 	// partition as long as we are unmounting
7199 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7200 	KPartition* partition = mount->partition;
7201 	KDiskDevice* diskDevice = NULL;
7202 	if (partition != NULL) {
7203 		if (partition->Device() == NULL) {
7204 			dprintf("fs_unmount(): There is no device!\n");
7205 			return B_ERROR;
7206 		}
7207 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7208 		if (!diskDevice) {
7209 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7210 			return B_ERROR;
7211 		}
7212 	}
7213 	DeviceWriteLocker writeLocker(diskDevice, true);
7214 
7215 	// make sure that the partition is not busy
7216 	if (partition != NULL) {
7217 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7218 			TRACE(("fs_unmount(): Partition is busy.\n"));
7219 			return B_BUSY;
7220 		}
7221 	}
7222 
7223 	// grab the vnode master mutex to keep someone from creating
7224 	// a vnode while we're figuring out if we can continue
7225 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7226 
7227 	bool disconnectedDescriptors = false;
7228 
7229 	while (true) {
7230 		bool busy = false;
7231 
7232 		// cycle through the list of vnodes associated with this mount and
7233 		// make sure all of them are not busy or have refs on them
7234 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7235 		while (struct vnode* vnode = iterator.Next()) {
7236 			// The root vnode ref_count needs to be 1 here (the mount has a
7237 			// reference).
7238 			if (vnode->IsBusy()
7239 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7240 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7241 				// there are still vnodes in use on this mount, so we cannot
7242 				// unmount yet
7243 				busy = true;
7244 				break;
7245 			}
7246 		}
7247 
7248 		if (!busy)
7249 			break;
7250 
7251 		if ((flags & B_FORCE_UNMOUNT) == 0)
7252 			return B_BUSY;
7253 
7254 		if (disconnectedDescriptors) {
7255 			// wait a bit until the last access is finished, and then try again
7256 			vnodesWriteLocker.Unlock();
7257 			snooze(100000);
7258 			// TODO: if there is some kind of bug that prevents the ref counts
7259 			// from getting back to zero, this will fall into an endless loop...
7260 			vnodesWriteLocker.Lock();
7261 			continue;
7262 		}
7263 
7264 		// the file system is still busy - but we're forced to unmount it,
7265 		// so let's disconnect all open file descriptors
7266 
7267 		mount->unmounting = true;
7268 			// prevent new vnodes from being created
7269 
7270 		vnodesWriteLocker.Unlock();
7271 
7272 		disconnect_mount_or_vnode_fds(mount, NULL);
7273 		disconnectedDescriptors = true;
7274 
7275 		vnodesWriteLocker.Lock();
7276 	}
7277 
7278 	// we can safely continue, mark all of the vnodes busy and this mount
7279 	// structure in unmounting state
7280 	mount->unmounting = true;
7281 
7282 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7283 	while (struct vnode* vnode = iterator.Next()) {
7284 		vnode->SetBusy(true);
7285 		vnode_to_be_freed(vnode);
7286 	}
7287 
7288 	// The ref_count of the root node is 1 at this point; see above for why.
7289 	mount->root_vnode->ref_count--;
7290 	vnode_to_be_freed(mount->root_vnode);
7291 
7292 	mount->covers_vnode->covered_by = NULL;
7293 
7294 	vnodesWriteLocker.Unlock();
7295 
7296 	put_vnode(mount->covers_vnode);
7297 
7298 	// Free all vnodes associated with this mount.
7299 	// They will be removed from the mount list by free_vnode(), so
7300 	// we don't have to do this.
7301 	while (struct vnode* vnode = mount->vnodes.Head())
7302 		free_vnode(vnode, false);
7303 
7304 	// remove the mount structure from the hash table
7305 	mutex_lock(&sMountMutex);
7306 	hash_remove(sMountsTable, mount);
7307 	mutex_unlock(&sMountMutex);
7308 
7309 	mountOpLocker.Unlock();
7310 
7311 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7312 	notify_unmount(mount->id);
7313 
7314 	// dereference the partition and mark it unmounted
7315 	if (partition) {
7316 		partition->SetVolumeID(-1);
7317 		partition->SetMountCookie(NULL);
7318 
7319 		if (mount->owns_file_device)
7320 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7321 		partition->Unregister();
7322 	}
7323 
7324 	delete mount;
7325 	return B_OK;
7326 }
7327 
7328 
7329 static status_t
7330 fs_sync(dev_t device)
7331 {
7332 	struct fs_mount* mount;
7333 	status_t status = get_mount(device, &mount);
7334 	if (status != B_OK)
7335 		return status;
7336 
7337 	struct vnode marker;
7338 	memset(&marker, 0, sizeof(marker));
7339 	marker.SetBusy(true);
7340 	marker.SetRemoved(true);
7341 
7342 	// First, synchronize all file caches
7343 
7344 	while (true) {
7345 		WriteLocker locker(sVnodeLock);
7346 			// Note: That's the easy way, which is probably OK for sync(),
7347 			// since it's a relatively rare call and doesn't need to allow for
7348 			// a lot of concurrency. Using a read lock would be possible, but
7349 			// also more involved, since we would have to lock the individual
7350 			// nodes and take care of the locking order, which we might not
7351 			// want to do while holding fs_mount::rlock.
7352 
7353 		// synchronize access to vnode list
7354 		recursive_lock_lock(&mount->rlock);
7355 
7356 		struct vnode* vnode;
7357 		if (!marker.IsRemoved()) {
7358 			vnode = mount->vnodes.GetNext(&marker);
7359 			mount->vnodes.Remove(&marker);
7360 			marker.SetRemoved(true);
7361 		} else
7362 			vnode = mount->vnodes.First();
7363 
7364 		while (vnode != NULL && (vnode->cache == NULL
7365 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7366 			// TODO: we could track writes (and writable mapped vnodes)
7367 			//	and have a simple flag that we could test for here
7368 			vnode = mount->vnodes.GetNext(vnode);
7369 		}
7370 
7371 		if (vnode != NULL) {
7372 			// insert marker vnode again
7373 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7374 			marker.SetRemoved(false);
7375 		}
7376 
7377 		recursive_lock_unlock(&mount->rlock);
7378 
7379 		if (vnode == NULL)
7380 			break;
7381 
7382 		vnode = lookup_vnode(mount->id, vnode->id);
7383 		if (vnode == NULL || vnode->IsBusy())
7384 			continue;
7385 
7386 		if (vnode->ref_count == 0) {
7387 			// this vnode has been unused before
7388 			vnode_used(vnode);
7389 		}
7390 		inc_vnode_ref_count(vnode);
7391 
7392 		locker.Unlock();
7393 
7394 		if (vnode->cache != NULL && !vnode->IsRemoved())
7395 			vnode->cache->WriteModified();
7396 
7397 		put_vnode(vnode);
7398 	}
7399 
7400 	// And then, let the file systems do their synchronizing work
7401 
7402 	if (HAS_FS_MOUNT_CALL(mount, sync))
7403 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7404 
7405 	put_mount(mount);
7406 	return status;
7407 }
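

// The marker vnode above is the usual pattern for iterating a list that may
// change while locks are dropped: after picking a node, the marker is
// inserted right behind it, the locks are released for the slow
// WriteModified() call, and the next round resumes at the marker instead of
// at a possibly stale vnode pointer. Condensed (illustrative only):
//
//	under the locks:
//		vnode = marker inserted? GetNext(&marker) : First();
//		Insert(GetNext(vnode), &marker);
//	drop the locks, write back the vnode's cache, reacquire, repeat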
7408 
7409 
7410 static status_t
7411 fs_read_info(dev_t device, struct fs_info* info)
7412 {
7413 	struct fs_mount* mount;
7414 	status_t status = get_mount(device, &mount);
7415 	if (status != B_OK)
7416 		return status;
7417 
7418 	memset(info, 0, sizeof(struct fs_info));
7419 
7420 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7421 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7422 
7423 	// fill in info the file system doesn't (have to) know about
7424 	if (status == B_OK) {
7425 		info->dev = mount->id;
7426 		info->root = mount->root_vnode->id;
7427 
7428 		fs_volume* volume = mount->volume;
7429 		while (volume->super_volume != NULL)
7430 			volume = volume->super_volume;
7431 
7432 		strlcpy(info->fsh_name, volume->file_system_name,
7433 			sizeof(info->fsh_name));
7434 		if (mount->device_name != NULL) {
7435 			strlcpy(info->device_name, mount->device_name,
7436 				sizeof(info->device_name));
7437 		}
7438 	}
7439 
7440 	// if the call is not supported by the file system, there are still
7441 	// the parts that we filled out ourselves
7442 
7443 	put_mount(mount);
7444 	return status;
7445 }
7446 
7447 
7448 static status_t
7449 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7450 {
7451 	struct fs_mount* mount;
7452 	status_t status = get_mount(device, &mount);
7453 	if (status != B_OK)
7454 		return status;
7455 
7456 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7457 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7458 	else
7459 		status = EROFS;
7460 
7461 	put_mount(mount);
7462 	return status;
7463 }
7464 
7465 
7466 static dev_t
7467 fs_next_device(int32* _cookie)
7468 {
7469 	struct fs_mount* mount = NULL;
7470 	dev_t device = *_cookie;
7471 
7472 	mutex_lock(&sMountMutex);
7473 
7474 	// Since device IDs are assigned sequentially, this algorithm
7475 	// works well enough. It makes sure that the device list
7476 	// returned is sorted, and that no device is skipped when an
7477 	// already visited device got unmounted.
7478 
7479 	while (device < sNextMountID) {
7480 		mount = find_mount(device++);
7481 		if (mount != NULL && mount->volume->private_volume != NULL)
7482 			break;
7483 	}
7484 
7485 	*_cookie = device;
7486 
7487 	if (mount != NULL)
7488 		device = mount->id;
7489 	else
7490 		device = B_BAD_VALUE;
7491 
7492 	mutex_unlock(&sMountMutex);
7493 
7494 	return device;
7495 }
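

// Typical use of the cookie protocol above (a sketch): starting from a
// cookie of 0, each call returns the next mounted volume in ID order until
// B_BAD_VALUE (negative) signals the end of the list.
#if 0
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0) {
		// device is the ID of a currently mounted volume
	}
#endif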
7496 
7497 
7498 ssize_t
7499 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7500 	void *buffer, size_t readBytes)
7501 {
7502 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7503 	if (attrFD < 0)
7504 		return attrFD;
7505 
7506 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7507 
7508 	_kern_close(attrFD);
7509 
7510 	return bytesRead;
7511 }
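

// Example use of fs_read_attr() (illustrative): reading a file's MIME type
// attribute into a local buffer. Note that the type parameter is not
// checked by this helper; the attribute is simply read as raw bytes.
#if 0
	char buffer[B_MIME_TYPE_LENGTH];
	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
		buffer, sizeof(buffer));
	if (bytesRead < 0)
		;	// error code from attr_open() or _kern_read()
#endif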
7512 
7513 
7514 static status_t
7515 get_cwd(char* buffer, size_t size, bool kernel)
7516 {
7517 	// Get current working directory from io context
7518 	struct io_context* context = get_current_io_context(kernel);
7519 	status_t status;
7520 
7521 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
7522 
7523 	mutex_lock(&context->io_mutex);
7524 
7525 	struct vnode* vnode = context->cwd;
7526 	if (vnode)
7527 		inc_vnode_ref_count(vnode);
7528 
7529 	mutex_unlock(&context->io_mutex);
7530 
7531 	if (vnode) {
7532 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7533 		put_vnode(vnode);
7534 	} else
7535 		status = B_ERROR;
7536 
7537 	return status;
7538 }
7539 
7540 
7541 static status_t
7542 set_cwd(int fd, char* path, bool kernel)
7543 {
7544 	struct io_context* context;
7545 	struct vnode* vnode = NULL;
7546 	struct vnode* oldDirectory;
7547 	status_t status;
7548 
7549 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7550 
7551 	// Get vnode for passed path, and bail if it failed
7552 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7553 	if (status < 0)
7554 		return status;
7555 
7556 	if (!S_ISDIR(vnode->Type())) {
7557 		// nope, can't cwd to here
7558 		status = B_NOT_A_DIRECTORY;
7559 		goto err;
7560 	}
7561 
7562 	// Get current io context and lock
7563 	context = get_current_io_context(kernel);
7564 	mutex_lock(&context->io_mutex);
7565 
7566 	// save the old current working directory first
7567 	oldDirectory = context->cwd;
7568 	context->cwd = vnode;
7569 
7570 	mutex_unlock(&context->io_mutex);
7571 
7572 	if (oldDirectory)
7573 		put_vnode(oldDirectory);
7574 
7575 	return B_NO_ERROR;
7576 
7577 err:
7578 	put_vnode(vnode);
7579 	return status;
7580 }
7581 
7582 
7583 //	#pragma mark - kernel mirrored syscalls
7584 
7585 
7586 dev_t
7587 _kern_mount(const char* path, const char* device, const char* fsName,
7588 	uint32 flags, const char* args, size_t argsLength)
7589 {
7590 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7591 	if (pathBuffer.InitCheck() != B_OK)
7592 		return B_NO_MEMORY;
7593 
7594 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7595 }
7596 
7597 
7598 status_t
7599 _kern_unmount(const char* path, uint32 flags)
7600 {
7601 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7602 	if (pathBuffer.InitCheck() != B_OK)
7603 		return B_NO_MEMORY;
7604 
7605 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7606 }
7607 
7608 
7609 status_t
7610 _kern_read_fs_info(dev_t device, struct fs_info* info)
7611 {
7612 	if (info == NULL)
7613 		return B_BAD_VALUE;
7614 
7615 	return fs_read_info(device, info);
7616 }
7617 
7618 
7619 status_t
7620 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7621 {
7622 	if (info == NULL)
7623 		return B_BAD_VALUE;
7624 
7625 	return fs_write_info(device, info, mask);
7626 }
7627 
7628 
7629 status_t
7630 _kern_sync(void)
7631 {
7632 	// Note: _kern_sync() is also called from _user_sync()
7633 	int32 cookie = 0;
7634 	dev_t device;
7635 	while ((device = next_dev(&cookie)) >= 0) {
7636 		status_t status = fs_sync(device);
7637 		if (status != B_OK && status != B_BAD_VALUE) {
7638 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7639 				strerror(status));
7640 		}
7641 	}
7642 
7643 	return B_OK;
7644 }
7645 
7646 
7647 dev_t
7648 _kern_next_device(int32* _cookie)
7649 {
7650 	return fs_next_device(_cookie);
7651 }
7652 
7653 
7654 status_t
7655 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7656 	size_t infoSize)
7657 {
7658 	if (infoSize != sizeof(fd_info))
7659 		return B_BAD_VALUE;
7660 
7661 	struct io_context* context = NULL;
7662 	struct team* team = NULL;
7663 
7664 	cpu_status state = disable_interrupts();
7665 	GRAB_TEAM_LOCK();
7666 
7667 	bool contextLocked = false;
7668 	team = team_get_team_struct_locked(teamID);
7669 	if (team) {
7670 		// We cannot lock the IO context while holding the team lock, nor can
7671 		// we just drop the team lock, since it might be deleted in the
7672 		// meantime. team_remove_team() acquires the thread lock when removing
7673 		// the team from the team hash table, though. Hence we switch to the
7674 		// thread lock and use mutex_lock_threads_locked().
7675 		context = (io_context*)team->io_context;
7676 
7677 		GRAB_THREAD_LOCK();
7678 		RELEASE_TEAM_LOCK();
7679 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7680 		RELEASE_THREAD_LOCK();
7681 	} else
7682 		RELEASE_TEAM_LOCK();
7683 
7684 	restore_interrupts(state);
7685 
7686 	if (!contextLocked) {
		// team doesn't exist or seems to be gone
7688 		return B_BAD_TEAM_ID;
7689 	}
7690 
	// The team cannot be deleted completely while we hold its io_context
	// mutex, so we can safely play with it now
7693 
7694 	uint32 slot = *_cookie;
7695 
7696 	struct file_descriptor* descriptor;
7697 	while (slot < context->table_size
7698 		&& (descriptor = context->fds[slot]) == NULL) {
7699 		slot++;
7700 	}
7701 
7702 	if (slot >= context->table_size) {
7703 		mutex_unlock(&context->io_mutex);
7704 		return B_ENTRY_NOT_FOUND;
7705 	}
7706 
7707 	info->number = slot;
7708 	info->open_mode = descriptor->open_mode;
7709 
7710 	struct vnode* vnode = fd_vnode(descriptor);
7711 	if (vnode != NULL) {
7712 		info->device = vnode->device;
7713 		info->node = vnode->id;
7714 	} else if (descriptor->u.mount != NULL) {
7715 		info->device = descriptor->u.mount->id;
7716 		info->node = -1;
7717 	}
7718 
7719 	mutex_unlock(&context->io_mutex);
7720 
7721 	*_cookie = slot + 1;
7722 	return B_OK;
7723 }
7724 
7725 
7726 int
7727 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7728 	int perms)
7729 {
7730 	if ((openMode & O_CREAT) != 0) {
7731 		return file_create_entry_ref(device, inode, name, openMode, perms,
7732 			true);
7733 	}
7734 
7735 	return file_open_entry_ref(device, inode, name, openMode, true);
7736 }
7737 
7738 
7739 /*!	\brief Opens a node specified by a FD + path pair.
7740 
7741 	At least one of \a fd and \a path must be specified.
7742 	If only \a fd is given, the function opens the node identified by this
7743 	FD. If only a path is given, this path is opened. If both are given and
7744 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7745 	of the directory (!) identified by \a fd.
7746 
7747 	\param fd The FD. May be < 0.
7748 	\param path The absolute or relative path. May be \c NULL.
7749 	\param openMode The open mode.
7750 	\return A FD referring to the newly opened node, or an error code,
7751 			if an error occurs.
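
	A usage sketch (the directory FD \c dirFD and the paths are
	hypothetical):
	\code
	// absolute path: fd is ignored
	int fd1 = _kern_open(-1, "/boot/home/file", O_RDONLY, 0);
	// relative path: resolved relative to the directory dirFD refers to
	int fd2 = _kern_open(dirFD, "file", O_RDWR | O_CREAT, 0644);
	\endcode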
7752 */
7753 int
7754 _kern_open(int fd, const char* path, int openMode, int perms)
7755 {
7756 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7757 	if (pathBuffer.InitCheck() != B_OK)
7758 		return B_NO_MEMORY;
7759 
7760 	if (openMode & O_CREAT)
7761 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7762 
7763 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7764 }
7765 
7766 
7767 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7768 
	The supplied name may be \c NULL, in which case the directory identified
	by \a device and \a inode will be opened. Otherwise \a device and
7771 	\a inode identify the parent directory of the directory to be opened
7772 	and \a name its entry name.
7773 
7774 	\param device If \a name is specified the ID of the device the parent
7775 		   directory of the directory to be opened resides on, otherwise
7776 		   the device of the directory itself.
7777 	\param inode If \a name is specified the node ID of the parent
		   directory of the directory to be opened, otherwise the node ID of
		   the directory itself.
7780 	\param name The entry name of the directory to be opened. If \c NULL,
7781 		   the \a device + \a inode pair identify the node to be opened.
7782 	\return The FD of the newly opened directory or an error code, if
7783 			something went wrong.
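
	For example (the \a device and \a inode values are hypothetical):
	\code
	// open the directory identified by (device, inode) itself
	int fd1 = _kern_open_dir_entry_ref(device, inode, NULL);
	// open the entry "subdir" of the directory (device, inode)
	int fd2 = _kern_open_dir_entry_ref(device, inode, "subdir");
	\endcode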
7784 */
7785 int
7786 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7787 {
7788 	return dir_open_entry_ref(device, inode, name, true);
7789 }
7790 
7791 
7792 /*!	\brief Opens a directory specified by a FD + path pair.
7793 
7794 	At least one of \a fd and \a path must be specified.
7795 	If only \a fd is given, the function opens the directory identified by this
7796 	FD. If only a path is given, this path is opened. If both are given and
7797 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7798 	of the directory (!) identified by \a fd.
7799 
7800 	\param fd The FD. May be < 0.
7801 	\param path The absolute or relative path. May be \c NULL.
7802 	\return A FD referring to the newly opened directory, or an error code,
7803 			if an error occurs.
7804 */
7805 int
7806 _kern_open_dir(int fd, const char* path)
7807 {
7808 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7809 	if (pathBuffer.InitCheck() != B_OK)
7810 		return B_NO_MEMORY;
7811 
7812 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7813 }
7814 
7815 
7816 status_t
7817 _kern_fcntl(int fd, int op, uint32 argument)
7818 {
7819 	return common_fcntl(fd, op, argument, true);
7820 }
7821 
7822 
7823 status_t
7824 _kern_fsync(int fd)
7825 {
7826 	return common_sync(fd, true);
7827 }
7828 
7829 
7830 status_t
7831 _kern_lock_node(int fd)
7832 {
7833 	return common_lock_node(fd, true);
7834 }
7835 
7836 
7837 status_t
7838 _kern_unlock_node(int fd)
7839 {
7840 	return common_unlock_node(fd, true);
7841 }
7842 
7843 
7844 status_t
7845 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7846 	int perms)
7847 {
7848 	return dir_create_entry_ref(device, inode, name, perms, true);
7849 }
7850 
7851 
7852 /*!	\brief Creates a directory specified by a FD + path pair.
7853 
7854 	\a path must always be specified (it contains the name of the new directory
7855 	at least). If only a path is given, this path identifies the location at
7856 	which the directory shall be created. If both \a fd and \a path are given
7857 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
7858 	of the directory (!) identified by \a fd.
7859 
7860 	\param fd The FD. May be < 0.
7861 	\param path The absolute or relative path. Must not be \c NULL.
7862 	\param perms The access permissions the new directory shall have.
7863 	\return \c B_OK, if the directory has been created successfully, another
7864 			error code otherwise.
7865 */
7866 status_t
7867 _kern_create_dir(int fd, const char* path, int perms)
7868 {
7869 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7870 	if (pathBuffer.InitCheck() != B_OK)
7871 		return B_NO_MEMORY;
7872 
7873 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7874 }
7875 
7876 
7877 status_t
7878 _kern_remove_dir(int fd, const char* path)
7879 {
7880 	if (path) {
7881 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7882 		if (pathBuffer.InitCheck() != B_OK)
7883 			return B_NO_MEMORY;
7884 
7885 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7886 	}
7887 
7888 	return dir_remove(fd, NULL, true);
7889 }
7890 
7891 
7892 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7893 
7894 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node identified
	by this FD. If only a path is given, this path identifies the
7897 	symlink to be read. If both are given and the path is absolute, \a fd is
7898 	ignored; a relative path is reckoned off of the directory (!) identified
7899 	by \a fd.
7900 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7901 	will still be updated to reflect the required buffer size.
7902 
7903 	\param fd The FD. May be < 0.
7904 	\param path The absolute or relative path. May be \c NULL.
7905 	\param buffer The buffer into which the contents of the symlink shall be
7906 		   written.
7907 	\param _bufferSize A pointer to the size of the supplied buffer.
	\return \c B_OK on success or an appropriate error code; on success,
			\a _bufferSize reflects the length of the link.
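
	A sketch of the retry pattern a caller might use:
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// size now holds the required buffer size
	}
	\endcode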
7909 */
7910 status_t
7911 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
7912 {
7913 	if (path) {
7914 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7915 		if (pathBuffer.InitCheck() != B_OK)
7916 			return B_NO_MEMORY;
7917 
7918 		return common_read_link(fd, pathBuffer.LockBuffer(),
7919 			buffer, _bufferSize, true);
7920 	}
7921 
7922 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7923 }
7924 
7925 
7926 /*!	\brief Creates a symlink specified by a FD + path pair.
7927 
7928 	\a path must always be specified (it contains the name of the new symlink
7929 	at least). If only a path is given, this path identifies the location at
7930 	which the symlink shall be created. If both \a fd and \a path are given and
7931 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7932 	of the directory (!) identified by \a fd.
7933 
	\param fd The FD. May be < 0.
	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the symlink shall point to. Must not be \c NULL.
	\param mode The access permissions the new symlink shall have.
7937 	\return \c B_OK, if the symlink has been created successfully, another
7938 			error code otherwise.
7939 */
7940 status_t
7941 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
7942 {
7943 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7944 	if (pathBuffer.InitCheck() != B_OK)
7945 		return B_NO_MEMORY;
7946 
7947 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7948 		toPath, mode, true);
7949 }
7950 
7951 
7952 status_t
7953 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
7954 	bool traverseLeafLink)
7955 {
7956 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7957 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
7958 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7959 		return B_NO_MEMORY;
7960 
7961 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
7962 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
7963 }
7964 
7965 
7966 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
7967 
7968 	\a path must always be specified (it contains at least the name of the entry
7969 	to be deleted). If only a path is given, this path identifies the entry
7970 	directly. If both \a fd and \a path are given and the path is absolute,
7971 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7972 	identified by \a fd.
7973 
7974 	\param fd The FD. May be < 0.
7975 	\param path The absolute or relative path. Must not be \c NULL.
7976 	\return \c B_OK, if the entry has been removed successfully, another
7977 			error code otherwise.
7978 */
7979 status_t
7980 _kern_unlink(int fd, const char* path)
7981 {
7982 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7983 	if (pathBuffer.InitCheck() != B_OK)
7984 		return B_NO_MEMORY;
7985 
7986 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
7987 }
7988 
7989 
/*!	\brief Moves an entry specified by a FD + path pair to an entry specified
7991 		   by another FD + path pair.
7992 
7993 	\a oldPath and \a newPath must always be specified (they contain at least
7994 	the name of the entry). If only a path is given, this path identifies the
7995 	entry directly. If both a FD and a path are given and the path is absolute,
7996 	the FD is ignored; a relative path is reckoned off of the directory (!)
7997 	identified by the respective FD.
7998 
7999 	\param oldFD The FD of the old location. May be < 0.
8000 	\param oldPath The absolute or relative path of the old location. Must not
8001 		   be \c NULL.
8002 	\param newFD The FD of the new location. May be < 0.
8003 	\param newPath The absolute or relative path of the new location. Must not
8004 		   be \c NULL.
8005 	\return \c B_OK, if the entry has been moved successfully, another
8006 			error code otherwise.
8007 */
8008 status_t
8009 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8010 {
8011 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8012 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8013 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8014 		return B_NO_MEMORY;
8015 
8016 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8017 		newFD, newPathBuffer.LockBuffer(), true);
8018 }
8019 
8020 
8021 status_t
8022 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8023 {
8024 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8025 	if (pathBuffer.InitCheck() != B_OK)
8026 		return B_NO_MEMORY;
8027 
8028 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8029 		true);
8030 }
8031 
8032 
8033 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8034 
8035 	If only \a fd is given, the stat operation associated with the type
8036 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8037 	given, this path identifies the entry for whose node to retrieve the
8038 	stat data. If both \a fd and \a path are given and the path is absolute,
8039 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8040 	identified by \a fd and specifies the entry whose stat data shall be
8041 	retrieved.
8042 
8043 	\param fd The FD. May be < 0.
8044 	\param path The absolute or relative path. Must not be \c NULL.
8045 	\param traverseLeafLink If \a path is given, \c true specifies that the
8046 		   function shall not stick to symlinks, but traverse them.
8047 	\param stat The buffer the stat data shall be written into.
8048 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
8050 			error code otherwise.
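
	\a statSize may be smaller than \c sizeof(struct stat) to support
	callers compiled against an older, smaller version of the structure;
	only the first \a statSize bytes are copied back. A basic call:
	\code
	struct stat st;
	status_t error = _kern_read_stat(-1, "/boot/home", true, &st,
		sizeof(st));
	\endcode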
8051 */
8052 status_t
8053 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8054 	struct stat* stat, size_t statSize)
8055 {
8056 	struct stat completeStat;
8057 	struct stat* originalStat = NULL;
8058 	status_t status;
8059 
8060 	if (statSize > sizeof(struct stat))
8061 		return B_BAD_VALUE;
8062 
	// Support callers with an older, smaller version of struct stat: read
	// into a complete structure and copy back only the requested size.
8064 	if (statSize < sizeof(struct stat)) {
8065 		originalStat = stat;
8066 		stat = &completeStat;
8067 	}
8068 
8069 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8070 
8071 	if (status == B_OK && originalStat != NULL)
8072 		memcpy(originalStat, stat, statSize);
8073 
8074 	return status;
8075 }
8076 
8077 
8078 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8079 
8080 	If only \a fd is given, the stat operation associated with the type
8081 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8082 	given, this path identifies the entry for whose node to write the
8083 	stat data. If both \a fd and \a path are given and the path is absolute,
8084 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8085 	identified by \a fd and specifies the entry whose stat data shall be
8086 	written.
8087 
8088 	\param fd The FD. May be < 0.
8089 	\param path The absolute or relative path. Must not be \c NULL.
8090 	\param traverseLeafLink If \a path is given, \c true specifies that the
8091 		   function shall not stick to symlinks, but traverse them.
8092 	\param stat The buffer containing the stat data to be written.
8093 	\param statSize The size of the supplied stat buffer.
8094 	\param statMask A mask specifying which parts of the stat data shall be
8095 		   written.
	\return \c B_OK, if the stat data have been written successfully,
8097 			another error code otherwise.
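
	For example, to change only the permissions of a node (the path and
	mode are hypothetical; \c B_STAT_MODE selects the mode field):
	\code
	struct stat st;
	st.st_mode = 0644;
	status_t error = _kern_write_stat(-1, "/boot/home/file", true, &st,
		sizeof(st), B_STAT_MODE);
	\endcode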
8098 */
8099 status_t
8100 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8101 	const struct stat* stat, size_t statSize, int statMask)
8102 {
8103 	struct stat completeStat;
8104 
8105 	if (statSize > sizeof(struct stat))
8106 		return B_BAD_VALUE;
8107 
	// Support callers with an older, smaller version of struct stat:
	// zero the fields the caller didn't supply.
8109 	if (statSize < sizeof(struct stat)) {
8110 		memset((uint8*)&completeStat + statSize, 0,
8111 			sizeof(struct stat) - statSize);
8112 		memcpy(&completeStat, stat, statSize);
8113 		stat = &completeStat;
8114 	}
8115 
8116 	status_t status;
8117 
8118 	if (path) {
8119 		// path given: write the stat of the node referred to by (fd, path)
8120 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8121 		if (pathBuffer.InitCheck() != B_OK)
8122 			return B_NO_MEMORY;
8123 
8124 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8125 			traverseLeafLink, stat, statMask, true);
8126 	} else {
8127 		// no path given: get the FD and use the FD operation
8128 		struct file_descriptor* descriptor
8129 			= get_fd(get_current_io_context(true), fd);
8130 		if (descriptor == NULL)
8131 			return B_FILE_ERROR;
8132 
8133 		if (descriptor->ops->fd_write_stat)
8134 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8135 		else
8136 			status = EOPNOTSUPP;
8137 
8138 		put_fd(descriptor);
8139 	}
8140 
8141 	return status;
8142 }
8143 
8144 
8145 int
8146 _kern_open_attr_dir(int fd, const char* path)
8147 {
8148 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8149 	if (pathBuffer.InitCheck() != B_OK)
8150 		return B_NO_MEMORY;
8151 
8152 	if (path != NULL)
8153 		pathBuffer.SetTo(path);
8154 
8155 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8156 }
8157 
8158 
8159 int
8160 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8161 	int openMode)
8162 {
8163 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8164 	if (pathBuffer.InitCheck() != B_OK)
8165 		return B_NO_MEMORY;
8166 
8167 	if ((openMode & O_CREAT) != 0) {
8168 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8169 			true);
8170 	}
8171 
8172 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8173 }
8174 
8175 
8176 status_t
8177 _kern_remove_attr(int fd, const char* name)
8178 {
8179 	return attr_remove(fd, name, true);
8180 }
8181 
8182 
8183 status_t
8184 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8185 	const char* toName)
8186 {
8187 	return attr_rename(fromFile, fromName, toFile, toName, true);
8188 }
8189 
8190 
8191 int
8192 _kern_open_index_dir(dev_t device)
8193 {
8194 	return index_dir_open(device, true);
8195 }
8196 
8197 
8198 status_t
8199 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8200 {
8201 	return index_create(device, name, type, flags, true);
8202 }
8203 
8204 
8205 status_t
8206 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8207 {
8208 	return index_name_read_stat(device, name, stat, true);
8209 }
8210 
8211 
8212 status_t
8213 _kern_remove_index(dev_t device, const char* name)
8214 {
8215 	return index_remove(device, name, true);
8216 }
8217 
8218 
8219 status_t
8220 _kern_getcwd(char* buffer, size_t size)
8221 {
8222 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8223 
8224 	// Call vfs to get current working directory
8225 	return get_cwd(buffer, size, true);
8226 }
8227 
8228 
8229 status_t
8230 _kern_setcwd(int fd, const char* path)
8231 {
8232 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8233 	if (pathBuffer.InitCheck() != B_OK)
8234 		return B_NO_MEMORY;
8235 
8236 	if (path != NULL)
8237 		pathBuffer.SetTo(path);
8238 
8239 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8240 }
8241 
8242 
8243 //	#pragma mark - userland syscalls
8244 
8245 
8246 dev_t
8247 _user_mount(const char* userPath, const char* userDevice,
8248 	const char* userFileSystem, uint32 flags, const char* userArgs,
8249 	size_t argsLength)
8250 {
8251 	char fileSystem[B_FILE_NAME_LENGTH];
8252 	KPath path, device;
8253 	char* args = NULL;
8254 	status_t status;
8255 
8256 	if (!IS_USER_ADDRESS(userPath)
8257 		|| !IS_USER_ADDRESS(userFileSystem)
8258 		|| !IS_USER_ADDRESS(userDevice))
8259 		return B_BAD_ADDRESS;
8260 
8261 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8262 		return B_NO_MEMORY;
8263 
8264 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8265 		return B_BAD_ADDRESS;
8266 
8267 	if (userFileSystem != NULL
8268 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8269 		return B_BAD_ADDRESS;
8270 
8271 	if (userDevice != NULL
8272 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8273 			< B_OK)
8274 		return B_BAD_ADDRESS;
8275 
8276 	if (userArgs != NULL && argsLength > 0) {
8277 		// this is a safety restriction
8278 		if (argsLength >= 65536)
8279 			return B_NAME_TOO_LONG;
8280 
8281 		args = (char*)malloc(argsLength + 1);
8282 		if (args == NULL)
8283 			return B_NO_MEMORY;
8284 
8285 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8286 			free(args);
8287 			return B_BAD_ADDRESS;
8288 		}
8289 	}
8290 	path.UnlockBuffer();
8291 	device.UnlockBuffer();
8292 
8293 	status = fs_mount(path.LockBuffer(),
8294 		userDevice != NULL ? device.Path() : NULL,
8295 		userFileSystem ? fileSystem : NULL, flags, args, false);
8296 
8297 	free(args);
8298 	return status;
8299 }
8300 
8301 
8302 status_t
8303 _user_unmount(const char* userPath, uint32 flags)
8304 {
8305 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8306 	if (pathBuffer.InitCheck() != B_OK)
8307 		return B_NO_MEMORY;
8308 
8309 	char* path = pathBuffer.LockBuffer();
8310 
8311 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8312 		return B_BAD_ADDRESS;
8313 
8314 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8315 }
8316 
8317 
8318 status_t
8319 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8320 {
8321 	struct fs_info info;
8322 	status_t status;
8323 
8324 	if (userInfo == NULL)
8325 		return B_BAD_VALUE;
8326 
8327 	if (!IS_USER_ADDRESS(userInfo))
8328 		return B_BAD_ADDRESS;
8329 
8330 	status = fs_read_info(device, &info);
8331 	if (status != B_OK)
8332 		return status;
8333 
8334 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8335 		return B_BAD_ADDRESS;
8336 
8337 	return B_OK;
8338 }
8339 
8340 
8341 status_t
8342 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8343 {
8344 	struct fs_info info;
8345 
8346 	if (userInfo == NULL)
8347 		return B_BAD_VALUE;
8348 
8349 	if (!IS_USER_ADDRESS(userInfo)
8350 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8351 		return B_BAD_ADDRESS;
8352 
8353 	return fs_write_info(device, &info, mask);
8354 }
8355 
8356 
8357 dev_t
8358 _user_next_device(int32* _userCookie)
8359 {
8360 	int32 cookie;
8361 	dev_t device;
8362 
8363 	if (!IS_USER_ADDRESS(_userCookie)
8364 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8365 		return B_BAD_ADDRESS;
8366 
8367 	device = fs_next_device(&cookie);
8368 
8369 	if (device >= B_OK) {
8370 		// update user cookie
8371 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8372 			return B_BAD_ADDRESS;
8373 	}
8374 
8375 	return device;
8376 }
8377 
8378 
8379 status_t
8380 _user_sync(void)
8381 {
8382 	return _kern_sync();
8383 }
8384 
8385 
8386 status_t
8387 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8388 	size_t infoSize)
8389 {
8390 	struct fd_info info;
8391 	uint32 cookie;
8392 
8393 	// only root can do this (or should root's group be enough?)
8394 	if (geteuid() != 0)
8395 		return B_NOT_ALLOWED;
8396 
8397 	if (infoSize != sizeof(fd_info))
8398 		return B_BAD_VALUE;
8399 
8400 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8401 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8402 		return B_BAD_ADDRESS;
8403 
8404 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8405 	if (status != B_OK)
8406 		return status;
8407 
8408 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8409 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8410 		return B_BAD_ADDRESS;
8411 
8412 	return status;
8413 }
8414 
8415 
8416 status_t
8417 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8418 	char* userPath, size_t pathLength)
8419 {
8420 	if (!IS_USER_ADDRESS(userPath))
8421 		return B_BAD_ADDRESS;
8422 
8423 	KPath path(B_PATH_NAME_LENGTH + 1);
8424 	if (path.InitCheck() != B_OK)
8425 		return B_NO_MEMORY;
8426 
8427 	// copy the leaf name onto the stack
8428 	char stackLeaf[B_FILE_NAME_LENGTH];
8429 	if (leaf) {
8430 		if (!IS_USER_ADDRESS(leaf))
8431 			return B_BAD_ADDRESS;
8432 
8433 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8434 		if (length < 0)
8435 			return length;
8436 		if (length >= B_FILE_NAME_LENGTH)
8437 			return B_NAME_TOO_LONG;
8438 
8439 		leaf = stackLeaf;
8440 	}
8441 
8442 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8443 		path.LockBuffer(), path.BufferSize());
8444 	if (status != B_OK)
8445 		return status;
8446 
8447 	path.UnlockBuffer();
8448 
8449 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8450 	if (length < 0)
8451 		return length;
8452 	if (length >= (int)pathLength)
8453 		return B_BUFFER_OVERFLOW;
8454 
8455 	return B_OK;
8456 }
8457 
8458 
8459 status_t
8460 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8461 {
8462 	if (userPath == NULL || buffer == NULL)
8463 		return B_BAD_VALUE;
8464 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8465 		return B_BAD_ADDRESS;
8466 
8467 	// copy path from userland
8468 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8469 	if (pathBuffer.InitCheck() != B_OK)
8470 		return B_NO_MEMORY;
8471 	char* path = pathBuffer.LockBuffer();
8472 
8473 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8474 		return B_BAD_ADDRESS;
8475 
8476 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8477 		false);
8478 	if (error != B_OK)
8479 		return error;
8480 
8481 	// copy back to userland
8482 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8483 	if (len < 0)
8484 		return len;
8485 	if (len >= B_PATH_NAME_LENGTH)
8486 		return B_BUFFER_OVERFLOW;
8487 
8488 	return B_OK;
8489 }
8490 
8491 
8492 int
8493 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8494 	int openMode, int perms)
8495 {
8496 	char name[B_FILE_NAME_LENGTH];
8497 
8498 	if (userName == NULL || device < 0 || inode < 0)
8499 		return B_BAD_VALUE;
8500 	if (!IS_USER_ADDRESS(userName)
8501 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8502 		return B_BAD_ADDRESS;
8503 
8504 	if ((openMode & O_CREAT) != 0) {
8505 		return file_create_entry_ref(device, inode, name, openMode, perms,
			false);
8507 	}
8508 
8509 	return file_open_entry_ref(device, inode, name, openMode, false);
8510 }
8511 
8512 
8513 int
8514 _user_open(int fd, const char* userPath, int openMode, int perms)
8515 {
8516 	KPath path(B_PATH_NAME_LENGTH + 1);
8517 	if (path.InitCheck() != B_OK)
8518 		return B_NO_MEMORY;
8519 
8520 	char* buffer = path.LockBuffer();
8521 
8522 	if (!IS_USER_ADDRESS(userPath)
8523 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8524 		return B_BAD_ADDRESS;
8525 
8526 	if ((openMode & O_CREAT) != 0)
8527 		return file_create(fd, buffer, openMode, perms, false);
8528 
8529 	return file_open(fd, buffer, openMode, false);
8530 }
8531 
8532 
8533 int
8534 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8535 {
8536 	if (userName != NULL) {
8537 		char name[B_FILE_NAME_LENGTH];
8538 
8539 		if (!IS_USER_ADDRESS(userName)
8540 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8541 			return B_BAD_ADDRESS;
8542 
8543 		return dir_open_entry_ref(device, inode, name, false);
8544 	}
8545 	return dir_open_entry_ref(device, inode, NULL, false);
8546 }
8547 
8548 
8549 int
8550 _user_open_dir(int fd, const char* userPath)
8551 {
8552 	if (userPath == NULL)
8553 		return dir_open(fd, NULL, false);
8554 
8555 	KPath path(B_PATH_NAME_LENGTH + 1);
8556 	if (path.InitCheck() != B_OK)
8557 		return B_NO_MEMORY;
8558 
8559 	char* buffer = path.LockBuffer();
8560 
8561 	if (!IS_USER_ADDRESS(userPath)
8562 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8563 		return B_BAD_ADDRESS;
8564 
8565 	return dir_open(fd, buffer, false);
8566 }
8567 
8568 
8569 /*!	\brief Opens a directory's parent directory and returns the entry name
8570 		   of the former.
8571 
	Aside from the fact that it returns the directory's entry name, this
	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
	really is equivalent if \a userName is \c NULL.
8575 
8576 	If a name buffer is supplied and the name does not fit the buffer, the
8577 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8578 
8579 	\param fd A FD referring to a directory.
8580 	\param userName Buffer the directory's entry name shall be written into.
8581 		   May be \c NULL.
8582 	\param nameLength Size of the name buffer.
8583 	\return The file descriptor of the opened parent directory, if everything
8584 			went fine, an error code otherwise.
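
	A sketch of the intended use, as invoked from userland via the
	corresponding syscall (\a userName must refer to a userland buffer):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(fd, name, sizeof(name));
	if (parentFD >= 0) {
		// name contains the entry name of the directory fd refers to
	}
	\endcode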
8585 */
8586 int
8587 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8588 {
8589 	bool kernel = false;
8590 
8591 	if (userName && !IS_USER_ADDRESS(userName))
8592 		return B_BAD_ADDRESS;
8593 
8594 	// open the parent dir
8595 	int parentFD = dir_open(fd, (char*)"..", kernel);
8596 	if (parentFD < 0)
8597 		return parentFD;
8598 	FDCloser fdCloser(parentFD, kernel);
8599 
8600 	if (userName) {
8601 		// get the vnodes
8602 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8603 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8604 		VNodePutter parentVNodePutter(parentVNode);
8605 		VNodePutter dirVNodePutter(dirVNode);
8606 		if (!parentVNode || !dirVNode)
8607 			return B_FILE_ERROR;
8608 
8609 		// get the vnode name
8610 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8611 		struct dirent* buffer = (struct dirent*)_buffer;
8612 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8613 			sizeof(_buffer), get_current_io_context(false));
8614 		if (status != B_OK)
8615 			return status;
8616 
8617 		// copy the name to the userland buffer
8618 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8619 		if (len < 0)
8620 			return len;
8621 		if (len >= (int)nameLength)
8622 			return B_BUFFER_OVERFLOW;
8623 	}
8624 
8625 	return fdCloser.Detach();
8626 }
8627 
8628 
8629 status_t
8630 _user_fcntl(int fd, int op, uint32 argument)
8631 {
8632 	status_t status = common_fcntl(fd, op, argument, false);
8633 	if (op == F_SETLKW)
8634 		syscall_restart_handle_post(status);
8635 
8636 	return status;
8637 }
8638 
8639 
8640 status_t
8641 _user_fsync(int fd)
8642 {
8643 	return common_sync(fd, false);
8644 }
8645 
8646 
8647 status_t
8648 _user_flock(int fd, int operation)
8649 {
	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
8651 
8652 	// Check if the operation is valid
8653 	switch (operation & ~LOCK_NB) {
8654 		case LOCK_UN:
8655 		case LOCK_SH:
8656 		case LOCK_EX:
8657 			break;
8658 
8659 		default:
8660 			return B_BAD_VALUE;
8661 	}
8662 
8663 	struct file_descriptor* descriptor;
8664 	struct vnode* vnode;
8665 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8666 	if (descriptor == NULL)
8667 		return B_FILE_ERROR;
8668 
8669 	if (descriptor->type != FDTYPE_FILE) {
8670 		put_fd(descriptor);
8671 		return B_BAD_VALUE;
8672 	}
8673 
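	// flock() locks always cover the whole file; express the request as an
	// equivalent POSIX advisory lock record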
8674 	struct flock flock;
8675 	flock.l_start = 0;
8676 	flock.l_len = OFF_MAX;
8677 	flock.l_whence = 0;
8678 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8679 
8680 	status_t status;
8681 	if ((operation & LOCK_UN) != 0)
8682 		status = release_advisory_lock(vnode, &flock);
8683 	else {
8684 		status = acquire_advisory_lock(vnode,
8685 			thread_get_current_thread()->team->session_id, &flock,
8686 			(operation & LOCK_NB) == 0);
8687 	}
8688 
8689 	syscall_restart_handle_post(status);
8690 
8691 	put_fd(descriptor);
8692 	return status;
8693 }
8694 
8695 
8696 status_t
8697 _user_lock_node(int fd)
8698 {
8699 	return common_lock_node(fd, false);
8700 }
8701 
8702 
8703 status_t
8704 _user_unlock_node(int fd)
8705 {
8706 	return common_unlock_node(fd, false);
8707 }
8708 
8709 
8710 status_t
8711 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8712 	int perms)
8713 {
8714 	char name[B_FILE_NAME_LENGTH];
8715 	status_t status;
8716 
8717 	if (!IS_USER_ADDRESS(userName))
8718 		return B_BAD_ADDRESS;
8719 
8720 	status = user_strlcpy(name, userName, sizeof(name));
8721 	if (status < 0)
8722 		return status;
8723 
8724 	return dir_create_entry_ref(device, inode, name, perms, false);
8725 }
8726 
8727 
8728 status_t
8729 _user_create_dir(int fd, const char* userPath, int perms)
8730 {
8731 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8732 	if (pathBuffer.InitCheck() != B_OK)
8733 		return B_NO_MEMORY;
8734 
8735 	char* path = pathBuffer.LockBuffer();
8736 
8737 	if (!IS_USER_ADDRESS(userPath)
8738 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8739 		return B_BAD_ADDRESS;
8740 
8741 	return dir_create(fd, path, perms, false);
8742 }
8743 
8744 
8745 status_t
8746 _user_remove_dir(int fd, const char* userPath)
8747 {
8748 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8749 	if (pathBuffer.InitCheck() != B_OK)
8750 		return B_NO_MEMORY;
8751 
8752 	char* path = pathBuffer.LockBuffer();
8753 
8754 	if (userPath != NULL) {
8755 		if (!IS_USER_ADDRESS(userPath)
8756 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8757 			return B_BAD_ADDRESS;
8758 	}
8759 
8760 	return dir_remove(fd, userPath ? path : NULL, false);
8761 }
8762 
8763 
8764 status_t
8765 _user_read_link(int fd, const char* userPath, char* userBuffer,
8766 	size_t* userBufferSize)
8767 {
8768 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8769 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8770 		return B_NO_MEMORY;
8771 
8772 	size_t bufferSize;
8773 
8774 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8775 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8776 		return B_BAD_ADDRESS;
8777 
8778 	char* path = pathBuffer.LockBuffer();
8779 	char* buffer = linkBuffer.LockBuffer();
8780 
8781 	if (userPath) {
8782 		if (!IS_USER_ADDRESS(userPath)
8783 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8784 			return B_BAD_ADDRESS;
8785 
8786 		if (bufferSize > B_PATH_NAME_LENGTH)
8787 			bufferSize = B_PATH_NAME_LENGTH;
8788 	}
8789 
8790 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8791 		&bufferSize, false);
8792 
8793 	// we also update the bufferSize in case of errors
8794 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8795 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8796 		return B_BAD_ADDRESS;
8797 
8798 	if (status != B_OK)
8799 		return status;
8800 
8801 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8802 		return B_BAD_ADDRESS;
8803 
8804 	return B_OK;
8805 }
8806 
8807 
8808 status_t
8809 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8810 	int mode)
8811 {
8812 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8813 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8814 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8815 		return B_NO_MEMORY;
8816 
8817 	char* path = pathBuffer.LockBuffer();
8818 	char* toPath = toPathBuffer.LockBuffer();
8819 
8820 	if (!IS_USER_ADDRESS(userPath)
8821 		|| !IS_USER_ADDRESS(userToPath)
8822 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8823 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8824 		return B_BAD_ADDRESS;
8825 
8826 	return common_create_symlink(fd, path, toPath, mode, false);
8827 }
8828 
8829 
8830 status_t
8831 _user_create_link(int pathFD, const char* userPath, int toFD,
8832 	const char* userToPath, bool traverseLeafLink)
8833 {
8834 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8835 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8836 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8837 		return B_NO_MEMORY;
8838 
8839 	char* path = pathBuffer.LockBuffer();
8840 	char* toPath = toPathBuffer.LockBuffer();
8841 
8842 	if (!IS_USER_ADDRESS(userPath)
8843 		|| !IS_USER_ADDRESS(userToPath)
8844 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8845 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8846 		return B_BAD_ADDRESS;
8847 
8848 	status_t status = check_path(toPath);
8849 	if (status != B_OK)
8850 		return status;
8851 
8852 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
8853 		false);
8854 }
8855 
8856 
8857 status_t
8858 _user_unlink(int fd, const char* userPath)
8859 {
8860 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8861 	if (pathBuffer.InitCheck() != B_OK)
8862 		return B_NO_MEMORY;
8863 
8864 	char* path = pathBuffer.LockBuffer();
8865 
8866 	if (!IS_USER_ADDRESS(userPath)
8867 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8868 		return B_BAD_ADDRESS;
8869 
8870 	return common_unlink(fd, path, false);
8871 }
8872 
8873 
8874 status_t
8875 _user_rename(int oldFD, const char* userOldPath, int newFD,
8876 	const char* userNewPath)
8877 {
8878 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8879 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8880 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8881 		return B_NO_MEMORY;
8882 
8883 	char* oldPath = oldPathBuffer.LockBuffer();
8884 	char* newPath = newPathBuffer.LockBuffer();
8885 
8886 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8887 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8888 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8889 		return B_BAD_ADDRESS;
8890 
8891 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8892 }
8893 
8894 
8895 status_t
8896 _user_create_fifo(int fd, const char* userPath, mode_t perms)
8897 {
8898 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8899 	if (pathBuffer.InitCheck() != B_OK)
8900 		return B_NO_MEMORY;
8901 
8902 	char* path = pathBuffer.LockBuffer();
8903 
8904 	if (!IS_USER_ADDRESS(userPath)
8905 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8906 		return B_BAD_ADDRESS;
8907 	}
8908 
8909 	// split into directory vnode and filename path
8910 	char filename[B_FILE_NAME_LENGTH];
8911 	struct vnode* dir;
8912 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
8913 	if (status != B_OK)
8914 		return status;
8915 
8916 	VNodePutter _(dir);
8917 
8918 	// the underlying FS needs to support creating FIFOs
8919 	if (!HAS_FS_CALL(dir, create_special_node))
8920 		return B_UNSUPPORTED;
8921 
8922 	// create the entry	-- the FIFO sub node is set up automatically
8923 	fs_vnode superVnode;
8924 	ino_t nodeID;
8925 	status = FS_CALL(dir, create_special_node, filename, NULL,
8926 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8927 
8928 	// create_special_node() acquired a reference for us that we don't need.
8929 	if (status == B_OK)
8930 		put_vnode(dir->mount->volume, nodeID);
8931 
8932 	return status;
8933 }
8934 
8935 
8936 status_t
8937 _user_create_pipe(int* userFDs)
8938 {
8939 	// rootfs should support creating FIFOs, but let's be sure
8940 	if (!HAS_FS_CALL(sRoot, create_special_node))
8941 		return B_UNSUPPORTED;
8942 
8943 	// create the node	-- the FIFO sub node is set up automatically
8944 	fs_vnode superVnode;
8945 	ino_t nodeID;
8946 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8947 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8948 	if (status != B_OK)
8949 		return status;
8950 
8951 	// We've got one reference to the node and need another one.
8952 	struct vnode* vnode;
8953 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8954 	if (status != B_OK) {
8955 		// that should not happen
		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
			sRoot->mount->id, nodeID);
8958 		return status;
8959 	}
8960 
	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
8963 	int fds[2];
8964 	fds[0] = open_vnode(vnode, O_RDONLY, false);
8965 	fds[1] = open_vnode(vnode, O_WRONLY, false);
8966 
8967 	FDCloser closer0(fds[0], false);
8968 	FDCloser closer1(fds[1], false);
8969 
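	// if either open failed, pass its error code on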
8970 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
8971 
8972 	// copy FDs to userland
8973 	if (status == B_OK) {
8974 		if (!IS_USER_ADDRESS(userFDs)
8975 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
8976 			status = B_BAD_ADDRESS;
8977 		}
8978 	}
8979 
8980 	// keep FDs, if everything went fine
8981 	if (status == B_OK) {
8982 		closer0.Detach();
8983 		closer1.Detach();
8984 	}
8985 
8986 	return status;
8987 }
8988 
8989 
8990 status_t
8991 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
8992 {
8993 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8994 	if (pathBuffer.InitCheck() != B_OK)
8995 		return B_NO_MEMORY;
8996 
8997 	char* path = pathBuffer.LockBuffer();
8998 
8999 	if (!IS_USER_ADDRESS(userPath)
9000 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9001 		return B_BAD_ADDRESS;
9002 
9003 	return common_access(fd, path, mode, effectiveUserGroup, false);
9004 }
9005 
9006 
9007 status_t
9008 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9009 	struct stat* userStat, size_t statSize)
9010 {
9011 	struct stat stat;
9012 	status_t status;
9013 
9014 	if (statSize > sizeof(struct stat))
9015 		return B_BAD_VALUE;
9016 
9017 	if (!IS_USER_ADDRESS(userStat))
9018 		return B_BAD_ADDRESS;
9019 
9020 	if (userPath) {
9021 		// path given: get the stat of the node referred to by (fd, path)
9022 		if (!IS_USER_ADDRESS(userPath))
9023 			return B_BAD_ADDRESS;
9024 
9025 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9026 		if (pathBuffer.InitCheck() != B_OK)
9027 			return B_NO_MEMORY;
9028 
9029 		char* path = pathBuffer.LockBuffer();
9030 
9031 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9032 		if (length < B_OK)
9033 			return length;
9034 		if (length >= B_PATH_NAME_LENGTH)
9035 			return B_NAME_TOO_LONG;
9036 
9037 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9038 	} else {
9039 		// no path given: get the FD and use the FD operation
9040 		struct file_descriptor* descriptor
9041 			= get_fd(get_current_io_context(false), fd);
9042 		if (descriptor == NULL)
9043 			return B_FILE_ERROR;
9044 
9045 		if (descriptor->ops->fd_read_stat)
9046 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9047 		else
9048 			status = EOPNOTSUPP;
9049 
9050 		put_fd(descriptor);
9051 	}
9052 
9053 	if (status != B_OK)
9054 		return status;
9055 
9056 	return user_memcpy(userStat, &stat, statSize);
9057 }
9058 
9059 
9060 status_t
9061 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9062 	const struct stat* userStat, size_t statSize, int statMask)
9063 {
9064 	if (statSize > sizeof(struct stat))
9065 		return B_BAD_VALUE;
9066 
9067 	struct stat stat;
9068 
9069 	if (!IS_USER_ADDRESS(userStat)
9070 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9071 		return B_BAD_ADDRESS;
9072 
9073 	// clear additional stat fields
9074 	if (statSize < sizeof(struct stat))
9075 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9076 
9077 	status_t status;
9078 
9079 	if (userPath) {
9080 		// path given: write the stat of the node referred to by (fd, path)
9081 		if (!IS_USER_ADDRESS(userPath))
9082 			return B_BAD_ADDRESS;
9083 
9084 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9085 		if (pathBuffer.InitCheck() != B_OK)
9086 			return B_NO_MEMORY;
9087 
9088 		char* path = pathBuffer.LockBuffer();
9089 
9090 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9091 		if (length < B_OK)
9092 			return length;
9093 		if (length >= B_PATH_NAME_LENGTH)
9094 			return B_NAME_TOO_LONG;
9095 
9096 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9097 			statMask, false);
9098 	} else {
9099 		// no path given: get the FD and use the FD operation
9100 		struct file_descriptor* descriptor
9101 			= get_fd(get_current_io_context(false), fd);
9102 		if (descriptor == NULL)
9103 			return B_FILE_ERROR;
9104 
9105 		if (descriptor->ops->fd_write_stat) {
9106 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9107 				statMask);
9108 		} else
9109 			status = EOPNOTSUPP;
9110 
9111 		put_fd(descriptor);
9112 	}
9113 
9114 	return status;
9115 }
9116 
9117 
9118 int
9119 _user_open_attr_dir(int fd, const char* userPath)
9120 {
9121 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9122 	if (pathBuffer.InitCheck() != B_OK)
9123 		return B_NO_MEMORY;
9124 
9125 	char* path = pathBuffer.LockBuffer();
9126 
9127 	if (userPath != NULL) {
9128 		if (!IS_USER_ADDRESS(userPath)
9129 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9130 			return B_BAD_ADDRESS;
9131 	}
9132 
9133 	return attr_dir_open(fd, userPath ? path : NULL, false);
9134 }
9135 
9136 
9137 ssize_t
9138 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9139 	size_t readBytes)
9140 {
9141 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9142 	if (attr < 0)
9143 		return attr;
9144 
9145 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9146 	_user_close(attr);
9147 
9148 	return bytes;
9149 }
9150 
9151 
9152 ssize_t
9153 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9154 	const void* buffer, size_t writeBytes)
9155 {
	// Try to support the BeOS-typical truncation as well as the position
	// argument: only truncate the attribute when writing from the start
9158 	int attr = attr_create(fd, NULL, attribute, type,
9159 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9160 	if (attr < 0)
9161 		return attr;
9162 
9163 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9164 	_user_close(attr);
9165 
9166 	return bytes;
9167 }
9168 
9169 
9170 status_t
9171 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9172 {
9173 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9174 	if (attr < 0)
9175 		return attr;
9176 
9177 	struct file_descriptor* descriptor
9178 		= get_fd(get_current_io_context(false), attr);
9179 	if (descriptor == NULL) {
9180 		_user_close(attr);
9181 		return B_FILE_ERROR;
9182 	}
9183 
9184 	struct stat stat;
9185 	status_t status;
9186 	if (descriptor->ops->fd_read_stat)
9187 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9188 	else
9189 		status = EOPNOTSUPP;
9190 
9191 	put_fd(descriptor);
9192 	_user_close(attr);
9193 
9194 	if (status == B_OK) {
9195 		attr_info info;
9196 		info.type = stat.st_type;
9197 		info.size = stat.st_size;
9198 
9199 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9200 			return B_BAD_ADDRESS;
9201 	}
9202 
9203 	return status;
9204 }
9205 
9206 
9207 int
9208 _user_open_attr(int fd, const char* userPath, const char* userName,
9209 	uint32 type, int openMode)
9210 {
9211 	char name[B_FILE_NAME_LENGTH];
9212 
9213 	if (!IS_USER_ADDRESS(userName)
9214 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9215 		return B_BAD_ADDRESS;
9216 
9217 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9218 	if (pathBuffer.InitCheck() != B_OK)
9219 		return B_NO_MEMORY;
9220 
9221 	char* path = pathBuffer.LockBuffer();
9222 
9223 	if (userPath != NULL) {
9224 		if (!IS_USER_ADDRESS(userPath)
9225 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9226 			return B_BAD_ADDRESS;
9227 	}
9228 
9229 	if ((openMode & O_CREAT) != 0) {
9230 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9231 			false);
9232 	}
9233 
9234 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9235 }
9236 
9237 
9238 status_t
9239 _user_remove_attr(int fd, const char* userName)
9240 {
9241 	char name[B_FILE_NAME_LENGTH];
9242 
9243 	if (!IS_USER_ADDRESS(userName)
9244 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9245 		return B_BAD_ADDRESS;
9246 
9247 	return attr_remove(fd, name, false);
9248 }
9249 
9250 
9251 status_t
9252 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9253 	const char* userToName)
9254 {
9255 	if (!IS_USER_ADDRESS(userFromName)
9256 		|| !IS_USER_ADDRESS(userToName))
9257 		return B_BAD_ADDRESS;
9258 
9259 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9260 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9261 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9262 		return B_NO_MEMORY;
9263 
9264 	char* fromName = fromNameBuffer.LockBuffer();
9265 	char* toName = toNameBuffer.LockBuffer();
9266 
9267 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9268 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9269 		return B_BAD_ADDRESS;
9270 
9271 	return attr_rename(fromFile, fromName, toFile, toName, false);
9272 }
9273 
9274 
9275 int
9276 _user_open_index_dir(dev_t device)
9277 {
9278 	return index_dir_open(device, false);
9279 }
9280 
9281 
9282 status_t
9283 _user_create_index(dev_t device, const char* userName, uint32 type,
9284 	uint32 flags)
9285 {
9286 	char name[B_FILE_NAME_LENGTH];
9287 
9288 	if (!IS_USER_ADDRESS(userName)
9289 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9290 		return B_BAD_ADDRESS;
9291 
9292 	return index_create(device, name, type, flags, false);
9293 }
9294 
9295 
9296 status_t
9297 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9298 {
9299 	char name[B_FILE_NAME_LENGTH];
9300 	struct stat stat;
9301 	status_t status;
9302 
9303 	if (!IS_USER_ADDRESS(userName)
9304 		|| !IS_USER_ADDRESS(userStat)
9305 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9306 		return B_BAD_ADDRESS;
9307 
9308 	status = index_name_read_stat(device, name, &stat, false);
9309 	if (status == B_OK) {
9310 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9311 			return B_BAD_ADDRESS;
9312 	}
9313 
9314 	return status;
9315 }
9316 
9317 
9318 status_t
9319 _user_remove_index(dev_t device, const char* userName)
9320 {
9321 	char name[B_FILE_NAME_LENGTH];
9322 
9323 	if (!IS_USER_ADDRESS(userName)
9324 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9325 		return B_BAD_ADDRESS;
9326 
9327 	return index_remove(device, name, false);
9328 }
9329 
9330 
9331 status_t
9332 _user_getcwd(char* userBuffer, size_t size)
9333 {
9334 	if (size == 0)
9335 		return B_BAD_VALUE;
9336 	if (!IS_USER_ADDRESS(userBuffer))
9337 		return B_BAD_ADDRESS;
9338 
9339 	if (size > kMaxPathLength)
9340 		size = kMaxPathLength;
9341 
9342 	KPath pathBuffer(size);
9343 	if (pathBuffer.InitCheck() != B_OK)
9344 		return B_NO_MEMORY;
9345 
9346 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9347 
9348 	char* path = pathBuffer.LockBuffer();
9349 
9350 	status_t status = get_cwd(path, size, false);
9351 	if (status != B_OK)
9352 		return status;
9353 
9354 	// Copy back the result
9355 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9356 		return B_BAD_ADDRESS;
9357 
9358 	return status;
9359 }
9360 
9361 
9362 status_t
9363 _user_setcwd(int fd, const char* userPath)
9364 {
9365 	TRACE(("user_setcwd: path = %p\n", userPath));
9366 
9367 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9368 	if (pathBuffer.InitCheck() != B_OK)
9369 		return B_NO_MEMORY;
9370 
9371 	char* path = pathBuffer.LockBuffer();
9372 
9373 	if (userPath != NULL) {
9374 		if (!IS_USER_ADDRESS(userPath)
9375 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9376 			return B_BAD_ADDRESS;
9377 	}
9378 
9379 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9380 }
9381 
9382 
9383 status_t
9384 _user_change_root(const char* userPath)
9385 {
9386 	// only root is allowed to chroot()
9387 	if (geteuid() != 0)
9388 		return EPERM;
9389 
9390 	// alloc path buffer
9391 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9392 	if (pathBuffer.InitCheck() != B_OK)
9393 		return B_NO_MEMORY;
9394 
9395 	// copy userland path to kernel
9396 	char* path = pathBuffer.LockBuffer();
9397 	if (userPath != NULL) {
9398 		if (!IS_USER_ADDRESS(userPath)
9399 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9400 			return B_BAD_ADDRESS;
9401 	}
9402 
9403 	// get the vnode
9404 	struct vnode* vnode;
9405 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9406 	if (status != B_OK)
9407 		return status;
9408 
9409 	// set the new root
9410 	struct io_context* context = get_current_io_context(false);
9411 	mutex_lock(&sIOContextRootLock);
9412 	struct vnode* oldRoot = context->root;
9413 	context->root = vnode;
9414 	mutex_unlock(&sIOContextRootLock);
9415 
9416 	put_vnode(oldRoot);
9417 
9418 	return B_OK;
9419 }
9420 
9421 
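/*!	\brief Opens a query on the given volume.

	\a userQuery contains the query predicate string, e.g.
	\code name=="*.cpp" \endcode (a hypothetical example). If \a flags
	includes \c B_LIVE_QUERY, \a port and \a token identify where query
	update notifications shall be sent.
*/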
9422 int
9423 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9424 	uint32 flags, port_id port, int32 token)
9425 {
9426 	char* query;
9427 
9428 	if (device < 0 || userQuery == NULL || queryLength == 0)
9429 		return B_BAD_VALUE;
9430 
9431 	// this is a safety restriction
9432 	if (queryLength >= 65536)
9433 		return B_NAME_TOO_LONG;
9434 
9435 	query = (char*)malloc(queryLength + 1);
9436 	if (query == NULL)
9437 		return B_NO_MEMORY;
9438 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9439 		free(query);
9440 		return B_BAD_ADDRESS;
9441 	}
9442 
9443 	int fd = query_open(device, query, flags, port, token, false);
9444 
9445 	free(query);
9446 	return fd;
9447 }
9448 
9449 
9450 #include "vfs_request_io.cpp"
9451